mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
[AMDGPU] Use s_add_i32 for address additions
This allows the add instruction to be converted to s_addk_i32, and to v_add_nc_u32 rather than v_add_co_u32 when it is converted to a VALU instruction. Differential Revision: https://reviews.llvm.org/D103322
This commit is contained in:
parent
a86aa7478c
commit
38d0179c03
@ -1894,7 +1894,7 @@ static SDValue SelectSAddrFI(SelectionDAG *CurDAG, SDValue SAddr) {
|
||||
auto FI = cast<FrameIndexSDNode>(SAddr.getOperand(0));
|
||||
SDValue TFI = CurDAG->getTargetFrameIndex(FI->getIndex(),
|
||||
FI->getValueType(0));
|
||||
SAddr = SDValue(CurDAG->getMachineNode(AMDGPU::S_ADD_U32, SDLoc(SAddr),
|
||||
SAddr = SDValue(CurDAG->getMachineNode(AMDGPU::S_ADD_I32, SDLoc(SAddr),
|
||||
MVT::i32, TFI, SAddr.getOperand(1)),
|
||||
0);
|
||||
}
|
||||
@ -1936,8 +1936,9 @@ bool AMDGPUDAGToDAGISel::SelectScratchSAddr(SDNode *Parent, SDValue Addr,
|
||||
SAddr.getOpcode() == ISD::TargetFrameIndex
|
||||
? getMaterializedScalarImm32(Lo_32(RemainderOffset), DL)
|
||||
: CurDAG->getTargetConstant(RemainderOffset, DL, MVT::i32);
|
||||
SAddr = SDValue(CurDAG->getMachineNode(AMDGPU::S_ADD_U32, DL, MVT::i32,
|
||||
SAddr, AddOffset), 0);
|
||||
SAddr = SDValue(CurDAG->getMachineNode(AMDGPU::S_ADD_I32, DL, MVT::i32,
|
||||
SAddr, AddOffset),
|
||||
0);
|
||||
}
|
||||
|
||||
Offset = CurDAG->getTargetConstant(COffsetVal, DL, MVT::i16);
|
||||
|
@ -3694,9 +3694,9 @@ AMDGPUInstructionSelector::selectScratchSAddr(MachineOperand &Root) const {
|
||||
const DebugLoc &DL = I.getDebugLoc();
|
||||
SAddr = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
|
||||
|
||||
BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), SAddr)
|
||||
.addFrameIndex(FI)
|
||||
.addReg(RHSDef->Reg);
|
||||
BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_I32), SAddr)
|
||||
.addFrameIndex(FI)
|
||||
.addReg(RHSDef->Reg);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -307,9 +307,9 @@ void SIFrameLowering::emitEntryFunctionFlatScratchInit(
|
||||
|
||||
// Add wave offset in bytes to private base offset.
|
||||
// See comment in AMDKernelCodeT.h for enable_sgpr_flat_scratch_init.
|
||||
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo)
|
||||
.addReg(FlatScrInitLo)
|
||||
.addReg(ScratchWaveOffsetReg);
|
||||
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), FlatScrInitLo)
|
||||
.addReg(FlatScrInitLo)
|
||||
.addReg(ScratchWaveOffsetReg);
|
||||
|
||||
// Convert offset to 256-byte units.
|
||||
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_LSHR_B32), AMDGPU::FLAT_SCR_HI)
|
||||
@ -909,9 +909,9 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
|
||||
LiveRegs.addLiveIns(MBB);
|
||||
}
|
||||
|
||||
// s_add_u32 s33, s32, NumBytes
|
||||
// s_add_i32 s33, s32, NumBytes
|
||||
// s_and_b32 s33, s33, 0b111...0000
|
||||
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_U32), FramePtrReg)
|
||||
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), FramePtrReg)
|
||||
.addReg(StackPtrReg)
|
||||
.addImm((Alignment - 1) * getScratchScaleFactor(ST))
|
||||
.setMIFlag(MachineInstr::FrameSetup);
|
||||
@ -937,7 +937,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
|
||||
}
|
||||
|
||||
if (HasFP && RoundedSize != 0) {
|
||||
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_U32), StackPtrReg)
|
||||
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), StackPtrReg)
|
||||
.addReg(StackPtrReg)
|
||||
.addImm(RoundedSize * getScratchScaleFactor(ST))
|
||||
.setMIFlag(MachineInstr::FrameSetup);
|
||||
@ -988,10 +988,10 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
|
||||
Optional<int> BPSaveIndex = FuncInfo->BasePointerSaveIndex;
|
||||
|
||||
if (RoundedSize != 0 && hasFP(MF)) {
|
||||
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_SUB_U32), StackPtrReg)
|
||||
.addReg(StackPtrReg)
|
||||
.addImm(RoundedSize * getScratchScaleFactor(ST))
|
||||
.setMIFlag(MachineInstr::FrameDestroy);
|
||||
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), StackPtrReg)
|
||||
.addReg(StackPtrReg)
|
||||
.addImm(-static_cast<int64_t>(RoundedSize * getScratchScaleFactor(ST)))
|
||||
.setMIFlag(MachineInstr::FrameDestroy);
|
||||
}
|
||||
|
||||
if (FuncInfo->SGPRForFPSaveRestoreCopy) {
|
||||
@ -1294,10 +1294,12 @@ MachineBasicBlock::iterator SIFrameLowering::eliminateCallFramePseudoInstr(
|
||||
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
|
||||
Register SPReg = MFI->getStackPtrOffsetReg();
|
||||
|
||||
unsigned Op = IsDestroy ? AMDGPU::S_SUB_U32 : AMDGPU::S_ADD_U32;
|
||||
BuildMI(MBB, I, DL, TII->get(Op), SPReg)
|
||||
.addReg(SPReg)
|
||||
.addImm(Amount * getScratchScaleFactor(ST));
|
||||
Amount *= getScratchScaleFactor(ST);
|
||||
if (IsDestroy)
|
||||
Amount = -Amount;
|
||||
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), SPReg)
|
||||
.addReg(SPReg)
|
||||
.addImm(Amount);
|
||||
} else if (CalleePopAmount != 0) {
|
||||
llvm_unreachable("is this used?");
|
||||
}
|
||||
|
@ -703,7 +703,7 @@ Register SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
|
||||
.addFrameIndex(FrameIdx);
|
||||
|
||||
if (ST.enableFlatScratch() ) {
|
||||
BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_ADD_U32), BaseReg)
|
||||
BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_ADD_I32), BaseReg)
|
||||
.addReg(OffsetReg, RegState::Kill)
|
||||
.addReg(FIReg);
|
||||
return BaseReg;
|
||||
@ -1113,7 +1113,7 @@ void SIRegisterInfo::buildSpillLoadStore(
|
||||
if (ScratchOffsetReg == AMDGPU::NoRegister) {
|
||||
BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), SOffset).addImm(Offset);
|
||||
} else {
|
||||
BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), SOffset)
|
||||
BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), SOffset)
|
||||
.addReg(ScratchOffsetReg)
|
||||
.addImm(Offset);
|
||||
}
|
||||
@ -1262,9 +1262,9 @@ void SIRegisterInfo::buildSpillLoadStore(
|
||||
|
||||
if (ScratchOffsetRegDelta != 0) {
|
||||
// Subtract the offset we added to the ScratchOffset register.
|
||||
BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), SOffset)
|
||||
BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), SOffset)
|
||||
.addReg(SOffset)
|
||||
.addImm(ScratchOffsetRegDelta);
|
||||
.addImm(-ScratchOffsetRegDelta);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1707,9 +1707,9 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
|
||||
FIOp.setIsKill(false);
|
||||
}
|
||||
|
||||
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), TmpSReg)
|
||||
.addReg(FrameReg)
|
||||
.addImm(Offset);
|
||||
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), TmpSReg)
|
||||
.addReg(FrameReg)
|
||||
.addImm(Offset);
|
||||
|
||||
if (!UseSGPR)
|
||||
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
|
||||
@ -1717,10 +1717,10 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
|
||||
|
||||
if (TmpSReg == FrameReg) {
|
||||
// Undo frame register modification.
|
||||
BuildMI(*MBB, std::next(MI), DL, TII->get(AMDGPU::S_SUB_U32),
|
||||
BuildMI(*MBB, std::next(MI), DL, TII->get(AMDGPU::S_ADD_I32),
|
||||
FrameReg)
|
||||
.addReg(FrameReg)
|
||||
.addImm(Offset);
|
||||
.addReg(FrameReg)
|
||||
.addImm(-Offset);
|
||||
}
|
||||
|
||||
return;
|
||||
@ -1794,17 +1794,17 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
|
||||
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHR_B32), ScaledReg)
|
||||
.addReg(FrameReg)
|
||||
.addImm(ST.getWavefrontSizeLog2());
|
||||
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), ScaledReg)
|
||||
.addReg(ScaledReg, RegState::Kill)
|
||||
.addImm(Offset);
|
||||
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), ScaledReg)
|
||||
.addReg(ScaledReg, RegState::Kill)
|
||||
.addImm(Offset);
|
||||
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), ResultReg)
|
||||
.addReg(ScaledReg, RegState::Kill);
|
||||
|
||||
// If there were truly no free SGPRs, we need to undo everything.
|
||||
if (!TmpScaledReg.isValid()) {
|
||||
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), ScaledReg)
|
||||
.addReg(ScaledReg, RegState::Kill)
|
||||
.addImm(Offset);
|
||||
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), ScaledReg)
|
||||
.addReg(ScaledReg, RegState::Kill)
|
||||
.addImm(-Offset);
|
||||
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHL_B32), ScaledReg)
|
||||
.addReg(FrameReg)
|
||||
.addImm(ST.getWavefrontSizeLog2());
|
||||
|
@ -55,7 +55,7 @@ define void @func_dynamic_stackalloc_sgpr_align4() {
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: s_mov_b32 s6, s33
|
||||
; GFX9-NEXT: s_mov_b32 s33, s32
|
||||
; GFX9-NEXT: s_add_u32 s32, s32, 0x400
|
||||
; GFX9-NEXT: s_addk_i32 s32, 0x400
|
||||
; GFX9-NEXT: s_getpc_b64 s[4:5]
|
||||
; GFX9-NEXT: s_add_u32 s4, s4, gv@gotpcrel32@lo+4
|
||||
; GFX9-NEXT: s_addc_u32 s5, s5, gv@gotpcrel32@hi+12
|
||||
@ -71,7 +71,7 @@ define void @func_dynamic_stackalloc_sgpr_align4() {
|
||||
; GFX9-NEXT: s_add_u32 s4, s32, s4
|
||||
; GFX9-NEXT: v_mov_b32_e32 v1, s4
|
||||
; GFX9-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
|
||||
; GFX9-NEXT: s_sub_u32 s32, s32, 0x400
|
||||
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
@ -81,7 +81,7 @@ define void @func_dynamic_stackalloc_sgpr_align4() {
|
||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-NEXT: s_mov_b32 s6, s33
|
||||
; GFX10-NEXT: s_mov_b32 s33, s32
|
||||
; GFX10-NEXT: s_add_u32 s32, s32, 0x200
|
||||
; GFX10-NEXT: s_addk_i32 s32, 0x200
|
||||
; GFX10-NEXT: s_getpc_b64 s[4:5]
|
||||
; GFX10-NEXT: s_add_u32 s4, s4, gv@gotpcrel32@lo+4
|
||||
; GFX10-NEXT: s_addc_u32 s5, s5, gv@gotpcrel32@hi+12
|
||||
@ -95,7 +95,7 @@ define void @func_dynamic_stackalloc_sgpr_align4() {
|
||||
; GFX10-NEXT: s_and_b32 s4, s4, -16
|
||||
; GFX10-NEXT: s_lshl_b32 s4, s4, 5
|
||||
; GFX10-NEXT: s_add_u32 s4, s32, s4
|
||||
; GFX10-NEXT: s_sub_u32 s32, s32, 0x200
|
||||
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
|
||||
; GFX10-NEXT: v_mov_b32_e32 v1, s4
|
||||
; GFX10-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
|
||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
@ -157,7 +157,7 @@ define void @func_dynamic_stackalloc_sgpr_align16() {
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: s_mov_b32 s6, s33
|
||||
; GFX9-NEXT: s_mov_b32 s33, s32
|
||||
; GFX9-NEXT: s_add_u32 s32, s32, 0x400
|
||||
; GFX9-NEXT: s_addk_i32 s32, 0x400
|
||||
; GFX9-NEXT: s_getpc_b64 s[4:5]
|
||||
; GFX9-NEXT: s_add_u32 s4, s4, gv@gotpcrel32@lo+4
|
||||
; GFX9-NEXT: s_addc_u32 s5, s5, gv@gotpcrel32@hi+12
|
||||
@ -173,7 +173,7 @@ define void @func_dynamic_stackalloc_sgpr_align16() {
|
||||
; GFX9-NEXT: s_add_u32 s4, s32, s4
|
||||
; GFX9-NEXT: v_mov_b32_e32 v1, s4
|
||||
; GFX9-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
|
||||
; GFX9-NEXT: s_sub_u32 s32, s32, 0x400
|
||||
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
@ -183,7 +183,7 @@ define void @func_dynamic_stackalloc_sgpr_align16() {
|
||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-NEXT: s_mov_b32 s6, s33
|
||||
; GFX10-NEXT: s_mov_b32 s33, s32
|
||||
; GFX10-NEXT: s_add_u32 s32, s32, 0x200
|
||||
; GFX10-NEXT: s_addk_i32 s32, 0x200
|
||||
; GFX10-NEXT: s_getpc_b64 s[4:5]
|
||||
; GFX10-NEXT: s_add_u32 s4, s4, gv@gotpcrel32@lo+4
|
||||
; GFX10-NEXT: s_addc_u32 s5, s5, gv@gotpcrel32@hi+12
|
||||
@ -197,7 +197,7 @@ define void @func_dynamic_stackalloc_sgpr_align16() {
|
||||
; GFX10-NEXT: s_and_b32 s4, s4, -16
|
||||
; GFX10-NEXT: s_lshl_b32 s4, s4, 5
|
||||
; GFX10-NEXT: s_add_u32 s4, s32, s4
|
||||
; GFX10-NEXT: s_sub_u32 s32, s32, 0x200
|
||||
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
|
||||
; GFX10-NEXT: v_mov_b32_e32 v1, s4
|
||||
; GFX10-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
|
||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
@ -260,9 +260,9 @@ define void @func_dynamic_stackalloc_sgpr_align32(i32 addrspace(1)* %out) {
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: s_mov_b32 s6, s33
|
||||
; GFX9-NEXT: s_add_u32 s33, s32, 0x7c0
|
||||
; GFX9-NEXT: s_add_i32 s33, s32, 0x7c0
|
||||
; GFX9-NEXT: s_and_b32 s33, s33, 0xfffff800
|
||||
; GFX9-NEXT: s_add_u32 s32, s32, 0x1000
|
||||
; GFX9-NEXT: s_addk_i32 s32, 0x1000
|
||||
; GFX9-NEXT: s_getpc_b64 s[4:5]
|
||||
; GFX9-NEXT: s_add_u32 s4, s4, gv@gotpcrel32@lo+4
|
||||
; GFX9-NEXT: s_addc_u32 s5, s5, gv@gotpcrel32@hi+12
|
||||
@ -279,7 +279,7 @@ define void @func_dynamic_stackalloc_sgpr_align32(i32 addrspace(1)* %out) {
|
||||
; GFX9-NEXT: s_and_b32 s4, s4, 0xfffff800
|
||||
; GFX9-NEXT: v_mov_b32_e32 v1, s4
|
||||
; GFX9-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
|
||||
; GFX9-NEXT: s_sub_u32 s32, s32, 0x1000
|
||||
; GFX9-NEXT: s_addk_i32 s32, 0xf000
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
@ -288,10 +288,10 @@ define void @func_dynamic_stackalloc_sgpr_align32(i32 addrspace(1)* %out) {
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-NEXT: s_mov_b32 s6, s33
|
||||
; GFX10-NEXT: s_add_u32 s33, s32, 0x3e0
|
||||
; GFX10-NEXT: s_add_i32 s33, s32, 0x3e0
|
||||
; GFX10-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX10-NEXT: s_and_b32 s33, s33, 0xfffffc00
|
||||
; GFX10-NEXT: s_add_u32 s32, s32, 0x800
|
||||
; GFX10-NEXT: s_addk_i32 s32, 0x800
|
||||
; GFX10-NEXT: s_getpc_b64 s[4:5]
|
||||
; GFX10-NEXT: s_add_u32 s4, s4, gv@gotpcrel32@lo+4
|
||||
; GFX10-NEXT: s_addc_u32 s5, s5, gv@gotpcrel32@hi+12
|
||||
@ -305,7 +305,7 @@ define void @func_dynamic_stackalloc_sgpr_align32(i32 addrspace(1)* %out) {
|
||||
; GFX10-NEXT: s_lshl_b32 s4, s4, 5
|
||||
; GFX10-NEXT: s_add_u32 s4, s32, s4
|
||||
; GFX10-NEXT: s_and_b32 s4, s4, 0xfffffc00
|
||||
; GFX10-NEXT: s_sub_u32 s32, s32, 0x800
|
||||
; GFX10-NEXT: s_addk_i32 s32, 0xf800
|
||||
; GFX10-NEXT: v_mov_b32_e32 v1, s4
|
||||
; GFX10-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
|
||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
|
@ -9,7 +9,7 @@ define i32 @v_extract_v64i32_varidx(<64 x i32> addrspace(1)* %ptr, i32 %idx) {
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: s_mov_b32 s6, s33
|
||||
; GCN-NEXT: s_add_u32 s33, s32, 0x3fc0
|
||||
; GCN-NEXT: s_add_i32 s33, s32, 0x3fc0
|
||||
; GCN-NEXT: s_and_b32 s33, s33, 0xffffc000
|
||||
; GCN-NEXT: v_add_co_u32_e32 v3, vcc, 64, v0
|
||||
; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill
|
||||
@ -56,8 +56,8 @@ define i32 @v_extract_v64i32_varidx(<64 x i32> addrspace(1)* %ptr, i32 %idx) {
|
||||
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
|
||||
; GCN-NEXT: v_add_u32_e32 v1, 0x100, v1
|
||||
; GCN-NEXT: v_add_u32_e32 v0, v1, v0
|
||||
; GCN-NEXT: s_add_u32 s32, s32, 0x10000
|
||||
; GCN-NEXT: s_sub_u32 s32, s32, 0x10000
|
||||
; GCN-NEXT: s_add_i32 s32, s32, 0x10000
|
||||
; GCN-NEXT: s_add_i32 s32, s32, 0xffff0000
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:640 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
@ -270,7 +270,7 @@ define i16 @v_extract_v128i16_varidx(<128 x i16> addrspace(1)* %ptr, i32 %idx) {
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: s_mov_b32 s6, s33
|
||||
; GCN-NEXT: s_add_u32 s33, s32, 0x3fc0
|
||||
; GCN-NEXT: s_add_i32 s33, s32, 0x3fc0
|
||||
; GCN-NEXT: s_and_b32 s33, s33, 0xffffc000
|
||||
; GCN-NEXT: v_add_co_u32_e32 v3, vcc, 64, v0
|
||||
; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill
|
||||
@ -317,8 +317,8 @@ define i16 @v_extract_v128i16_varidx(<128 x i16> addrspace(1)* %ptr, i32 %idx) {
|
||||
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
|
||||
; GCN-NEXT: v_and_b32_e32 v1, 1, v2
|
||||
; GCN-NEXT: v_lshlrev_b32_e32 v1, 4, v1
|
||||
; GCN-NEXT: s_add_u32 s32, s32, 0x10000
|
||||
; GCN-NEXT: s_sub_u32 s32, s32, 0x10000
|
||||
; GCN-NEXT: s_add_i32 s32, s32, 0x10000
|
||||
; GCN-NEXT: s_add_i32 s32, s32, 0xffff0000
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:640 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
@ -536,7 +536,7 @@ define i64 @v_extract_v32i64_varidx(<32 x i64> addrspace(1)* %ptr, i32 %idx) {
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: s_mov_b32 s6, s33
|
||||
; GCN-NEXT: s_add_u32 s33, s32, 0x3fc0
|
||||
; GCN-NEXT: s_add_i32 s33, s32, 0x3fc0
|
||||
; GCN-NEXT: s_and_b32 s33, s33, 0xffffc000
|
||||
; GCN-NEXT: v_add_co_u32_e32 v3, vcc, 64, v0
|
||||
; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill
|
||||
@ -583,8 +583,8 @@ define i64 @v_extract_v32i64_varidx(<32 x i64> addrspace(1)* %ptr, i32 %idx) {
|
||||
; GCN-NEXT: v_lshlrev_b32_e32 v0, 3, v0
|
||||
; GCN-NEXT: v_add_u32_e32 v2, 0x100, v2
|
||||
; GCN-NEXT: v_add_u32_e32 v1, v2, v0
|
||||
; GCN-NEXT: s_add_u32 s32, s32, 0x10000
|
||||
; GCN-NEXT: s_sub_u32 s32, s32, 0x10000
|
||||
; GCN-NEXT: s_add_i32 s32, s32, 0x10000
|
||||
; GCN-NEXT: s_add_i32 s32, s32, 0xffff0000
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:640 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
|
@ -13,10 +13,10 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
|
||||
; GFX9-NEXT: s_lshl_b32 s1, s0, 2
|
||||
; GFX9-NEXT: s_and_b32 s0, s0, 15
|
||||
; GFX9-NEXT: s_lshl_b32 s0, s0, 2
|
||||
; GFX9-NEXT: s_add_u32 s1, 4, s1
|
||||
; GFX9-NEXT: s_add_i32 s1, s1, 4
|
||||
; GFX9-NEXT: scratch_store_dword off, v0, s1
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: s_add_u32 s0, 4, s0
|
||||
; GFX9-NEXT: s_add_i32 s0, s0, 4
|
||||
; GFX9-NEXT: scratch_load_dword v0, off, s0 glc
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: s_endpgm
|
||||
@ -33,8 +33,8 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
|
||||
; GFX10-NEXT: s_and_b32 s1, s0, 15
|
||||
; GFX10-NEXT: s_lshl_b32 s0, s0, 2
|
||||
; GFX10-NEXT: s_lshl_b32 s1, s1, 2
|
||||
; GFX10-NEXT: s_add_u32 s0, 4, s0
|
||||
; GFX10-NEXT: s_add_u32 s1, 4, s1
|
||||
; GFX10-NEXT: s_add_i32 s0, s0, 4
|
||||
; GFX10-NEXT: s_add_i32 s1, s1, 4
|
||||
; GFX10-NEXT: scratch_store_dword off, v0, s0
|
||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-NEXT: scratch_load_dword v0, off, s1 glc dlc
|
||||
@ -185,10 +185,10 @@ define amdgpu_kernel void @store_load_sindex_small_offset_kernel(i32 %idx) {
|
||||
; GFX9-NEXT: s_and_b32 s0, s0, 15
|
||||
; GFX9-NEXT: s_lshl_b32 s0, s0, 2
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, 15
|
||||
; GFX9-NEXT: s_add_u32 s1, 0x104, s1
|
||||
; GFX9-NEXT: s_addk_i32 s1, 0x104
|
||||
; GFX9-NEXT: scratch_store_dword off, v0, s1
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: s_add_u32 s0, 0x104, s0
|
||||
; GFX9-NEXT: s_addk_i32 s0, 0x104
|
||||
; GFX9-NEXT: scratch_load_dword v0, off, s0 glc
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: s_endpgm
|
||||
@ -207,8 +207,8 @@ define amdgpu_kernel void @store_load_sindex_small_offset_kernel(i32 %idx) {
|
||||
; GFX10-NEXT: s_and_b32 s1, s0, 15
|
||||
; GFX10-NEXT: s_lshl_b32 s0, s0, 2
|
||||
; GFX10-NEXT: s_lshl_b32 s1, s1, 2
|
||||
; GFX10-NEXT: s_add_u32 s0, 0x104, s0
|
||||
; GFX10-NEXT: s_add_u32 s1, 0x104, s1
|
||||
; GFX10-NEXT: s_addk_i32 s0, 0x104
|
||||
; GFX10-NEXT: s_addk_i32 s1, 0x104
|
||||
; GFX10-NEXT: scratch_store_dword off, v0, s0
|
||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-NEXT: scratch_load_dword v0, off, s1 glc dlc
|
||||
@ -295,7 +295,7 @@ define void @store_load_vindex_small_offset_foo(i32 %idx) {
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: scratch_load_dword v1, off, s32 glc
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: s_add_u32 vcc_hi, s32, 0x100
|
||||
; GFX9-NEXT: s_add_i32 vcc_hi, s32, 0x100
|
||||
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 2, v0
|
||||
; GFX9-NEXT: v_and_b32_e32 v0, 15, v0
|
||||
; GFX9-NEXT: v_mov_b32_e32 v2, vcc_hi
|
||||
@ -314,7 +314,7 @@ define void @store_load_vindex_small_offset_foo(i32 %idx) {
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-NEXT: v_and_b32_e32 v1, 15, v0
|
||||
; GFX10-NEXT: s_add_u32 vcc_lo, s32, 0x100
|
||||
; GFX10-NEXT: s_add_i32 vcc_lo, s32, 0x100
|
||||
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0
|
||||
; GFX10-NEXT: v_mov_b32_e32 v2, vcc_lo
|
||||
; GFX10-NEXT: v_mov_b32_e32 v3, 15
|
||||
@ -357,10 +357,10 @@ define amdgpu_kernel void @store_load_sindex_large_offset_kernel(i32 %idx) {
|
||||
; GFX9-NEXT: s_and_b32 s0, s0, 15
|
||||
; GFX9-NEXT: s_lshl_b32 s0, s0, 2
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, 15
|
||||
; GFX9-NEXT: s_add_u32 s1, 0x4004, s1
|
||||
; GFX9-NEXT: s_addk_i32 s1, 0x4004
|
||||
; GFX9-NEXT: scratch_store_dword off, v0, s1
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: s_add_u32 s0, 0x4004, s0
|
||||
; GFX9-NEXT: s_addk_i32 s0, 0x4004
|
||||
; GFX9-NEXT: scratch_load_dword v0, off, s0 glc
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: s_endpgm
|
||||
@ -379,8 +379,8 @@ define amdgpu_kernel void @store_load_sindex_large_offset_kernel(i32 %idx) {
|
||||
; GFX10-NEXT: s_and_b32 s1, s0, 15
|
||||
; GFX10-NEXT: s_lshl_b32 s0, s0, 2
|
||||
; GFX10-NEXT: s_lshl_b32 s1, s1, 2
|
||||
; GFX10-NEXT: s_add_u32 s0, 0x4004, s0
|
||||
; GFX10-NEXT: s_add_u32 s1, 0x4004, s1
|
||||
; GFX10-NEXT: s_addk_i32 s0, 0x4004
|
||||
; GFX10-NEXT: s_addk_i32 s1, 0x4004
|
||||
; GFX10-NEXT: scratch_store_dword off, v0, s0
|
||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-NEXT: scratch_load_dword v0, off, s1 glc dlc
|
||||
@ -467,7 +467,7 @@ define void @store_load_vindex_large_offset_foo(i32 %idx) {
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: scratch_load_dword v1, off, s32 glc
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: s_add_u32 vcc_hi, s32, 0x4000
|
||||
; GFX9-NEXT: s_add_i32 vcc_hi, s32, 0x4000
|
||||
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 2, v0
|
||||
; GFX9-NEXT: v_and_b32_e32 v0, 15, v0
|
||||
; GFX9-NEXT: v_mov_b32_e32 v2, vcc_hi
|
||||
@ -486,7 +486,7 @@ define void @store_load_vindex_large_offset_foo(i32 %idx) {
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-NEXT: v_and_b32_e32 v1, 15, v0
|
||||
; GFX10-NEXT: s_add_u32 vcc_lo, s32, 0x4000
|
||||
; GFX10-NEXT: s_add_i32 vcc_lo, s32, 0x4000
|
||||
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0
|
||||
; GFX10-NEXT: v_mov_b32_e32 v2, vcc_lo
|
||||
; GFX10-NEXT: v_mov_b32_e32 v3, 15
|
||||
@ -527,7 +527,7 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
|
||||
; GFX9-NEXT: scratch_store_dword off, v0, vcc_hi offset:4
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, 15
|
||||
; GFX9-NEXT: s_add_u32 s0, 4, s0
|
||||
; GFX9-NEXT: s_add_i32 s0, s0, 4
|
||||
; GFX9-NEXT: scratch_store_dword off, v0, s0
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: scratch_load_dword v0, off, s0 glc
|
||||
@ -543,7 +543,7 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
|
||||
; GFX10-NEXT: v_mov_b32_e32 v0, 13
|
||||
; GFX10-NEXT: v_mov_b32_e32 v1, 15
|
||||
; GFX10-NEXT: s_movk_i32 s0, 0x3e80
|
||||
; GFX10-NEXT: s_add_u32 s0, 4, s0
|
||||
; GFX10-NEXT: s_add_i32 s0, s0, 4
|
||||
; GFX10-NEXT: scratch_store_dword off, v0, off offset:4
|
||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-NEXT: scratch_store_dword off, v1, s0
|
||||
@ -571,7 +571,7 @@ define void @store_load_large_imm_offset_foo() {
|
||||
; GFX9-NEXT: scratch_store_dword off, v0, s32
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, 15
|
||||
; GFX9-NEXT: s_add_u32 s0, s32, s0
|
||||
; GFX9-NEXT: s_add_i32 s0, s0, s32
|
||||
; GFX9-NEXT: scratch_store_dword off, v0, s0
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: scratch_load_dword v0, off, s0 glc
|
||||
@ -585,7 +585,7 @@ define void @store_load_large_imm_offset_foo() {
|
||||
; GFX10-NEXT: v_mov_b32_e32 v0, 13
|
||||
; GFX10-NEXT: v_mov_b32_e32 v1, 15
|
||||
; GFX10-NEXT: s_movk_i32 s0, 0x3e80
|
||||
; GFX10-NEXT: s_add_u32 s0, s32, s0
|
||||
; GFX10-NEXT: s_add_i32 s0, s0, s32
|
||||
; GFX10-NEXT: scratch_store_dword off, v0, s32
|
||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-NEXT: scratch_store_dword off, v1, s0
|
||||
|
@ -155,7 +155,7 @@ define void @func_non_entry_block_static_alloca_align4(i32 addrspace(1)* %out, i
|
||||
; GCN-NEXT: s_mov_b32 s7, s33
|
||||
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
|
||||
; GCN-NEXT: s_mov_b32 s33, s32
|
||||
; GCN-NEXT: s_add_u32 s32, s32, 0x400
|
||||
; GCN-NEXT: s_addk_i32 s32, 0x400
|
||||
; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; GCN-NEXT: s_cbranch_execz BB2_3
|
||||
; GCN-NEXT: ; %bb.1: ; %bb.0
|
||||
@ -181,7 +181,7 @@ define void @func_non_entry_block_static_alloca_align4(i32 addrspace(1)* %out, i
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GCN-NEXT: global_store_dword v[0:1], v0, off
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: s_sub_u32 s32, s32, 0x400
|
||||
; GCN-NEXT: s_addk_i32 s32, 0xfc00
|
||||
; GCN-NEXT: s_mov_b32 s33, s7
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
|
||||
@ -217,10 +217,10 @@ define void @func_non_entry_block_static_alloca_align64(i32 addrspace(1)* %out,
|
||||
; GCN: ; %bb.0: ; %entry
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: s_mov_b32 s7, s33
|
||||
; GCN-NEXT: s_add_u32 s33, s32, 0xfc0
|
||||
; GCN-NEXT: s_add_i32 s33, s32, 0xfc0
|
||||
; GCN-NEXT: s_and_b32 s33, s33, 0xfffff000
|
||||
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
|
||||
; GCN-NEXT: s_add_u32 s32, s32, 0x2000
|
||||
; GCN-NEXT: s_addk_i32 s32, 0x2000
|
||||
; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; GCN-NEXT: s_cbranch_execz BB3_2
|
||||
; GCN-NEXT: ; %bb.1: ; %bb.0
|
||||
@ -243,7 +243,7 @@ define void @func_non_entry_block_static_alloca_align64(i32 addrspace(1)* %out,
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GCN-NEXT: global_store_dword v[0:1], v0, off
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: s_sub_u32 s32, s32, 0x2000
|
||||
; GCN-NEXT: s_addk_i32 s32, 0xe000
|
||||
; GCN-NEXT: s_mov_b32 s33, s7
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
entry:
|
||||
|
@ -347,7 +347,7 @@ end:
|
||||
; Check for prologue initializing special SGPRs pointing to scratch.
|
||||
; HSA-LABEL: {{^}}store_flat_scratch:
|
||||
; CI-DAG: s_mov_b32 flat_scratch_lo, s9
|
||||
; CI-DAG: s_add_u32 [[ADD:s[0-9]+]], s8, s11
|
||||
; CI-DAG: s_add_i32 [[ADD:s[0-9]+]], s8, s11
|
||||
; CI-DAG: s_lshr_b32 flat_scratch_hi, [[ADD]], 8
|
||||
|
||||
; GFX9: s_add_u32 flat_scratch_lo, s6, s9
|
||||
|
@ -42,7 +42,7 @@
|
||||
; HSA-ALLOCA: .end_amd_kernel_code_t
|
||||
|
||||
; HSA-ALLOCA: s_mov_b32 flat_scratch_lo, s7
|
||||
; HSA-ALLOCA: s_add_u32 s6, s6, s9
|
||||
; HSA-ALLOCA: s_add_i32 s6, s6, s9
|
||||
; HSA-ALLOCA: s_lshr_b32 flat_scratch_hi, s6, 8
|
||||
|
||||
; SI-ALLOCA: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen ; encoding: [0x00,0x10,0x70,0xe0
|
||||
|
@ -5,7 +5,7 @@
|
||||
|
||||
; GCN-LABEL: {{^}}test_call_undef:
|
||||
; SDAG: s_mov_b32 flat_scratch_lo, s13
|
||||
; SDAG: s_add_u32 s12, s12, s17
|
||||
; SDAG: s_add_i32 s12, s12, s17
|
||||
; SDAG: s_lshr_b32
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @test_call_undef() #0 {
|
||||
@ -27,7 +27,7 @@ define i32 @test_tail_call_undef() #0 {
|
||||
|
||||
; GCN-LABEL: {{^}}test_call_null:
|
||||
; SDAG: s_mov_b32 flat_scratch_lo, s13
|
||||
; SDAG: s_add_u32 s12, s12, s17
|
||||
; SDAG: s_add_i32 s12, s12, s17
|
||||
; SDAG: s_lshr_b32
|
||||
|
||||
; GISEL: s_swappc_b64 s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
|
||||
|
@ -59,8 +59,8 @@ define void @test_func_call_external_void_func_void_clobber_s30_s31_call_externa
|
||||
; GCN: v_writelane_b32 v40, s33, 4
|
||||
|
||||
; GCN: s_mov_b32 s33, s32
|
||||
; MUBUF: s_add_u32 s32, s32, 0x400
|
||||
; FLATSCR: s_add_u32 s32, s32, 16
|
||||
; MUBUF: s_addk_i32 s32, 0x400
|
||||
; FLATSCR: s_add_i32 s32, s32, 16
|
||||
; GCN: s_swappc_b64
|
||||
; GCN-NEXT: s_swappc_b64
|
||||
|
||||
|
@ -52,14 +52,14 @@ define void @callee_with_stack() #0 {
|
||||
; MUBUF-NEXT: s_mov_b32 [[FP_COPY:s4]], s33
|
||||
; FLATSCR-NEXT: s_mov_b32 [[FP_COPY:s0]], s33
|
||||
; GCN-NEXT: s_mov_b32 s33, s32
|
||||
; MUBUF-NEXT: s_add_u32 s32, s32, 0x200
|
||||
; FLATSCR-NEXT: s_add_u32 s32, s32, 8
|
||||
; MUBUF-NEXT: s_addk_i32 s32, 0x200
|
||||
; FLATSCR-NEXT: s_add_i32 s32, s32, 8
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, 0{{$}}
|
||||
; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4{{$}}
|
||||
; FLATSCR-NEXT: scratch_store_dword off, v0, s33 offset:4{{$}}
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; MUBUF-NEXT: s_sub_u32 s32, s32, 0x200
|
||||
; FLATSCR-NEXT: s_sub_u32 s32, s32, 8
|
||||
; MUBUF-NEXT: s_addk_i32 s32, 0xfe00
|
||||
; FLATSCR-NEXT: s_add_i32 s32, s32, -8
|
||||
; GCN-NEXT: s_mov_b32 s33, [[FP_COPY]]
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
define void @callee_with_stack_no_fp_elim_all() #1 {
|
||||
@ -91,8 +91,8 @@ define void @callee_with_stack_no_fp_elim_non_leaf() #2 {
|
||||
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]]
|
||||
; GCN: v_writelane_b32 [[CSR_VGPR]], s33, 2
|
||||
; GCN-DAG: s_mov_b32 s33, s32
|
||||
; MUBUF-DAG: s_add_u32 s32, s32, 0x400{{$}}
|
||||
; FLATSCR-DAG: s_add_u32 s32, s32, 16{{$}}
|
||||
; MUBUF-DAG: s_addk_i32 s32, 0x400{{$}}
|
||||
; FLATSCR-DAG: s_add_i32 s32, s32, 16{{$}}
|
||||
; GCN-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
|
||||
; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s30,
|
||||
; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s31,
|
||||
@ -107,8 +107,8 @@ define void @callee_with_stack_no_fp_elim_non_leaf() #2 {
|
||||
; FLATSCR-DAG: v_readlane_b32 s0, [[CSR_VGPR]]
|
||||
; FLATSCR-DAG: v_readlane_b32 s1, [[CSR_VGPR]]
|
||||
|
||||
; MUBUF: s_sub_u32 s32, s32, 0x400{{$}}
|
||||
; FLATSCR: s_sub_u32 s32, s32, 16{{$}}
|
||||
; MUBUF: s_addk_i32 s32, 0xfc00{{$}}
|
||||
; FLATSCR: s_add_i32 s32, s32, -16{{$}}
|
||||
; GCN-NEXT: v_readlane_b32 s33, [[CSR_VGPR]], 2
|
||||
; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
|
||||
; MUBUF-NEXT: buffer_load_dword [[CSR_VGPR]], off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
|
||||
@ -136,8 +136,8 @@ define void @callee_with_stack_and_call() #0 {
|
||||
; MUBUF-NEXT: buffer_store_dword [[CSR_VGPR:v[0-9]+]], off, s[0:3], s32 ; 4-byte Folded Spill
|
||||
; FLATSCR-NEXT: scratch_store_dword off, [[CSR_VGPR:v[0-9]+]], s32 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]]
|
||||
; MUBUF-DAG: s_add_u32 s32, s32, 0x400
|
||||
; FLATSCR-DAG: s_add_u32 s32, s32, 16
|
||||
; MUBUF-DAG: s_addk_i32 s32, 0x400
|
||||
; FLATSCR-DAG: s_add_i32 s32, s32, 16
|
||||
; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s33, [[FP_SPILL_LANE:[0-9]+]]
|
||||
|
||||
; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s30, 0
|
||||
@ -149,8 +149,8 @@ define void @callee_with_stack_and_call() #0 {
|
||||
; FLATSCR-DAG: v_readlane_b32 s0, v40, 0
|
||||
; FLATSCR-DAG: v_readlane_b32 s1, v40, 1
|
||||
|
||||
; MUBUF: s_sub_u32 s32, s32, 0x400
|
||||
; FLATSCR: s_sub_u32 s32, s32, 16
|
||||
; MUBUF: s_addk_i32 s32, 0xfc00
|
||||
; FLATSCR: s_add_i32 s32, s32, -16
|
||||
; GCN-NEXT: v_readlane_b32 s33, [[CSR_VGPR]], [[FP_SPILL_LANE]]
|
||||
; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
|
||||
; MUBUF-NEXT: buffer_load_dword [[CSR_VGPR]], off, s[0:3], s32 ; 4-byte Folded Reload
|
||||
@ -251,11 +251,11 @@ define void @spill_only_csr_sgpr() {
|
||||
|
||||
; MUBUF: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload
|
||||
; FLATSCR: scratch_load_dword v41, off, s33 ; 4-byte Folded Reload
|
||||
; MUBUF: s_add_u32 s32, s32, 0x300
|
||||
; MUBUF-NEXT: s_sub_u32 s32, s32, 0x300
|
||||
; MUBUF: s_addk_i32 s32, 0x300
|
||||
; MUBUF-NEXT: s_addk_i32 s32, 0xfd00
|
||||
; MUBUF-NEXT: s_mov_b32 s33, s4
|
||||
; FLATSCR: s_add_u32 s32, s32, 12
|
||||
; FLATSCR-NEXT: s_sub_u32 s32, s32, 12
|
||||
; FLATSCR: s_add_i32 s32, s32, 12
|
||||
; FLATSCR-NEXT: s_add_i32 s32, s32, -12
|
||||
; FLATSCR-NEXT: s_mov_b32 s33, s0
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
@ -284,10 +284,10 @@ define void @callee_with_stack_no_fp_elim_csr_vgpr() #1 {
|
||||
; GCN: ;;#ASMSTART
|
||||
; GCN: v_writelane_b32 v1
|
||||
|
||||
; MUBUF: s_add_u32 s32, s32, 0x400
|
||||
; MUBUF: s_sub_u32 s32, s32, 0x400
|
||||
; FLATSCR: s_add_u32 s32, s32, 16
|
||||
; FLATSCR: s_sub_u32 s32, s32, 16
|
||||
; MUBUF: s_addk_i32 s32, 0x400
|
||||
; MUBUF: s_addk_i32 s32, 0xfc00
|
||||
; FLATSCR: s_add_i32 s32, s32, 16
|
||||
; FLATSCR: s_add_i32 s32, s32, -16
|
||||
; GCN-NEXT: v_readlane_b32 s33, v1, 63
|
||||
; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
|
||||
; MUBUF-NEXT: buffer_load_dword [[CSR_VGPR]], off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
|
||||
@ -330,11 +330,11 @@ define void @last_lane_vgpr_for_fp_csr() #1 {
|
||||
; GCN: v_writelane_b32 v1,
|
||||
; MUBUF: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload
|
||||
; FLATSCR: scratch_load_dword v41, off, s33 ; 4-byte Folded Reload
|
||||
; MUBUF: s_add_u32 s32, s32, 0x400
|
||||
; FLATSCR: s_add_u32 s32, s32, 16
|
||||
; MUBUF: s_addk_i32 s32, 0x400
|
||||
; FLATSCR: s_add_i32 s32, s32, 16
|
||||
; GCN-COUNT-64: v_readlane_b32 s{{[0-9]+}}, v1
|
||||
; MUBUF-NEXT: s_sub_u32 s32, s32, 0x400
|
||||
; FLATSCR-NEXT: s_sub_u32 s32, s32, 16
|
||||
; MUBUF-NEXT: s_addk_i32 s32, 0xfc00
|
||||
; FLATSCR-NEXT: s_add_i32 s32, s32, -16
|
||||
; GCN-NEXT: s_mov_b32 s33, [[FP_COPY]]
|
||||
; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
|
||||
; MUBUF-NEXT: buffer_load_dword [[CSR_VGPR]], off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
|
||||
@ -362,18 +362,18 @@ define void @no_new_vgpr_for_fp_csr() #1 {
|
||||
; GCN: s_waitcnt
|
||||
; MUBUF-NEXT: s_mov_b32 [[FP_COPY:s4]], s33
|
||||
; FLATSCR-NEXT: s_mov_b32 [[FP_COPY:s0]], s33
|
||||
; MUBUF-NEXT: s_add_u32 s33, s32, 0x7ffc0
|
||||
; FLATSCR-NEXT: s_add_u32 s33, s32, 0x1fff
|
||||
; MUBUF-NEXT: s_add_i32 s33, s32, 0x7ffc0
|
||||
; FLATSCR-NEXT: s_add_i32 s33, s32, 0x1fff
|
||||
; MUBUF-NEXT: s_and_b32 s33, s33, 0xfff80000
|
||||
; FLATSCR-NEXT: s_and_b32 s33, s33, 0xffffe000
|
||||
; MUBUF-NEXT: s_add_u32 s32, s32, 0x100000
|
||||
; FLATSCR-NEXT: s_add_u32 s32, s32, 0x4000
|
||||
; MUBUF-NEXT: s_add_i32 s32, s32, 0x100000
|
||||
; FLATSCR-NEXT: s_addk_i32 s32, 0x4000
|
||||
; GCN-NEXT: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0
|
||||
; MUBUF-NEXT: buffer_store_dword [[ZERO]], off, s[0:3], s33
|
||||
; FLATSCR-NEXT: scratch_store_dword off, [[ZERO]], s33
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; MUBUF-NEXT: s_sub_u32 s32, s32, 0x100000
|
||||
; FLATSCR-NEXT: s_sub_u32 s32, s32, 0x4000
|
||||
; MUBUF-NEXT: s_add_i32 s32, s32, 0xfff00000
|
||||
; FLATSCR-NEXT: s_addk_i32 s32, 0xc000
|
||||
; GCN-NEXT: s_mov_b32 s33, [[FP_COPY]]
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
define void @realign_stack_no_fp_elim() #1 {
|
||||
@ -397,14 +397,14 @@ define void @realign_stack_no_fp_elim() #1 {
|
||||
; FLATSCR: scratch_store_dword off, [[ZERO]], s33 offset:4
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; GCN: ;;#ASMSTART
|
||||
; MUBUF: s_add_u32 s32, s32, 0x300
|
||||
; MUBUF: s_addk_i32 s32, 0x300
|
||||
; MUBUF-NEXT: v_readlane_b32 s4, v1, 0
|
||||
; MUBUF-NEXT: v_readlane_b32 s5, v1, 1
|
||||
; FLATSCR: s_add_u32 s32, s32, 12
|
||||
; FLATSCR: s_add_i32 s32, s32, 12
|
||||
; FLATSCR-NEXT: v_readlane_b32 s0, v1, 0
|
||||
; FLATSCR-NEXT: v_readlane_b32 s1, v1, 1
|
||||
; MUBUF-NEXT: s_sub_u32 s32, s32, 0x300
|
||||
; FLATSCR-NEXT: s_sub_u32 s32, s32, 12
|
||||
; MUBUF-NEXT: s_addk_i32 s32, 0xfd00
|
||||
; FLATSCR-NEXT: s_add_i32 s32, s32, -12
|
||||
; GCN-NEXT: v_readlane_b32 s33, v1, 2
|
||||
; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
|
||||
; MUBUF-NEXT: buffer_load_dword [[CSR_VGPR]], off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
|
||||
@ -441,16 +441,16 @@ define void @no_unused_non_csr_sgpr_for_fp() #1 {
|
||||
; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s31, 1
|
||||
; MUBUF-DAG: buffer_store_dword
|
||||
; FLATSCR-DAG: scratch_store_dword
|
||||
; MUBUF: s_add_u32 s32, s32, 0x300{{$}}
|
||||
; FLATSCR: s_add_u32 s32, s32, 12{{$}}
|
||||
; MUBUF: s_addk_i32 s32, 0x300{{$}}
|
||||
; FLATSCR: s_add_i32 s32, s32, 12{{$}}
|
||||
|
||||
; MUBUF: v_readlane_b32 s4, [[CSR_VGPR]], 0
|
||||
; FLATSCR: v_readlane_b32 s0, [[CSR_VGPR]], 0
|
||||
; GCN: ;;#ASMSTART
|
||||
; MUBUF: v_readlane_b32 s5, [[CSR_VGPR]], 1
|
||||
; FLATSCR: v_readlane_b32 s1, [[CSR_VGPR]], 1
|
||||
; MUBUF-NEXT: s_sub_u32 s32, s32, 0x300{{$}}
|
||||
; FLATSCR-NEXT: s_sub_u32 s32, s32, 12{{$}}
|
||||
; MUBUF-NEXT: s_addk_i32 s32, 0xfd00{{$}}
|
||||
; FLATSCR-NEXT: s_add_i32 s32, s32, -12{{$}}
|
||||
; GCN-NEXT: v_readlane_b32 s33, [[CSR_VGPR]], 2
|
||||
; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
|
||||
; MUBUF-NEXT: buffer_load_dword [[CSR_VGPR]], off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
|
||||
@ -483,17 +483,17 @@ define void @no_unused_non_csr_sgpr_for_fp_no_scratch_vgpr() #1 {
|
||||
; GCN-LABEL: {{^}}scratch_reg_needed_mubuf_offset:
|
||||
; GCN: s_waitcnt
|
||||
; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}}
|
||||
; MUBUF-NEXT: s_add_u32 [[SCRATCH_SGPR:s[0-9]+]], s32, 0x40200
|
||||
; MUBUF-NEXT: s_add_i32 [[SCRATCH_SGPR:s[0-9]+]], s32, 0x40200
|
||||
; MUBUF-NEXT: buffer_store_dword [[CSR_VGPR:v[0-9]+]], off, s[0:3], [[SCRATCH_SGPR]] ; 4-byte Folded Spill
|
||||
; FLATSCR-NEXT: s_add_u32 [[SCRATCH_SGPR:s[0-9]+]], s32, 0x1008
|
||||
; FLATSCR-NEXT: s_add_i32 [[SCRATCH_SGPR:s[0-9]+]], s32, 0x1008
|
||||
; FLATSCR-NEXT: scratch_store_dword off, [[CSR_VGPR:v[0-9]+]], [[SCRATCH_SGPR]] ; 4-byte Folded Spill
|
||||
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]]
|
||||
; GCN-NEXT: v_writelane_b32 [[CSR_VGPR]], s33, 2
|
||||
; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s30, 0
|
||||
; GCN-DAG: s_mov_b32 s33, s32
|
||||
; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s31, 1
|
||||
; MUBUF-DAG: s_add_u32 s32, s32, 0x40300{{$}}
|
||||
; FLATSCR-DAG: s_add_u32 s32, s32, 0x100c{{$}}
|
||||
; MUBUF-DAG: s_add_i32 s32, s32, 0x40300{{$}}
|
||||
; FLATSCR-DAG: s_addk_i32 s32, 0x100c{{$}}
|
||||
; MUBUF-DAG: buffer_store_dword
|
||||
; FLATSCR-DAG: scratch_store_dword
|
||||
|
||||
@ -502,13 +502,13 @@ define void @no_unused_non_csr_sgpr_for_fp_no_scratch_vgpr() #1 {
|
||||
; GCN: ;;#ASMSTART
|
||||
; MUBUF: v_readlane_b32 s5, [[CSR_VGPR]], 1
|
||||
; FLATSCR: v_readlane_b32 s1, [[CSR_VGPR]], 1
|
||||
; MUBUF-NEXT: s_sub_u32 s32, s32, 0x40300{{$}}
|
||||
; FLATSCR-NEXT: s_sub_u32 s32, s32, 0x100c{{$}}
|
||||
; MUBUF-NEXT: s_add_i32 s32, s32, 0xfffbfd00{{$}}
|
||||
; FLATSCR-NEXT: s_addk_i32 s32, 0xeff4{{$}}
|
||||
; GCN-NEXT: v_readlane_b32 s33, [[CSR_VGPR]], 2
|
||||
; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
|
||||
; MUBUF-NEXT: s_add_u32 [[SCRATCH_SGPR:s[0-9]+]], s32, 0x40200
|
||||
; MUBUF-NEXT: s_add_i32 [[SCRATCH_SGPR:s[0-9]+]], s32, 0x40200
|
||||
; MUBUF-NEXT: buffer_load_dword [[CSR_VGPR]], off, s[0:3], [[SCRATCH_SGPR]] ; 4-byte Folded Reload
|
||||
; FLATSCR-NEXT: s_add_u32 [[SCRATCH_SGPR:s[0-9]+]], s32, 0x1008
|
||||
; FLATSCR-NEXT: s_add_i32 [[SCRATCH_SGPR:s[0-9]+]], s32, 0x1008
|
||||
; FLATSCR-NEXT: scratch_load_dword [[CSR_VGPR]], off, [[SCRATCH_SGPR]] ; 4-byte Folded Reload
|
||||
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]]
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
@ -546,13 +546,13 @@ define internal void @local_empty_func() #0 {
|
||||
; GCN-LABEL: {{^}}ipra_call_with_stack:
|
||||
; GCN: s_mov_b32 [[FP_COPY:s[0-9]+]], s33
|
||||
; GCN: s_mov_b32 s33, s32
|
||||
; MUBUF: s_add_u32 s32, s32, 0x400
|
||||
; FLATSCR: s_add_u32 s32, s32, 16
|
||||
; MUBUF: s_addk_i32 s32, 0x400
|
||||
; FLATSCR: s_add_i32 s32, s32, 16
|
||||
; MUBUF: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s33{{$}}
|
||||
; FLATSCR: scratch_store_dword off, v{{[0-9]+}}, s33{{$}}
|
||||
; GCN: s_swappc_b64
|
||||
; MUBUF: s_sub_u32 s32, s32, 0x400
|
||||
; FLATSCR: s_sub_u32 s32, s32, 16
|
||||
; MUBUF: s_addk_i32 s32, 0xfc00
|
||||
; FLATSCR: s_add_i32 s32, s32, -16
|
||||
; GCN: s_mov_b32 s33, [[FP_COPY:s[0-9]+]]
|
||||
define void @ipra_call_with_stack() #0 {
|
||||
%alloca = alloca i32, addrspace(5)
|
||||
@ -666,13 +666,13 @@ define void @callee_need_to_spill_fp_to_reg() #1 {
|
||||
; scratch VGPR to hold the offset.
|
||||
; GCN-LABEL: {{^}}spill_fp_to_memory_scratch_reg_needed_mubuf_offset
|
||||
; MUBUF: s_or_saveexec_b64 s[4:5], -1
|
||||
; MUBUF-NEXT: s_add_u32 [[SCRATCH_SGPR:s[0-9]+]], s32, 0x40200
|
||||
; MUBUF-NEXT: s_add_i32 [[SCRATCH_SGPR:s[0-9]+]], s32, 0x40200
|
||||
; MUBUF-NEXT: buffer_store_dword v39, off, s[0:3], [[SCRATCH_SGPR]] ; 4-byte Folded Spill
|
||||
; MUBUF: v_mov_b32_e32 v0, s33
|
||||
; GCN-NOT: v_mov_b32_e32 v0, 0x100c
|
||||
; MUBUF-NEXT: s_add_u32 [[SCRATCH_SGPR:s[0-9]+]], s32, 0x40300
|
||||
; MUBUF-NEXT: s_add_i32 [[SCRATCH_SGPR:s[0-9]+]], s32, 0x40300
|
||||
; MUBUF: buffer_store_dword v0, off, s[0:3], [[SCRATCH_SGPR]] ; 4-byte Folded Spill
|
||||
; FLATSCR: s_add_u32 [[SOFF:s[0-9]+]], s33, 0x1004
|
||||
; FLATSCR: s_add_i32 [[SOFF:s[0-9]+]], s33, 0x1004
|
||||
; FLATSCR: v_mov_b32_e32 v0, 0
|
||||
; FLATSCR: scratch_store_dword off, v0, [[SOFF]]
|
||||
define void @spill_fp_to_memory_scratch_reg_needed_mubuf_offset([4096 x i8] addrspace(5)* byval([4096 x i8]) align 4 %arg) #3 {
|
||||
|
@ -522,7 +522,7 @@ define hidden void @func_use_every_sgpr_input_call_use_workgroup_id_xyz() #1 {
|
||||
|
||||
; GCN-LABEL: {{^}}func_use_every_sgpr_input_call_use_workgroup_id_xyz_spill:
|
||||
; GCN-DAG: s_mov_b32 s33, s32
|
||||
; GCN-DAG: s_add_u32 s32, s32, 0x400
|
||||
; GCN-DAG: s_addk_i32 s32, 0x400
|
||||
; GCN-DAG: s_mov_b64 s{{\[}}[[LO_X:[0-9]+]]{{\:}}[[HI_X:[0-9]+]]{{\]}}, s[4:5]
|
||||
; GCN-DAG: s_mov_b64 s{{\[}}[[LO_Y:[0-9]+]]{{\:}}[[HI_Y:[0-9]+]]{{\]}}, s[6:7]
|
||||
|
||||
|
@ -403,7 +403,7 @@ define void @func_call_too_many_args_use_workitem_id_x(i32 %arg0) #1 {
|
||||
|
||||
; Requires loading and storing to stack slot.
|
||||
; GCN-LABEL: {{^}}too_many_args_call_too_many_args_use_workitem_id_x:
|
||||
; GCN-DAG: s_add_u32 s32, s32, 0x400{{$}}
|
||||
; GCN-DAG: s_addk_i32 s32, 0x400{{$}}
|
||||
; GCN-DAG: buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
|
||||
; GCN-DAG: buffer_load_dword v32, off, s[0:3], s33{{$}}
|
||||
|
||||
@ -411,7 +411,7 @@ define void @func_call_too_many_args_use_workitem_id_x(i32 %arg0) #1 {
|
||||
|
||||
; GCN: s_swappc_b64
|
||||
|
||||
; GCN: s_sub_u32 s32, s32, 0x400{{$}}
|
||||
; GCN: s_addk_i32 s32, 0xfc00{{$}}
|
||||
; GCN: buffer_load_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
|
||||
; GCN: s_setpc_b64
|
||||
define void @too_many_args_call_too_many_args_use_workitem_id_x(
|
||||
|
@ -509,7 +509,7 @@ define void @func_call_too_many_args_use_workitem_id_x(i32 %arg0) #1 {
|
||||
|
||||
; Requires loading and storing to stack slot.
|
||||
; GCN-LABEL: {{^}}too_many_args_call_too_many_args_use_workitem_id_x:
|
||||
; GCN-DAG: s_add_u32 s32, s32, 0x400{{$}}
|
||||
; GCN-DAG: s_addk_i32 s32, 0x400{{$}}
|
||||
; GCN-DAG: buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
|
||||
; GCN-DAG: buffer_load_dword v32, off, s[0:3], s33{{$}}
|
||||
|
||||
@ -517,7 +517,7 @@ define void @func_call_too_many_args_use_workitem_id_x(i32 %arg0) #1 {
|
||||
|
||||
; GCN: s_swappc_b64
|
||||
|
||||
; GCN: s_sub_u32 s32, s32, 0x400{{$}}
|
||||
; GCN: s_addk_i32 s32, 0xfc00{{$}}
|
||||
; GCN: buffer_load_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
|
||||
; GCN: s_setpc_b64
|
||||
define void @too_many_args_call_too_many_args_use_workitem_id_x(
|
||||
|
@ -22,7 +22,7 @@ entry:
|
||||
define amdgpu_kernel void @test_kern_stack() local_unnamed_addr #0 {
|
||||
; GFX803-LABEL: test_kern_stack:
|
||||
; GFX803: ; %bb.0: ; %entry
|
||||
; GFX803-NEXT: s_add_u32 s4, s4, s7
|
||||
; GFX803-NEXT: s_add_i32 s4, s4, s7
|
||||
; GFX803-NEXT: s_lshr_b32 flat_scratch_hi, s4, 8
|
||||
; GFX803-NEXT: s_add_u32 s0, s0, s7
|
||||
; GFX803-NEXT: s_addc_u32 s1, s1, 0
|
||||
@ -64,7 +64,7 @@ entry:
|
||||
define amdgpu_kernel void @test_kern_call() local_unnamed_addr #0 {
|
||||
; GFX803-LABEL: test_kern_call:
|
||||
; GFX803: ; %bb.0: ; %entry
|
||||
; GFX803-NEXT: s_add_u32 s4, s4, s7
|
||||
; GFX803-NEXT: s_add_i32 s4, s4, s7
|
||||
; GFX803-NEXT: s_lshr_b32 flat_scratch_hi, s4, 8
|
||||
; GFX803-NEXT: s_add_u32 s0, s0, s7
|
||||
; GFX803-NEXT: s_addc_u32 s1, s1, 0
|
||||
@ -111,7 +111,7 @@ entry:
|
||||
define amdgpu_kernel void @test_kern_stack_and_call() local_unnamed_addr #0 {
|
||||
; GFX803-LABEL: test_kern_stack_and_call:
|
||||
; GFX803: ; %bb.0: ; %entry
|
||||
; GFX803-NEXT: s_add_u32 s4, s4, s7
|
||||
; GFX803-NEXT: s_add_i32 s4, s4, s7
|
||||
; GFX803-NEXT: s_lshr_b32 flat_scratch_hi, s4, 8
|
||||
; GFX803-NEXT: s_add_u32 s0, s0, s7
|
||||
; GFX803-NEXT: s_addc_u32 s1, s1, 0
|
||||
@ -188,7 +188,7 @@ entry:
|
||||
define amdgpu_kernel void @test_force_fp_kern_stack() local_unnamed_addr #2 {
|
||||
; GFX803-LABEL: test_force_fp_kern_stack:
|
||||
; GFX803: ; %bb.0: ; %entry
|
||||
; GFX803-NEXT: s_add_u32 s4, s4, s7
|
||||
; GFX803-NEXT: s_add_i32 s4, s4, s7
|
||||
; GFX803-NEXT: s_lshr_b32 flat_scratch_hi, s4, 8
|
||||
; GFX803-NEXT: s_add_u32 s0, s0, s7
|
||||
; GFX803-NEXT: s_mov_b32 s33, 0
|
||||
@ -233,7 +233,7 @@ entry:
|
||||
define amdgpu_kernel void @test_force_fp_kern_call() local_unnamed_addr #2 {
|
||||
; GFX803-LABEL: test_force_fp_kern_call:
|
||||
; GFX803: ; %bb.0: ; %entry
|
||||
; GFX803-NEXT: s_add_u32 s4, s4, s7
|
||||
; GFX803-NEXT: s_add_i32 s4, s4, s7
|
||||
; GFX803-NEXT: s_lshr_b32 flat_scratch_hi, s4, 8
|
||||
; GFX803-NEXT: s_add_u32 s0, s0, s7
|
||||
; GFX803-NEXT: s_addc_u32 s1, s1, 0
|
||||
@ -283,7 +283,7 @@ entry:
|
||||
define amdgpu_kernel void @test_force_fp_kern_stack_and_call() local_unnamed_addr #2 {
|
||||
; GFX803-LABEL: test_force_fp_kern_stack_and_call:
|
||||
; GFX803: ; %bb.0: ; %entry
|
||||
; GFX803-NEXT: s_add_u32 s4, s4, s7
|
||||
; GFX803-NEXT: s_add_i32 s4, s4, s7
|
||||
; GFX803-NEXT: s_lshr_b32 flat_scratch_hi, s4, 8
|
||||
; GFX803-NEXT: s_add_u32 s0, s0, s7
|
||||
; GFX803-NEXT: s_mov_b32 s33, 0
|
||||
@ -344,7 +344,7 @@ entry:
|
||||
define amdgpu_kernel void @test_sgpr_offset_kernel() #1 {
|
||||
; GFX803-LABEL: test_sgpr_offset_kernel:
|
||||
; GFX803: ; %bb.0: ; %entry
|
||||
; GFX803-NEXT: s_add_u32 s4, s4, s7
|
||||
; GFX803-NEXT: s_add_i32 s4, s4, s7
|
||||
; GFX803-NEXT: s_lshr_b32 flat_scratch_hi, s4, 8
|
||||
; GFX803-NEXT: s_add_u32 s0, s0, s7
|
||||
; GFX803-NEXT: s_addc_u32 s1, s1, 0
|
||||
|
@ -33,7 +33,7 @@ define float @call_split_type_used_outside_block_v2f32() #0 {
|
||||
; GCN-NEXT: v_writelane_b32 v40, s33, 2
|
||||
; GCN-NEXT: v_writelane_b32 v40, s30, 0
|
||||
; GCN-NEXT: s_mov_b32 s33, s32
|
||||
; GCN-NEXT: s_add_u32 s32, s32, 0x400
|
||||
; GCN-NEXT: s_addk_i32 s32, 0x400
|
||||
; GCN-NEXT: s_getpc_b64 s[4:5]
|
||||
; GCN-NEXT: s_add_u32 s4, s4, func_v2f32@rel32@lo+4
|
||||
; GCN-NEXT: s_addc_u32 s5, s5, func_v2f32@rel32@hi+12
|
||||
@ -41,7 +41,7 @@ define float @call_split_type_used_outside_block_v2f32() #0 {
|
||||
; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5]
|
||||
; GCN-NEXT: v_readlane_b32 s4, v40, 0
|
||||
; GCN-NEXT: v_readlane_b32 s5, v40, 1
|
||||
; GCN-NEXT: s_sub_u32 s32, s32, 0x400
|
||||
; GCN-NEXT: s_addk_i32 s32, 0xfc00
|
||||
; GCN-NEXT: v_readlane_b32 s33, v40, 2
|
||||
; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
|
||||
; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
|
||||
@ -67,7 +67,7 @@ define float @call_split_type_used_outside_block_v3f32() #0 {
|
||||
; GCN-NEXT: v_writelane_b32 v40, s33, 2
|
||||
; GCN-NEXT: v_writelane_b32 v40, s30, 0
|
||||
; GCN-NEXT: s_mov_b32 s33, s32
|
||||
; GCN-NEXT: s_add_u32 s32, s32, 0x400
|
||||
; GCN-NEXT: s_addk_i32 s32, 0x400
|
||||
; GCN-NEXT: s_getpc_b64 s[4:5]
|
||||
; GCN-NEXT: s_add_u32 s4, s4, func_v3f32@rel32@lo+4
|
||||
; GCN-NEXT: s_addc_u32 s5, s5, func_v3f32@rel32@hi+12
|
||||
@ -75,7 +75,7 @@ define float @call_split_type_used_outside_block_v3f32() #0 {
|
||||
; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5]
|
||||
; GCN-NEXT: v_readlane_b32 s4, v40, 0
|
||||
; GCN-NEXT: v_readlane_b32 s5, v40, 1
|
||||
; GCN-NEXT: s_sub_u32 s32, s32, 0x400
|
||||
; GCN-NEXT: s_addk_i32 s32, 0xfc00
|
||||
; GCN-NEXT: v_readlane_b32 s33, v40, 2
|
||||
; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
|
||||
; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
|
||||
@ -101,7 +101,7 @@ define half @call_split_type_used_outside_block_v4f16() #0 {
|
||||
; GCN-NEXT: v_writelane_b32 v40, s33, 2
|
||||
; GCN-NEXT: v_writelane_b32 v40, s30, 0
|
||||
; GCN-NEXT: s_mov_b32 s33, s32
|
||||
; GCN-NEXT: s_add_u32 s32, s32, 0x400
|
||||
; GCN-NEXT: s_addk_i32 s32, 0x400
|
||||
; GCN-NEXT: s_getpc_b64 s[4:5]
|
||||
; GCN-NEXT: s_add_u32 s4, s4, func_v4f16@rel32@lo+4
|
||||
; GCN-NEXT: s_addc_u32 s5, s5, func_v4f16@rel32@hi+12
|
||||
@ -109,7 +109,7 @@ define half @call_split_type_used_outside_block_v4f16() #0 {
|
||||
; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5]
|
||||
; GCN-NEXT: v_readlane_b32 s4, v40, 0
|
||||
; GCN-NEXT: v_readlane_b32 s5, v40, 1
|
||||
; GCN-NEXT: s_sub_u32 s32, s32, 0x400
|
||||
; GCN-NEXT: s_addk_i32 s32, 0xfc00
|
||||
; GCN-NEXT: v_readlane_b32 s33, v40, 2
|
||||
; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
|
||||
; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
|
||||
@ -135,7 +135,7 @@ define { i32, half } @call_split_type_used_outside_block_struct() #0 {
|
||||
; GCN-NEXT: v_writelane_b32 v40, s33, 2
|
||||
; GCN-NEXT: v_writelane_b32 v40, s30, 0
|
||||
; GCN-NEXT: s_mov_b32 s33, s32
|
||||
; GCN-NEXT: s_add_u32 s32, s32, 0x400
|
||||
; GCN-NEXT: s_addk_i32 s32, 0x400
|
||||
; GCN-NEXT: s_getpc_b64 s[4:5]
|
||||
; GCN-NEXT: s_add_u32 s4, s4, func_struct@rel32@lo+4
|
||||
; GCN-NEXT: s_addc_u32 s5, s5, func_struct@rel32@hi+12
|
||||
@ -144,7 +144,7 @@ define { i32, half } @call_split_type_used_outside_block_struct() #0 {
|
||||
; GCN-NEXT: v_readlane_b32 s4, v40, 0
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, v4
|
||||
; GCN-NEXT: v_readlane_b32 s5, v40, 1
|
||||
; GCN-NEXT: s_sub_u32 s32, s32, 0x400
|
||||
; GCN-NEXT: s_addk_i32 s32, 0xfc00
|
||||
; GCN-NEXT: v_readlane_b32 s33, v40, 2
|
||||
; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
|
||||
; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
|
||||
|
@ -228,10 +228,10 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
|
||||
; GFX9-NEXT: s_lshl_b32 s1, s0, 2
|
||||
; GFX9-NEXT: s_and_b32 s0, s0, 15
|
||||
; GFX9-NEXT: s_lshl_b32 s0, s0, 2
|
||||
; GFX9-NEXT: s_add_u32 s1, 4, s1
|
||||
; GFX9-NEXT: s_add_i32 s1, s1, 4
|
||||
; GFX9-NEXT: scratch_store_dword off, v0, s1
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: s_add_u32 s0, 4, s0
|
||||
; GFX9-NEXT: s_add_i32 s0, s0, 4
|
||||
; GFX9-NEXT: scratch_load_dword v0, off, s0 glc
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: s_endpgm
|
||||
@ -248,8 +248,8 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
|
||||
; GFX10-NEXT: s_and_b32 s1, s0, 15
|
||||
; GFX10-NEXT: s_lshl_b32 s0, s0, 2
|
||||
; GFX10-NEXT: s_lshl_b32 s1, s1, 2
|
||||
; GFX10-NEXT: s_add_u32 s0, 4, s0
|
||||
; GFX10-NEXT: s_add_u32 s1, 4, s1
|
||||
; GFX10-NEXT: s_add_i32 s0, s0, 4
|
||||
; GFX10-NEXT: s_add_i32 s1, s1, 4
|
||||
; GFX10-NEXT: scratch_store_dword off, v0, s0
|
||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-NEXT: scratch_load_dword v0, off, s1 glc dlc
|
||||
@ -270,10 +270,10 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
|
||||
; GFX9-PAL-NEXT: s_lshl_b32 s1, s0, 2
|
||||
; GFX9-PAL-NEXT: s_and_b32 s0, s0, 15
|
||||
; GFX9-PAL-NEXT: s_lshl_b32 s0, s0, 2
|
||||
; GFX9-PAL-NEXT: s_add_u32 s1, 4, s1
|
||||
; GFX9-PAL-NEXT: s_add_i32 s1, s1, 4
|
||||
; GFX9-PAL-NEXT: scratch_store_dword off, v0, s1
|
||||
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-PAL-NEXT: s_add_u32 s0, 4, s0
|
||||
; GFX9-PAL-NEXT: s_add_i32 s0, s0, 4
|
||||
; GFX9-PAL-NEXT: scratch_load_dword v0, off, s0 glc
|
||||
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-PAL-NEXT: s_endpgm
|
||||
@ -295,8 +295,8 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
|
||||
; GFX10-PAL-NEXT: s_and_b32 s1, s0, 15
|
||||
; GFX10-PAL-NEXT: s_lshl_b32 s0, s0, 2
|
||||
; GFX10-PAL-NEXT: s_lshl_b32 s1, s1, 2
|
||||
; GFX10-PAL-NEXT: s_add_u32 s0, 4, s0
|
||||
; GFX10-PAL-NEXT: s_add_u32 s1, 4, s1
|
||||
; GFX10-PAL-NEXT: s_add_i32 s0, s0, 4
|
||||
; GFX10-PAL-NEXT: s_add_i32 s1, s1, 4
|
||||
; GFX10-PAL-NEXT: scratch_store_dword off, v0, s0
|
||||
; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-PAL-NEXT: scratch_load_dword v0, off, s1 glc dlc
|
||||
@ -321,13 +321,13 @@ define amdgpu_ps void @store_load_sindex_foo(i32 inreg %idx) {
|
||||
; GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s3
|
||||
; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0
|
||||
; GFX9-NEXT: s_lshl_b32 s0, s2, 2
|
||||
; GFX9-NEXT: s_add_u32 s0, 4, s0
|
||||
; GFX9-NEXT: s_add_i32 s0, s0, 4
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, 15
|
||||
; GFX9-NEXT: scratch_store_dword off, v0, s0
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: s_and_b32 s0, s2, 15
|
||||
; GFX9-NEXT: s_lshl_b32 s0, s0, 2
|
||||
; GFX9-NEXT: s_add_u32 s0, 4, s0
|
||||
; GFX9-NEXT: s_add_i32 s0, s0, 4
|
||||
; GFX9-NEXT: scratch_load_dword v0, off, s0 glc
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: s_endpgm
|
||||
@ -342,8 +342,8 @@ define amdgpu_ps void @store_load_sindex_foo(i32 inreg %idx) {
|
||||
; GFX10-NEXT: v_mov_b32_e32 v0, 15
|
||||
; GFX10-NEXT: s_lshl_b32 s1, s2, 2
|
||||
; GFX10-NEXT: s_lshl_b32 s0, s0, 2
|
||||
; GFX10-NEXT: s_add_u32 s1, 4, s1
|
||||
; GFX10-NEXT: s_add_u32 s0, 4, s0
|
||||
; GFX10-NEXT: s_add_i32 s1, s1, 4
|
||||
; GFX10-NEXT: s_add_i32 s0, s0, 4
|
||||
; GFX10-NEXT: scratch_store_dword off, v0, s1
|
||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-NEXT: scratch_load_dword v0, off, s0 glc dlc
|
||||
@ -363,10 +363,10 @@ define amdgpu_ps void @store_load_sindex_foo(i32 inreg %idx) {
|
||||
; GFX9-PAL-NEXT: s_lshl_b32 s1, s0, 2
|
||||
; GFX9-PAL-NEXT: s_and_b32 s0, s0, 15
|
||||
; GFX9-PAL-NEXT: s_lshl_b32 s0, s0, 2
|
||||
; GFX9-PAL-NEXT: s_add_u32 s1, 4, s1
|
||||
; GFX9-PAL-NEXT: s_add_i32 s1, s1, 4
|
||||
; GFX9-PAL-NEXT: scratch_store_dword off, v0, s1
|
||||
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-PAL-NEXT: s_add_u32 s0, 4, s0
|
||||
; GFX9-PAL-NEXT: s_add_i32 s0, s0, 4
|
||||
; GFX9-PAL-NEXT: scratch_load_dword v0, off, s0 glc
|
||||
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-PAL-NEXT: s_endpgm
|
||||
@ -386,8 +386,8 @@ define amdgpu_ps void @store_load_sindex_foo(i32 inreg %idx) {
|
||||
; GFX10-PAL-NEXT: v_mov_b32_e32 v0, 15
|
||||
; GFX10-PAL-NEXT: s_lshl_b32 s0, s0, 2
|
||||
; GFX10-PAL-NEXT: s_lshl_b32 s1, s1, 2
|
||||
; GFX10-PAL-NEXT: s_add_u32 s0, 4, s0
|
||||
; GFX10-PAL-NEXT: s_add_u32 s1, 4, s1
|
||||
; GFX10-PAL-NEXT: s_add_i32 s0, s0, 4
|
||||
; GFX10-PAL-NEXT: s_add_i32 s1, s1, 4
|
||||
; GFX10-PAL-NEXT: scratch_store_dword off, v0, s0
|
||||
; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-PAL-NEXT: scratch_load_dword v0, off, s1 glc dlc
|
||||
@ -857,10 +857,10 @@ define amdgpu_kernel void @store_load_sindex_small_offset_kernel(i32 %idx) {
|
||||
; GFX9-NEXT: s_and_b32 s0, s0, 15
|
||||
; GFX9-NEXT: s_lshl_b32 s0, s0, 2
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, 15
|
||||
; GFX9-NEXT: s_add_u32 s1, 0x104, s1
|
||||
; GFX9-NEXT: s_addk_i32 s1, 0x104
|
||||
; GFX9-NEXT: scratch_store_dword off, v0, s1
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: s_add_u32 s0, 0x104, s0
|
||||
; GFX9-NEXT: s_addk_i32 s0, 0x104
|
||||
; GFX9-NEXT: scratch_load_dword v0, off, s0 glc
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: s_endpgm
|
||||
@ -879,8 +879,8 @@ define amdgpu_kernel void @store_load_sindex_small_offset_kernel(i32 %idx) {
|
||||
; GFX10-NEXT: s_and_b32 s1, s0, 15
|
||||
; GFX10-NEXT: s_lshl_b32 s0, s0, 2
|
||||
; GFX10-NEXT: s_lshl_b32 s1, s1, 2
|
||||
; GFX10-NEXT: s_add_u32 s0, 0x104, s0
|
||||
; GFX10-NEXT: s_add_u32 s1, 0x104, s1
|
||||
; GFX10-NEXT: s_addk_i32 s0, 0x104
|
||||
; GFX10-NEXT: s_addk_i32 s1, 0x104
|
||||
; GFX10-NEXT: scratch_store_dword off, v0, s0
|
||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-NEXT: scratch_load_dword v0, off, s1 glc dlc
|
||||
@ -904,10 +904,10 @@ define amdgpu_kernel void @store_load_sindex_small_offset_kernel(i32 %idx) {
|
||||
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-PAL-NEXT: s_lshl_b32 s0, s0, 2
|
||||
; GFX9-PAL-NEXT: v_mov_b32_e32 v0, 15
|
||||
; GFX9-PAL-NEXT: s_add_u32 s1, 0x104, s1
|
||||
; GFX9-PAL-NEXT: s_addk_i32 s1, 0x104
|
||||
; GFX9-PAL-NEXT: scratch_store_dword off, v0, s1
|
||||
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-PAL-NEXT: s_add_u32 s0, 0x104, s0
|
||||
; GFX9-PAL-NEXT: s_addk_i32 s0, 0x104
|
||||
; GFX9-PAL-NEXT: scratch_load_dword v0, off, s0 glc
|
||||
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-PAL-NEXT: s_endpgm
|
||||
@ -932,8 +932,8 @@ define amdgpu_kernel void @store_load_sindex_small_offset_kernel(i32 %idx) {
|
||||
; GFX1010-PAL-NEXT: s_and_b32 s1, s0, 15
|
||||
; GFX1010-PAL-NEXT: s_lshl_b32 s0, s0, 2
|
||||
; GFX1010-PAL-NEXT: s_lshl_b32 s1, s1, 2
|
||||
; GFX1010-PAL-NEXT: s_add_u32 s0, 0x104, s0
|
||||
; GFX1010-PAL-NEXT: s_add_u32 s1, 0x104, s1
|
||||
; GFX1010-PAL-NEXT: s_addk_i32 s0, 0x104
|
||||
; GFX1010-PAL-NEXT: s_addk_i32 s1, 0x104
|
||||
; GFX1010-PAL-NEXT: scratch_store_dword off, v0, s0
|
||||
; GFX1010-PAL-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX1010-PAL-NEXT: scratch_load_dword v0, off, s1 glc dlc
|
||||
@ -959,8 +959,8 @@ define amdgpu_kernel void @store_load_sindex_small_offset_kernel(i32 %idx) {
|
||||
; GFX1030-PAL-NEXT: s_and_b32 s1, s0, 15
|
||||
; GFX1030-PAL-NEXT: s_lshl_b32 s0, s0, 2
|
||||
; GFX1030-PAL-NEXT: s_lshl_b32 s1, s1, 2
|
||||
; GFX1030-PAL-NEXT: s_add_u32 s0, 0x104, s0
|
||||
; GFX1030-PAL-NEXT: s_add_u32 s1, 0x104, s1
|
||||
; GFX1030-PAL-NEXT: s_addk_i32 s0, 0x104
|
||||
; GFX1030-PAL-NEXT: s_addk_i32 s1, 0x104
|
||||
; GFX1030-PAL-NEXT: scratch_store_dword off, v0, s0
|
||||
; GFX1030-PAL-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX1030-PAL-NEXT: scratch_load_dword v0, off, s1 glc dlc
|
||||
@ -991,13 +991,13 @@ define amdgpu_ps void @store_load_sindex_small_offset_foo(i32 inreg %idx) {
|
||||
; GFX9-NEXT: s_lshl_b32 s0, s2, 2
|
||||
; GFX9-NEXT: scratch_load_dword v0, off, vcc_hi offset:4 glc
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: s_add_u32 s0, 0x104, s0
|
||||
; GFX9-NEXT: s_addk_i32 s0, 0x104
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, 15
|
||||
; GFX9-NEXT: scratch_store_dword off, v0, s0
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: s_and_b32 s0, s2, 15
|
||||
; GFX9-NEXT: s_lshl_b32 s0, s0, 2
|
||||
; GFX9-NEXT: s_add_u32 s0, 0x104, s0
|
||||
; GFX9-NEXT: s_addk_i32 s0, 0x104
|
||||
; GFX9-NEXT: scratch_load_dword v0, off, s0 glc
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: s_endpgm
|
||||
@ -1014,8 +1014,8 @@ define amdgpu_ps void @store_load_sindex_small_offset_foo(i32 inreg %idx) {
|
||||
; GFX10-NEXT: v_mov_b32_e32 v0, 15
|
||||
; GFX10-NEXT: s_lshl_b32 s1, s2, 2
|
||||
; GFX10-NEXT: s_lshl_b32 s0, s0, 2
|
||||
; GFX10-NEXT: s_add_u32 s1, 0x104, s1
|
||||
; GFX10-NEXT: s_add_u32 s0, 0x104, s0
|
||||
; GFX10-NEXT: s_addk_i32 s1, 0x104
|
||||
; GFX10-NEXT: s_addk_i32 s0, 0x104
|
||||
; GFX10-NEXT: scratch_store_dword off, v0, s1
|
||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-NEXT: scratch_load_dword v0, off, s0 glc dlc
|
||||
@ -1037,11 +1037,11 @@ define amdgpu_ps void @store_load_sindex_small_offset_foo(i32 inreg %idx) {
|
||||
; GFX9-PAL-NEXT: scratch_load_dword v0, off, vcc_hi offset:4 glc
|
||||
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-PAL-NEXT: s_lshl_b32 s0, s0, 2
|
||||
; GFX9-PAL-NEXT: s_add_u32 s1, 0x104, s1
|
||||
; GFX9-PAL-NEXT: s_addk_i32 s1, 0x104
|
||||
; GFX9-PAL-NEXT: v_mov_b32_e32 v0, 15
|
||||
; GFX9-PAL-NEXT: scratch_store_dword off, v0, s1
|
||||
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-PAL-NEXT: s_add_u32 s0, 0x104, s0
|
||||
; GFX9-PAL-NEXT: s_addk_i32 s0, 0x104
|
||||
; GFX9-PAL-NEXT: scratch_load_dword v0, off, s0 glc
|
||||
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-PAL-NEXT: s_endpgm
|
||||
@ -1064,8 +1064,8 @@ define amdgpu_ps void @store_load_sindex_small_offset_foo(i32 inreg %idx) {
|
||||
; GFX1010-PAL-NEXT: v_mov_b32_e32 v0, 15
|
||||
; GFX1010-PAL-NEXT: s_lshl_b32 s0, s0, 2
|
||||
; GFX1010-PAL-NEXT: s_lshl_b32 s1, s1, 2
|
||||
; GFX1010-PAL-NEXT: s_add_u32 s0, 0x104, s0
|
||||
; GFX1010-PAL-NEXT: s_add_u32 s1, 0x104, s1
|
||||
; GFX1010-PAL-NEXT: s_addk_i32 s0, 0x104
|
||||
; GFX1010-PAL-NEXT: s_addk_i32 s1, 0x104
|
||||
; GFX1010-PAL-NEXT: scratch_store_dword off, v0, s0
|
||||
; GFX1010-PAL-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX1010-PAL-NEXT: scratch_load_dword v0, off, s1 glc dlc
|
||||
@ -1089,8 +1089,8 @@ define amdgpu_ps void @store_load_sindex_small_offset_foo(i32 inreg %idx) {
|
||||
; GFX1030-PAL-NEXT: v_mov_b32_e32 v0, 15
|
||||
; GFX1030-PAL-NEXT: s_lshl_b32 s0, s0, 2
|
||||
; GFX1030-PAL-NEXT: s_lshl_b32 s1, s1, 2
|
||||
; GFX1030-PAL-NEXT: s_add_u32 s0, 0x104, s0
|
||||
; GFX1030-PAL-NEXT: s_add_u32 s1, 0x104, s1
|
||||
; GFX1030-PAL-NEXT: s_addk_i32 s0, 0x104
|
||||
; GFX1030-PAL-NEXT: s_addk_i32 s1, 0x104
|
||||
; GFX1030-PAL-NEXT: scratch_store_dword off, v0, s0
|
||||
; GFX1030-PAL-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX1030-PAL-NEXT: scratch_load_dword v0, off, s1 glc dlc
|
||||
@ -1245,7 +1245,7 @@ define void @store_load_vindex_small_offset_foo(i32 %idx) {
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: scratch_load_dword v1, off, s32 glc
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: s_add_u32 vcc_hi, s32, 0x100
|
||||
; GFX9-NEXT: s_add_i32 vcc_hi, s32, 0x100
|
||||
; GFX9-NEXT: v_mov_b32_e32 v1, vcc_hi
|
||||
; GFX9-NEXT: v_mov_b32_e32 v3, 15
|
||||
; GFX9-NEXT: v_lshl_add_u32 v2, v0, 2, v1
|
||||
@ -1262,7 +1262,7 @@ define void @store_load_vindex_small_offset_foo(i32 %idx) {
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-NEXT: v_mov_b32_e32 v1, 15
|
||||
; GFX10-NEXT: s_add_u32 vcc_lo, s32, 0x100
|
||||
; GFX10-NEXT: s_add_i32 vcc_lo, s32, 0x100
|
||||
; GFX10-NEXT: v_mov_b32_e32 v2, vcc_lo
|
||||
; GFX10-NEXT: v_and_b32_e32 v3, v0, v1
|
||||
; GFX10-NEXT: v_lshl_add_u32 v0, v0, 2, v2
|
||||
@ -1280,7 +1280,7 @@ define void @store_load_vindex_small_offset_foo(i32 %idx) {
|
||||
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-PAL-NEXT: scratch_load_dword v1, off, s32 glc
|
||||
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-PAL-NEXT: s_add_u32 vcc_hi, s32, 0x100
|
||||
; GFX9-PAL-NEXT: s_add_i32 vcc_hi, s32, 0x100
|
||||
; GFX9-PAL-NEXT: v_mov_b32_e32 v1, vcc_hi
|
||||
; GFX9-PAL-NEXT: v_mov_b32_e32 v3, 15
|
||||
; GFX9-PAL-NEXT: v_lshl_add_u32 v2, v0, 2, v1
|
||||
@ -1297,7 +1297,7 @@ define void @store_load_vindex_small_offset_foo(i32 %idx) {
|
||||
; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-PAL-NEXT: v_mov_b32_e32 v1, 15
|
||||
; GFX10-PAL-NEXT: s_add_u32 vcc_lo, s32, 0x100
|
||||
; GFX10-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x100
|
||||
; GFX10-PAL-NEXT: v_mov_b32_e32 v2, vcc_lo
|
||||
; GFX10-PAL-NEXT: v_and_b32_e32 v3, v0, v1
|
||||
; GFX10-PAL-NEXT: v_lshl_add_u32 v0, v0, 2, v2
|
||||
@ -1495,13 +1495,13 @@ define void @zero_init_large_offset_foo() {
|
||||
; GFX9-NEXT: v_mov_b32_e32 v1, s1
|
||||
; GFX9-NEXT: v_mov_b32_e32 v2, s2
|
||||
; GFX9-NEXT: v_mov_b32_e32 v3, s3
|
||||
; GFX9-NEXT: s_add_u32 vcc_hi, s32, 0x4000
|
||||
; GFX9-NEXT: s_add_i32 vcc_hi, s32, 0x4000
|
||||
; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi
|
||||
; GFX9-NEXT: s_add_u32 vcc_hi, s32, 0x4000
|
||||
; GFX9-NEXT: s_add_i32 vcc_hi, s32, 0x4000
|
||||
; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:16
|
||||
; GFX9-NEXT: s_add_u32 vcc_hi, s32, 0x4000
|
||||
; GFX9-NEXT: s_add_i32 vcc_hi, s32, 0x4000
|
||||
; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:32
|
||||
; GFX9-NEXT: s_add_u32 vcc_hi, s32, 0x4000
|
||||
; GFX9-NEXT: s_add_i32 vcc_hi, s32, 0x4000
|
||||
; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:48
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
@ -1513,7 +1513,7 @@ define void @zero_init_large_offset_foo() {
|
||||
; GFX10-NEXT: scratch_load_dword v0, off, s32 glc dlc
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-NEXT: s_mov_b32 s0, 0
|
||||
; GFX10-NEXT: s_add_u32 vcc_lo, s32, 0x4000
|
||||
; GFX10-NEXT: s_add_i32 vcc_lo, s32, 0x4000
|
||||
; GFX10-NEXT: s_mov_b32 s1, s0
|
||||
; GFX10-NEXT: s_mov_b32 s2, s0
|
||||
; GFX10-NEXT: s_mov_b32 s3, s0
|
||||
@ -1522,11 +1522,11 @@ define void @zero_init_large_offset_foo() {
|
||||
; GFX10-NEXT: v_mov_b32_e32 v2, s2
|
||||
; GFX10-NEXT: v_mov_b32_e32 v3, s3
|
||||
; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo
|
||||
; GFX10-NEXT: s_add_u32 vcc_lo, s32, 0x4000
|
||||
; GFX10-NEXT: s_add_i32 vcc_lo, s32, 0x4000
|
||||
; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:16
|
||||
; GFX10-NEXT: s_add_u32 vcc_lo, s32, 0x4000
|
||||
; GFX10-NEXT: s_add_i32 vcc_lo, s32, 0x4000
|
||||
; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:32
|
||||
; GFX10-NEXT: s_add_u32 vcc_lo, s32, 0x4000
|
||||
; GFX10-NEXT: s_add_i32 vcc_lo, s32, 0x4000
|
||||
; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:48
|
||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
||||
@ -1544,13 +1544,13 @@ define void @zero_init_large_offset_foo() {
|
||||
; GFX9-PAL-NEXT: v_mov_b32_e32 v1, s1
|
||||
; GFX9-PAL-NEXT: v_mov_b32_e32 v2, s2
|
||||
; GFX9-PAL-NEXT: v_mov_b32_e32 v3, s3
|
||||
; GFX9-PAL-NEXT: s_add_u32 vcc_hi, s32, 0x4000
|
||||
; GFX9-PAL-NEXT: s_add_i32 vcc_hi, s32, 0x4000
|
||||
; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi
|
||||
; GFX9-PAL-NEXT: s_add_u32 vcc_hi, s32, 0x4000
|
||||
; GFX9-PAL-NEXT: s_add_i32 vcc_hi, s32, 0x4000
|
||||
; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:16
|
||||
; GFX9-PAL-NEXT: s_add_u32 vcc_hi, s32, 0x4000
|
||||
; GFX9-PAL-NEXT: s_add_i32 vcc_hi, s32, 0x4000
|
||||
; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:32
|
||||
; GFX9-PAL-NEXT: s_add_u32 vcc_hi, s32, 0x4000
|
||||
; GFX9-PAL-NEXT: s_add_i32 vcc_hi, s32, 0x4000
|
||||
; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:48
|
||||
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-PAL-NEXT: s_setpc_b64 s[30:31]
|
||||
@ -1562,7 +1562,7 @@ define void @zero_init_large_offset_foo() {
|
||||
; GFX1010-PAL-NEXT: scratch_load_dword v0, off, s32 glc dlc
|
||||
; GFX1010-PAL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX1010-PAL-NEXT: s_mov_b32 s0, 0
|
||||
; GFX1010-PAL-NEXT: s_add_u32 vcc_lo, s32, 0x4000
|
||||
; GFX1010-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4000
|
||||
; GFX1010-PAL-NEXT: s_mov_b32 s1, s0
|
||||
; GFX1010-PAL-NEXT: s_mov_b32 s2, s0
|
||||
; GFX1010-PAL-NEXT: s_mov_b32 s3, s0
|
||||
@ -1572,13 +1572,13 @@ define void @zero_init_large_offset_foo() {
|
||||
; GFX1010-PAL-NEXT: v_mov_b32_e32 v3, s3
|
||||
; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo
|
||||
; GFX1010-PAL-NEXT: s_waitcnt_depctr 0xffe3
|
||||
; GFX1010-PAL-NEXT: s_add_u32 vcc_lo, s32, 0x4000
|
||||
; GFX1010-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4000
|
||||
; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:16
|
||||
; GFX1010-PAL-NEXT: s_waitcnt_depctr 0xffe3
|
||||
; GFX1010-PAL-NEXT: s_add_u32 vcc_lo, s32, 0x4000
|
||||
; GFX1010-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4000
|
||||
; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:32
|
||||
; GFX1010-PAL-NEXT: s_waitcnt_depctr 0xffe3
|
||||
; GFX1010-PAL-NEXT: s_add_u32 vcc_lo, s32, 0x4000
|
||||
; GFX1010-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4000
|
||||
; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:48
|
||||
; GFX1010-PAL-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX1010-PAL-NEXT: s_setpc_b64 s[30:31]
|
||||
@ -1590,7 +1590,7 @@ define void @zero_init_large_offset_foo() {
|
||||
; GFX1030-PAL-NEXT: scratch_load_dword v0, off, s32 glc dlc
|
||||
; GFX1030-PAL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX1030-PAL-NEXT: s_mov_b32 s0, 0
|
||||
; GFX1030-PAL-NEXT: s_add_u32 vcc_lo, s32, 0x4000
|
||||
; GFX1030-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4000
|
||||
; GFX1030-PAL-NEXT: s_mov_b32 s1, s0
|
||||
; GFX1030-PAL-NEXT: s_mov_b32 s2, s0
|
||||
; GFX1030-PAL-NEXT: s_mov_b32 s3, s0
|
||||
@ -1599,11 +1599,11 @@ define void @zero_init_large_offset_foo() {
|
||||
; GFX1030-PAL-NEXT: v_mov_b32_e32 v2, s2
|
||||
; GFX1030-PAL-NEXT: v_mov_b32_e32 v3, s3
|
||||
; GFX1030-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo
|
||||
; GFX1030-PAL-NEXT: s_add_u32 vcc_lo, s32, 0x4000
|
||||
; GFX1030-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4000
|
||||
; GFX1030-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:16
|
||||
; GFX1030-PAL-NEXT: s_add_u32 vcc_lo, s32, 0x4000
|
||||
; GFX1030-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4000
|
||||
; GFX1030-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:32
|
||||
; GFX1030-PAL-NEXT: s_add_u32 vcc_lo, s32, 0x4000
|
||||
; GFX1030-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4000
|
||||
; GFX1030-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:48
|
||||
; GFX1030-PAL-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX1030-PAL-NEXT: s_setpc_b64 s[30:31]
|
||||
@ -1629,10 +1629,10 @@ define amdgpu_kernel void @store_load_sindex_large_offset_kernel(i32 %idx) {
|
||||
; GFX9-NEXT: s_and_b32 s0, s0, 15
|
||||
; GFX9-NEXT: s_lshl_b32 s0, s0, 2
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, 15
|
||||
; GFX9-NEXT: s_add_u32 s1, 0x4004, s1
|
||||
; GFX9-NEXT: s_addk_i32 s1, 0x4004
|
||||
; GFX9-NEXT: scratch_store_dword off, v0, s1
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: s_add_u32 s0, 0x4004, s0
|
||||
; GFX9-NEXT: s_addk_i32 s0, 0x4004
|
||||
; GFX9-NEXT: scratch_load_dword v0, off, s0 glc
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: s_endpgm
|
||||
@ -1651,8 +1651,8 @@ define amdgpu_kernel void @store_load_sindex_large_offset_kernel(i32 %idx) {
|
||||
; GFX10-NEXT: s_and_b32 s1, s0, 15
|
||||
; GFX10-NEXT: s_lshl_b32 s0, s0, 2
|
||||
; GFX10-NEXT: s_lshl_b32 s1, s1, 2
|
||||
; GFX10-NEXT: s_add_u32 s0, 0x4004, s0
|
||||
; GFX10-NEXT: s_add_u32 s1, 0x4004, s1
|
||||
; GFX10-NEXT: s_addk_i32 s0, 0x4004
|
||||
; GFX10-NEXT: s_addk_i32 s1, 0x4004
|
||||
; GFX10-NEXT: scratch_store_dword off, v0, s0
|
||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-NEXT: scratch_load_dword v0, off, s1 glc dlc
|
||||
@ -1676,10 +1676,10 @@ define amdgpu_kernel void @store_load_sindex_large_offset_kernel(i32 %idx) {
|
||||
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-PAL-NEXT: s_lshl_b32 s0, s0, 2
|
||||
; GFX9-PAL-NEXT: v_mov_b32_e32 v0, 15
|
||||
; GFX9-PAL-NEXT: s_add_u32 s1, 0x4004, s1
|
||||
; GFX9-PAL-NEXT: s_addk_i32 s1, 0x4004
|
||||
; GFX9-PAL-NEXT: scratch_store_dword off, v0, s1
|
||||
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-PAL-NEXT: s_add_u32 s0, 0x4004, s0
|
||||
; GFX9-PAL-NEXT: s_addk_i32 s0, 0x4004
|
||||
; GFX9-PAL-NEXT: scratch_load_dword v0, off, s0 glc
|
||||
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-PAL-NEXT: s_endpgm
|
||||
@ -1704,8 +1704,8 @@ define amdgpu_kernel void @store_load_sindex_large_offset_kernel(i32 %idx) {
|
||||
; GFX1010-PAL-NEXT: s_and_b32 s1, s0, 15
|
||||
; GFX1010-PAL-NEXT: s_lshl_b32 s0, s0, 2
|
||||
; GFX1010-PAL-NEXT: s_lshl_b32 s1, s1, 2
|
||||
; GFX1010-PAL-NEXT: s_add_u32 s0, 0x4004, s0
|
||||
; GFX1010-PAL-NEXT: s_add_u32 s1, 0x4004, s1
|
||||
; GFX1010-PAL-NEXT: s_addk_i32 s0, 0x4004
|
||||
; GFX1010-PAL-NEXT: s_addk_i32 s1, 0x4004
|
||||
; GFX1010-PAL-NEXT: scratch_store_dword off, v0, s0
|
||||
; GFX1010-PAL-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX1010-PAL-NEXT: scratch_load_dword v0, off, s1 glc dlc
|
||||
@ -1731,8 +1731,8 @@ define amdgpu_kernel void @store_load_sindex_large_offset_kernel(i32 %idx) {
|
||||
; GFX1030-PAL-NEXT: s_and_b32 s1, s0, 15
|
||||
; GFX1030-PAL-NEXT: s_lshl_b32 s0, s0, 2
|
||||
; GFX1030-PAL-NEXT: s_lshl_b32 s1, s1, 2
|
||||
; GFX1030-PAL-NEXT: s_add_u32 s0, 0x4004, s0
|
||||
; GFX1030-PAL-NEXT: s_add_u32 s1, 0x4004, s1
|
||||
; GFX1030-PAL-NEXT: s_addk_i32 s0, 0x4004
|
||||
; GFX1030-PAL-NEXT: s_addk_i32 s1, 0x4004
|
||||
; GFX1030-PAL-NEXT: scratch_store_dword off, v0, s0
|
||||
; GFX1030-PAL-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX1030-PAL-NEXT: scratch_load_dword v0, off, s1 glc dlc
|
||||
@ -1763,13 +1763,13 @@ define amdgpu_ps void @store_load_sindex_large_offset_foo(i32 inreg %idx) {
|
||||
; GFX9-NEXT: s_lshl_b32 s0, s2, 2
|
||||
; GFX9-NEXT: scratch_load_dword v0, off, vcc_hi offset:4 glc
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: s_add_u32 s0, 0x4004, s0
|
||||
; GFX9-NEXT: s_addk_i32 s0, 0x4004
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, 15
|
||||
; GFX9-NEXT: scratch_store_dword off, v0, s0
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: s_and_b32 s0, s2, 15
|
||||
; GFX9-NEXT: s_lshl_b32 s0, s0, 2
|
||||
; GFX9-NEXT: s_add_u32 s0, 0x4004, s0
|
||||
; GFX9-NEXT: s_addk_i32 s0, 0x4004
|
||||
; GFX9-NEXT: scratch_load_dword v0, off, s0 glc
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: s_endpgm
|
||||
@ -1786,8 +1786,8 @@ define amdgpu_ps void @store_load_sindex_large_offset_foo(i32 inreg %idx) {
|
||||
; GFX10-NEXT: v_mov_b32_e32 v0, 15
|
||||
; GFX10-NEXT: s_lshl_b32 s1, s2, 2
|
||||
; GFX10-NEXT: s_lshl_b32 s0, s0, 2
|
||||
; GFX10-NEXT: s_add_u32 s1, 0x4004, s1
|
||||
; GFX10-NEXT: s_add_u32 s0, 0x4004, s0
|
||||
; GFX10-NEXT: s_addk_i32 s1, 0x4004
|
||||
; GFX10-NEXT: s_addk_i32 s0, 0x4004
|
||||
; GFX10-NEXT: scratch_store_dword off, v0, s1
|
||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-NEXT: scratch_load_dword v0, off, s0 glc dlc
|
||||
@ -1809,11 +1809,11 @@ define amdgpu_ps void @store_load_sindex_large_offset_foo(i32 inreg %idx) {
|
||||
; GFX9-PAL-NEXT: scratch_load_dword v0, off, vcc_hi offset:4 glc
|
||||
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-PAL-NEXT: s_lshl_b32 s0, s0, 2
|
||||
; GFX9-PAL-NEXT: s_add_u32 s1, 0x4004, s1
|
||||
; GFX9-PAL-NEXT: s_addk_i32 s1, 0x4004
|
||||
; GFX9-PAL-NEXT: v_mov_b32_e32 v0, 15
|
||||
; GFX9-PAL-NEXT: scratch_store_dword off, v0, s1
|
||||
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-PAL-NEXT: s_add_u32 s0, 0x4004, s0
|
||||
; GFX9-PAL-NEXT: s_addk_i32 s0, 0x4004
|
||||
; GFX9-PAL-NEXT: scratch_load_dword v0, off, s0 glc
|
||||
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-PAL-NEXT: s_endpgm
|
||||
@ -1836,8 +1836,8 @@ define amdgpu_ps void @store_load_sindex_large_offset_foo(i32 inreg %idx) {
|
||||
; GFX1010-PAL-NEXT: v_mov_b32_e32 v0, 15
|
||||
; GFX1010-PAL-NEXT: s_lshl_b32 s0, s0, 2
|
||||
; GFX1010-PAL-NEXT: s_lshl_b32 s1, s1, 2
|
||||
; GFX1010-PAL-NEXT: s_add_u32 s0, 0x4004, s0
|
||||
; GFX1010-PAL-NEXT: s_add_u32 s1, 0x4004, s1
|
||||
; GFX1010-PAL-NEXT: s_addk_i32 s0, 0x4004
|
||||
; GFX1010-PAL-NEXT: s_addk_i32 s1, 0x4004
|
||||
; GFX1010-PAL-NEXT: scratch_store_dword off, v0, s0
|
||||
; GFX1010-PAL-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX1010-PAL-NEXT: scratch_load_dword v0, off, s1 glc dlc
|
||||
@ -1861,8 +1861,8 @@ define amdgpu_ps void @store_load_sindex_large_offset_foo(i32 inreg %idx) {
|
||||
; GFX1030-PAL-NEXT: v_mov_b32_e32 v0, 15
|
||||
; GFX1030-PAL-NEXT: s_lshl_b32 s0, s0, 2
|
||||
; GFX1030-PAL-NEXT: s_lshl_b32 s1, s1, 2
|
||||
; GFX1030-PAL-NEXT: s_add_u32 s0, 0x4004, s0
|
||||
; GFX1030-PAL-NEXT: s_add_u32 s1, 0x4004, s1
|
||||
; GFX1030-PAL-NEXT: s_addk_i32 s0, 0x4004
|
||||
; GFX1030-PAL-NEXT: s_addk_i32 s1, 0x4004
|
||||
; GFX1030-PAL-NEXT: scratch_store_dword off, v0, s0
|
||||
; GFX1030-PAL-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX1030-PAL-NEXT: scratch_load_dword v0, off, s1 glc dlc
|
||||
@ -2017,7 +2017,7 @@ define void @store_load_vindex_large_offset_foo(i32 %idx) {
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: scratch_load_dword v1, off, s32 glc
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: s_add_u32 vcc_hi, s32, 0x4000
|
||||
; GFX9-NEXT: s_add_i32 vcc_hi, s32, 0x4000
|
||||
; GFX9-NEXT: v_mov_b32_e32 v1, vcc_hi
|
||||
; GFX9-NEXT: v_mov_b32_e32 v3, 15
|
||||
; GFX9-NEXT: v_lshl_add_u32 v2, v0, 2, v1
|
||||
@ -2034,7 +2034,7 @@ define void @store_load_vindex_large_offset_foo(i32 %idx) {
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-NEXT: v_mov_b32_e32 v1, 15
|
||||
; GFX10-NEXT: s_add_u32 vcc_lo, s32, 0x4000
|
||||
; GFX10-NEXT: s_add_i32 vcc_lo, s32, 0x4000
|
||||
; GFX10-NEXT: v_mov_b32_e32 v2, vcc_lo
|
||||
; GFX10-NEXT: v_and_b32_e32 v3, v0, v1
|
||||
; GFX10-NEXT: v_lshl_add_u32 v0, v0, 2, v2
|
||||
@ -2052,7 +2052,7 @@ define void @store_load_vindex_large_offset_foo(i32 %idx) {
|
||||
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-PAL-NEXT: scratch_load_dword v1, off, s32 glc
|
||||
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-PAL-NEXT: s_add_u32 vcc_hi, s32, 0x4000
|
||||
; GFX9-PAL-NEXT: s_add_i32 vcc_hi, s32, 0x4000
|
||||
; GFX9-PAL-NEXT: v_mov_b32_e32 v1, vcc_hi
|
||||
; GFX9-PAL-NEXT: v_mov_b32_e32 v3, 15
|
||||
; GFX9-PAL-NEXT: v_lshl_add_u32 v2, v0, 2, v1
|
||||
@ -2069,7 +2069,7 @@ define void @store_load_vindex_large_offset_foo(i32 %idx) {
|
||||
; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-PAL-NEXT: v_mov_b32_e32 v1, 15
|
||||
; GFX10-PAL-NEXT: s_add_u32 vcc_lo, s32, 0x4000
|
||||
; GFX10-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4000
|
||||
; GFX10-PAL-NEXT: v_mov_b32_e32 v2, vcc_lo
|
||||
; GFX10-PAL-NEXT: v_and_b32_e32 v3, v0, v1
|
||||
; GFX10-PAL-NEXT: v_lshl_add_u32 v0, v0, 2, v2
|
||||
@ -2107,7 +2107,7 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
|
||||
; GFX9-NEXT: s_mov_b32 vcc_hi, 0
|
||||
; GFX9-NEXT: scratch_store_dword off, v0, vcc_hi offset:4
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: s_add_u32 s0, 4, s0
|
||||
; GFX9-NEXT: s_add_i32 s0, s0, 4
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, 15
|
||||
; GFX9-NEXT: scratch_store_dword off, v0, s0 offset:3712
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
@ -2124,7 +2124,7 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
|
||||
; GFX10-NEXT: v_mov_b32_e32 v0, 13
|
||||
; GFX10-NEXT: v_mov_b32_e32 v1, 15
|
||||
; GFX10-NEXT: s_movk_i32 s0, 0x3800
|
||||
; GFX10-NEXT: s_add_u32 s0, 4, s0
|
||||
; GFX10-NEXT: s_add_i32 s0, s0, 4
|
||||
; GFX10-NEXT: scratch_store_dword off, v0, off offset:4
|
||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-NEXT: scratch_store_dword off, v1, s0 offset:1664
|
||||
@ -2147,7 +2147,7 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
|
||||
; GFX9-PAL-NEXT: s_addc_u32 flat_scratch_hi, s3, 0
|
||||
; GFX9-PAL-NEXT: scratch_store_dword off, v0, vcc_hi offset:4
|
||||
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-PAL-NEXT: s_add_u32 s0, 4, s0
|
||||
; GFX9-PAL-NEXT: s_add_i32 s0, s0, 4
|
||||
; GFX9-PAL-NEXT: v_mov_b32_e32 v0, 15
|
||||
; GFX9-PAL-NEXT: scratch_store_dword off, v0, s0 offset:3712
|
||||
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
|
||||
@ -2170,7 +2170,7 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
|
||||
; GFX1010-PAL-NEXT: v_mov_b32_e32 v1, 15
|
||||
; GFX1010-PAL-NEXT: s_movk_i32 s0, 0x3800
|
||||
; GFX1010-PAL-NEXT: s_mov_b32 vcc_lo, 0
|
||||
; GFX1010-PAL-NEXT: s_add_u32 s0, 4, s0
|
||||
; GFX1010-PAL-NEXT: s_add_i32 s0, s0, 4
|
||||
; GFX1010-PAL-NEXT: scratch_store_dword off, v0, vcc_lo offset:4
|
||||
; GFX1010-PAL-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX1010-PAL-NEXT: scratch_store_dword off, v1, s0 offset:1664
|
||||
@ -2193,7 +2193,7 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
|
||||
; GFX1030-PAL-NEXT: v_mov_b32_e32 v0, 13
|
||||
; GFX1030-PAL-NEXT: v_mov_b32_e32 v1, 15
|
||||
; GFX1030-PAL-NEXT: s_movk_i32 s0, 0x3800
|
||||
; GFX1030-PAL-NEXT: s_add_u32 s0, 4, s0
|
||||
; GFX1030-PAL-NEXT: s_add_i32 s0, s0, 4
|
||||
; GFX1030-PAL-NEXT: scratch_store_dword off, v0, off offset:4
|
||||
; GFX1030-PAL-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX1030-PAL-NEXT: scratch_store_dword off, v1, s0 offset:1664
|
||||
@ -2220,7 +2220,7 @@ define void @store_load_large_imm_offset_foo() {
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, 13
|
||||
; GFX9-NEXT: scratch_store_dword off, v0, s32
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: s_add_u32 s0, s32, s0
|
||||
; GFX9-NEXT: s_add_i32 s0, s0, s32
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, 15
|
||||
; GFX9-NEXT: scratch_store_dword off, v0, s0 offset:3712
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
@ -2235,7 +2235,7 @@ define void @store_load_large_imm_offset_foo() {
|
||||
; GFX10-NEXT: v_mov_b32_e32 v0, 13
|
||||
; GFX10-NEXT: v_mov_b32_e32 v1, 15
|
||||
; GFX10-NEXT: s_movk_i32 s0, 0x3800
|
||||
; GFX10-NEXT: s_add_u32 s0, s32, s0
|
||||
; GFX10-NEXT: s_add_i32 s0, s0, s32
|
||||
; GFX10-NEXT: scratch_store_dword off, v0, s32
|
||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-NEXT: scratch_store_dword off, v1, s0 offset:1664
|
||||
@ -2251,7 +2251,7 @@ define void @store_load_large_imm_offset_foo() {
|
||||
; GFX9-PAL-NEXT: v_mov_b32_e32 v0, 13
|
||||
; GFX9-PAL-NEXT: scratch_store_dword off, v0, s32
|
||||
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-PAL-NEXT: s_add_u32 s0, s32, s0
|
||||
; GFX9-PAL-NEXT: s_add_i32 s0, s0, s32
|
||||
; GFX9-PAL-NEXT: v_mov_b32_e32 v0, 15
|
||||
; GFX9-PAL-NEXT: scratch_store_dword off, v0, s0 offset:3712
|
||||
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
|
||||
@ -2266,7 +2266,7 @@ define void @store_load_large_imm_offset_foo() {
|
||||
; GFX10-PAL-NEXT: v_mov_b32_e32 v0, 13
|
||||
; GFX10-PAL-NEXT: v_mov_b32_e32 v1, 15
|
||||
; GFX10-PAL-NEXT: s_movk_i32 s0, 0x3800
|
||||
; GFX10-PAL-NEXT: s_add_u32 s0, s32, s0
|
||||
; GFX10-PAL-NEXT: s_add_i32 s0, s0, s32
|
||||
; GFX10-PAL-NEXT: scratch_store_dword off, v0, s32
|
||||
; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-PAL-NEXT: scratch_store_dword off, v1, s0 offset:1664
|
||||
|
@ -37,7 +37,7 @@ define void @func_mov_fi_i32() #0 {
|
||||
|
||||
; GFX9-MUBUF-NEXT: v_lshrrev_b32_e64 v0, 6, s32
|
||||
; GFX9-FLATSCR: v_mov_b32_e32 v0, s32
|
||||
; GFX9-FLATSCR: s_add_u32 [[ADD:[^,]+]], s32, 4
|
||||
; GFX9-FLATSCR: s_add_i32 [[ADD:[^,]+]], s32, 4
|
||||
; GFX9-NEXT: ds_write_b32 v0, v0
|
||||
; GFX9-MUBUF-NEXT: v_lshrrev_b32_e64 [[SCALED:v[0-9]+]], 6, s32
|
||||
; GFX9-MUBUF-NEXT: v_add_u32_e32 v0, 4, [[SCALED]]
|
||||
@ -196,7 +196,7 @@ ret:
|
||||
; GFX9-MUBUF-DAG: v_lshrrev_b32_e64 [[SCALED:v[0-9]+]], 6, s32
|
||||
; GFX9-MUBUF: v_add_u32_e32 [[VZ:v[0-9]+]], 0x200, [[SCALED]]
|
||||
|
||||
; GFX9-FLATSCR-DAG: s_add_u32 [[SZ:[^,]+]], s32, 0x200
|
||||
; GFX9-FLATSCR-DAG: s_add_i32 [[SZ:[^,]+]], s32, 0x200
|
||||
; GFX9-FLATSCR: v_mov_b32_e32 [[VZ:v[0-9]+]], [[SZ]]
|
||||
|
||||
; GCN: v_mul_lo_u32 [[VZ]], [[VZ]], 9
|
||||
@ -222,7 +222,7 @@ define void @func_other_fi_user_non_inline_imm_offset_i32() #0 {
|
||||
; GFX9-MUBUF-DAG: v_lshrrev_b32_e64 [[SCALED:v[0-9]+]], 6, s32
|
||||
; GFX9-MUBUF: v_add_u32_e32 [[VZ:v[0-9]+]], 0x200, [[SCALED]]
|
||||
|
||||
; GFX9-FLATSCR-DAG: s_add_u32 [[SZ:[^,]+]], s32, 0x200
|
||||
; GFX9-FLATSCR-DAG: s_add_i32 [[SZ:[^,]+]], s32, 0x200
|
||||
; GFX9-FLATSCR: v_mov_b32_e32 [[VZ:v[0-9]+]], [[SZ]]
|
||||
|
||||
; GCN: v_mul_lo_u32 [[VZ]], [[VZ]], 9
|
||||
|
@ -16,7 +16,7 @@ define void @callee_with_stack_and_call() #0 {
|
||||
; SPILL-TO-VGPR-NEXT: v_writelane_b32 v40, s33, 2
|
||||
; SPILL-TO-VGPR-NEXT: v_writelane_b32 v40, s30, 0
|
||||
; SPILL-TO-VGPR-NEXT: s_mov_b32 s33, s32
|
||||
; SPILL-TO-VGPR-NEXT: s_add_u32 s32, s32, 0x400
|
||||
; SPILL-TO-VGPR-NEXT: s_addk_i32 s32, 0x400
|
||||
; SPILL-TO-VGPR-NEXT: v_mov_b32_e32 v0, 0
|
||||
; SPILL-TO-VGPR-NEXT: s_getpc_b64 s[4:5]
|
||||
; SPILL-TO-VGPR-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
|
||||
@ -27,7 +27,7 @@ define void @callee_with_stack_and_call() #0 {
|
||||
; SPILL-TO-VGPR-NEXT: s_swappc_b64 s[30:31], s[4:5]
|
||||
; SPILL-TO-VGPR-NEXT: v_readlane_b32 s4, v40, 0
|
||||
; SPILL-TO-VGPR-NEXT: v_readlane_b32 s5, v40, 1
|
||||
; SPILL-TO-VGPR-NEXT: s_sub_u32 s32, s32, 0x400
|
||||
; SPILL-TO-VGPR-NEXT: s_addk_i32 s32, 0xfc00
|
||||
; SPILL-TO-VGPR-NEXT: v_readlane_b32 s33, v40, 2
|
||||
; SPILL-TO-VGPR-NEXT: s_or_saveexec_b64 s[6:7], -1
|
||||
; SPILL-TO-VGPR-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
|
||||
@ -41,7 +41,7 @@ define void @callee_with_stack_and_call() #0 {
|
||||
; NO-SPILL-TO-VGPR-NEXT: v_mov_b32_e32 v0, s33
|
||||
; NO-SPILL-TO-VGPR-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
|
||||
; NO-SPILL-TO-VGPR-NEXT: s_mov_b32 s33, s32
|
||||
; NO-SPILL-TO-VGPR-NEXT: s_add_u32 s32, s32, 0x800
|
||||
; NO-SPILL-TO-VGPR-NEXT: s_addk_i32 s32, 0x800
|
||||
; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 s[6:7], exec
|
||||
; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, 3
|
||||
; NO-SPILL-TO-VGPR-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:16
|
||||
@ -68,7 +68,7 @@ define void @callee_with_stack_and_call() #0 {
|
||||
; NO-SPILL-TO-VGPR-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:16
|
||||
; NO-SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0)
|
||||
; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, s[6:7]
|
||||
; NO-SPILL-TO-VGPR-NEXT: s_sub_u32 s32, s32, 0x800
|
||||
; NO-SPILL-TO-VGPR-NEXT: s_addk_i32 s32, 0xf800
|
||||
; NO-SPILL-TO-VGPR-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
|
||||
; NO-SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0)
|
||||
; NO-SPILL-TO-VGPR-NEXT: v_readfirstlane_b32 s33, v0
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -16,7 +16,7 @@ define amdgpu_gfx void @test_call_external_void_func_void_clobber_s30_s31_call_e
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s35, 1
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s30, 2
|
||||
; GFX9-NEXT: s_mov_b32 s33, s32
|
||||
; GFX9-NEXT: s_add_u32 s32, s32, 0x400
|
||||
; GFX9-NEXT: s_addk_i32 s32, 0x400
|
||||
; GFX9-NEXT: s_getpc_b64 s[34:35]
|
||||
; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4
|
||||
; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12
|
||||
@ -29,7 +29,7 @@ define amdgpu_gfx void @test_call_external_void_func_void_clobber_s30_s31_call_e
|
||||
; GFX9-NEXT: v_readlane_b32 s5, v40, 3
|
||||
; GFX9-NEXT: v_readlane_b32 s35, v40, 1
|
||||
; GFX9-NEXT: v_readlane_b32 s34, v40, 0
|
||||
; GFX9-NEXT: s_sub_u32 s32, s32, 0x400
|
||||
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
|
||||
; GFX9-NEXT: v_readlane_b32 s33, v40, 4
|
||||
; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
|
||||
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
|
||||
@ -47,7 +47,7 @@ define amdgpu_gfx void @test_call_external_void_func_void_clobber_s30_s31_call_e
|
||||
; GFX10-NEXT: s_mov_b32 exec_lo, s4
|
||||
; GFX10-NEXT: v_writelane_b32 v40, s33, 4
|
||||
; GFX10-NEXT: s_mov_b32 s33, s32
|
||||
; GFX10-NEXT: s_add_u32 s32, s32, 0x200
|
||||
; GFX10-NEXT: s_addk_i32 s32, 0x200
|
||||
; GFX10-NEXT: v_writelane_b32 v40, s34, 0
|
||||
; GFX10-NEXT: v_writelane_b32 v40, s35, 1
|
||||
; GFX10-NEXT: s_getpc_b64 s[34:35]
|
||||
@ -63,7 +63,7 @@ define amdgpu_gfx void @test_call_external_void_func_void_clobber_s30_s31_call_e
|
||||
; GFX10-NEXT: v_readlane_b32 s5, v40, 3
|
||||
; GFX10-NEXT: v_readlane_b32 s35, v40, 1
|
||||
; GFX10-NEXT: v_readlane_b32 s34, v40, 0
|
||||
; GFX10-NEXT: s_sub_u32 s32, s32, 0x200
|
||||
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
|
||||
; GFX10-NEXT: v_readlane_b32 s33, v40, 4
|
||||
; GFX10-NEXT: s_or_saveexec_b32 s6, -1
|
||||
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
|
||||
@ -111,7 +111,7 @@ define amdgpu_gfx void @test_call_void_func_void_mayclobber_s31(i32 addrspace(1)
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s34, 0
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s30, 1
|
||||
; GFX9-NEXT: s_mov_b32 s33, s32
|
||||
; GFX9-NEXT: s_add_u32 s32, s32, 0x400
|
||||
; GFX9-NEXT: s_addk_i32 s32, 0x400
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s31, 2
|
||||
; GFX9-NEXT: ;;#ASMSTART
|
||||
; GFX9-NEXT: ; def s31
|
||||
@ -128,7 +128,7 @@ define amdgpu_gfx void @test_call_void_func_void_mayclobber_s31(i32 addrspace(1)
|
||||
; GFX9-NEXT: ;;#ASMEND
|
||||
; GFX9-NEXT: v_readlane_b32 s5, v40, 2
|
||||
; GFX9-NEXT: v_readlane_b32 s34, v40, 0
|
||||
; GFX9-NEXT: s_sub_u32 s32, s32, 0x400
|
||||
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
|
||||
; GFX9-NEXT: v_readlane_b32 s33, v40, 3
|
||||
; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
|
||||
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
|
||||
@ -146,7 +146,7 @@ define amdgpu_gfx void @test_call_void_func_void_mayclobber_s31(i32 addrspace(1)
|
||||
; GFX10-NEXT: s_mov_b32 exec_lo, s4
|
||||
; GFX10-NEXT: v_writelane_b32 v40, s33, 3
|
||||
; GFX10-NEXT: s_mov_b32 s33, s32
|
||||
; GFX10-NEXT: s_add_u32 s32, s32, 0x200
|
||||
; GFX10-NEXT: s_addk_i32 s32, 0x200
|
||||
; GFX10-NEXT: s_getpc_b64 s[4:5]
|
||||
; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
|
||||
; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12
|
||||
@ -165,7 +165,7 @@ define amdgpu_gfx void @test_call_void_func_void_mayclobber_s31(i32 addrspace(1)
|
||||
; GFX10-NEXT: ;;#ASMEND
|
||||
; GFX10-NEXT: v_readlane_b32 s5, v40, 2
|
||||
; GFX10-NEXT: v_readlane_b32 s34, v40, 0
|
||||
; GFX10-NEXT: s_sub_u32 s32, s32, 0x200
|
||||
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
|
||||
; GFX10-NEXT: v_readlane_b32 s33, v40, 3
|
||||
; GFX10-NEXT: s_or_saveexec_b32 s6, -1
|
||||
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
|
||||
@ -189,7 +189,7 @@ define amdgpu_gfx void @test_call_void_func_void_mayclobber_v31(i32 addrspace(1)
|
||||
; GFX9-NEXT: v_writelane_b32 v41, s33, 2
|
||||
; GFX9-NEXT: v_writelane_b32 v41, s30, 0
|
||||
; GFX9-NEXT: s_mov_b32 s33, s32
|
||||
; GFX9-NEXT: s_add_u32 s32, s32, 0x400
|
||||
; GFX9-NEXT: s_addk_i32 s32, 0x400
|
||||
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
|
||||
; GFX9-NEXT: ;;#ASMSTART
|
||||
; GFX9-NEXT: ; def v31
|
||||
@ -207,7 +207,7 @@ define amdgpu_gfx void @test_call_void_func_void_mayclobber_v31(i32 addrspace(1)
|
||||
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
|
||||
; GFX9-NEXT: v_readlane_b32 s4, v41, 0
|
||||
; GFX9-NEXT: v_readlane_b32 s5, v41, 1
|
||||
; GFX9-NEXT: s_sub_u32 s32, s32, 0x400
|
||||
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
|
||||
; GFX9-NEXT: v_readlane_b32 s33, v41, 2
|
||||
; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
|
||||
; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
|
||||
@ -225,7 +225,7 @@ define amdgpu_gfx void @test_call_void_func_void_mayclobber_v31(i32 addrspace(1)
|
||||
; GFX10-NEXT: s_mov_b32 exec_lo, s4
|
||||
; GFX10-NEXT: v_writelane_b32 v41, s33, 2
|
||||
; GFX10-NEXT: s_mov_b32 s33, s32
|
||||
; GFX10-NEXT: s_add_u32 s32, s32, 0x200
|
||||
; GFX10-NEXT: s_addk_i32 s32, 0x200
|
||||
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
|
||||
; GFX10-NEXT: ;;#ASMSTART
|
||||
; GFX10-NEXT: ; def v31
|
||||
@ -244,7 +244,7 @@ define amdgpu_gfx void @test_call_void_func_void_mayclobber_v31(i32 addrspace(1)
|
||||
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
|
||||
; GFX10-NEXT: v_readlane_b32 s4, v41, 0
|
||||
; GFX10-NEXT: v_readlane_b32 s5, v41, 1
|
||||
; GFX10-NEXT: s_sub_u32 s32, s32, 0x200
|
||||
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
|
||||
; GFX10-NEXT: v_readlane_b32 s33, v41, 2
|
||||
; GFX10-NEXT: s_or_saveexec_b32 s6, -1
|
||||
; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
|
||||
@ -270,7 +270,7 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_s33(i32 addrspace(1)*
|
||||
; GFX9-NEXT: s_mov_b32 s33, s32
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s33, 0
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s30, 1
|
||||
; GFX9-NEXT: s_add_u32 s32, s32, 0x400
|
||||
; GFX9-NEXT: s_addk_i32 s32, 0x400
|
||||
; GFX9-NEXT: s_getpc_b64 s[4:5]
|
||||
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
|
||||
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12
|
||||
@ -285,7 +285,7 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_s33(i32 addrspace(1)*
|
||||
; GFX9-NEXT: v_readlane_b32 s4, v40, 1
|
||||
; GFX9-NEXT: v_readlane_b32 s33, v40, 0
|
||||
; GFX9-NEXT: v_readlane_b32 s5, v40, 2
|
||||
; GFX9-NEXT: s_sub_u32 s32, s32, 0x400
|
||||
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
|
||||
; GFX9-NEXT: v_readlane_b32 s33, v40, 3
|
||||
; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
|
||||
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
|
||||
@ -303,7 +303,7 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_s33(i32 addrspace(1)*
|
||||
; GFX10-NEXT: s_mov_b32 exec_lo, s4
|
||||
; GFX10-NEXT: v_writelane_b32 v40, s33, 3
|
||||
; GFX10-NEXT: s_mov_b32 s33, s32
|
||||
; GFX10-NEXT: s_add_u32 s32, s32, 0x200
|
||||
; GFX10-NEXT: s_addk_i32 s32, 0x200
|
||||
; GFX10-NEXT: s_getpc_b64 s[4:5]
|
||||
; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
|
||||
; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12
|
||||
@ -320,7 +320,7 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_s33(i32 addrspace(1)*
|
||||
; GFX10-NEXT: v_readlane_b32 s4, v40, 1
|
||||
; GFX10-NEXT: v_readlane_b32 s33, v40, 0
|
||||
; GFX10-NEXT: v_readlane_b32 s5, v40, 2
|
||||
; GFX10-NEXT: s_sub_u32 s32, s32, 0x200
|
||||
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
|
||||
; GFX10-NEXT: v_readlane_b32 s33, v40, 3
|
||||
; GFX10-NEXT: s_or_saveexec_b32 s6, -1
|
||||
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
|
||||
@ -345,7 +345,7 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_s34(i32 addrspace(1)*
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s34, 0
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s30, 1
|
||||
; GFX9-NEXT: s_mov_b32 s33, s32
|
||||
; GFX9-NEXT: s_add_u32 s32, s32, 0x400
|
||||
; GFX9-NEXT: s_addk_i32 s32, 0x400
|
||||
; GFX9-NEXT: s_getpc_b64 s[4:5]
|
||||
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
|
||||
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12
|
||||
@ -360,7 +360,7 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_s34(i32 addrspace(1)*
|
||||
; GFX9-NEXT: ;;#ASMEND
|
||||
; GFX9-NEXT: v_readlane_b32 s5, v40, 2
|
||||
; GFX9-NEXT: v_readlane_b32 s34, v40, 0
|
||||
; GFX9-NEXT: s_sub_u32 s32, s32, 0x400
|
||||
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
|
||||
; GFX9-NEXT: v_readlane_b32 s33, v40, 3
|
||||
; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
|
||||
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
|
||||
@ -378,7 +378,7 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_s34(i32 addrspace(1)*
|
||||
; GFX10-NEXT: s_mov_b32 exec_lo, s4
|
||||
; GFX10-NEXT: v_writelane_b32 v40, s33, 3
|
||||
; GFX10-NEXT: s_mov_b32 s33, s32
|
||||
; GFX10-NEXT: s_add_u32 s32, s32, 0x200
|
||||
; GFX10-NEXT: s_addk_i32 s32, 0x200
|
||||
; GFX10-NEXT: s_getpc_b64 s[4:5]
|
||||
; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
|
||||
; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12
|
||||
@ -395,7 +395,7 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_s34(i32 addrspace(1)*
|
||||
; GFX10-NEXT: ;;#ASMEND
|
||||
; GFX10-NEXT: v_readlane_b32 s5, v40, 2
|
||||
; GFX10-NEXT: v_readlane_b32 s34, v40, 0
|
||||
; GFX10-NEXT: s_sub_u32 s32, s32, 0x200
|
||||
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
|
||||
; GFX10-NEXT: v_readlane_b32 s33, v40, 3
|
||||
; GFX10-NEXT: s_or_saveexec_b32 s6, -1
|
||||
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
|
||||
@ -419,7 +419,7 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_v40(i32 addrspace(1)*
|
||||
; GFX9-NEXT: v_writelane_b32 v41, s33, 2
|
||||
; GFX9-NEXT: v_writelane_b32 v41, s30, 0
|
||||
; GFX9-NEXT: s_mov_b32 s33, s32
|
||||
; GFX9-NEXT: s_add_u32 s32, s32, 0x400
|
||||
; GFX9-NEXT: s_addk_i32 s32, 0x400
|
||||
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
|
||||
; GFX9-NEXT: s_getpc_b64 s[4:5]
|
||||
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
|
||||
@ -435,7 +435,7 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_v40(i32 addrspace(1)*
|
||||
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
|
||||
; GFX9-NEXT: v_readlane_b32 s4, v41, 0
|
||||
; GFX9-NEXT: v_readlane_b32 s5, v41, 1
|
||||
; GFX9-NEXT: s_sub_u32 s32, s32, 0x400
|
||||
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
|
||||
; GFX9-NEXT: v_readlane_b32 s33, v41, 2
|
||||
; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
|
||||
; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
|
||||
@ -453,7 +453,7 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_v40(i32 addrspace(1)*
|
||||
; GFX10-NEXT: s_mov_b32 exec_lo, s4
|
||||
; GFX10-NEXT: v_writelane_b32 v41, s33, 2
|
||||
; GFX10-NEXT: s_mov_b32 s33, s32
|
||||
; GFX10-NEXT: s_add_u32 s32, s32, 0x200
|
||||
; GFX10-NEXT: s_addk_i32 s32, 0x200
|
||||
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
|
||||
; GFX10-NEXT: s_getpc_b64 s[4:5]
|
||||
; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
|
||||
@ -470,7 +470,7 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_v40(i32 addrspace(1)*
|
||||
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
|
||||
; GFX10-NEXT: v_readlane_b32 s4, v41, 0
|
||||
; GFX10-NEXT: v_readlane_b32 s5, v41, 1
|
||||
; GFX10-NEXT: s_sub_u32 s32, s32, 0x200
|
||||
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
|
||||
; GFX10-NEXT: v_readlane_b32 s33, v41, 2
|
||||
; GFX10-NEXT: s_or_saveexec_b32 s6, -1
|
||||
; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
|
||||
@ -578,7 +578,7 @@ define amdgpu_gfx void @test_call_void_func_void_clobber_s33() #0 {
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s33, 2
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
|
||||
; GFX9-NEXT: s_mov_b32 s33, s32
|
||||
; GFX9-NEXT: s_add_u32 s32, s32, 0x400
|
||||
; GFX9-NEXT: s_addk_i32 s32, 0x400
|
||||
; GFX9-NEXT: s_getpc_b64 s[4:5]
|
||||
; GFX9-NEXT: s_add_u32 s4, s4, void_func_void_clobber_s33@rel32@lo+4
|
||||
; GFX9-NEXT: s_addc_u32 s5, s5, void_func_void_clobber_s33@rel32@hi+12
|
||||
@ -586,7 +586,7 @@ define amdgpu_gfx void @test_call_void_func_void_clobber_s33() #0 {
|
||||
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
|
||||
; GFX9-NEXT: v_readlane_b32 s4, v40, 0
|
||||
; GFX9-NEXT: v_readlane_b32 s5, v40, 1
|
||||
; GFX9-NEXT: s_sub_u32 s32, s32, 0x400
|
||||
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
|
||||
; GFX9-NEXT: v_readlane_b32 s33, v40, 2
|
||||
; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
|
||||
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
|
||||
@ -604,7 +604,7 @@ define amdgpu_gfx void @test_call_void_func_void_clobber_s33() #0 {
|
||||
; GFX10-NEXT: s_mov_b32 exec_lo, s4
|
||||
; GFX10-NEXT: v_writelane_b32 v40, s33, 2
|
||||
; GFX10-NEXT: s_mov_b32 s33, s32
|
||||
; GFX10-NEXT: s_add_u32 s32, s32, 0x200
|
||||
; GFX10-NEXT: s_addk_i32 s32, 0x200
|
||||
; GFX10-NEXT: s_getpc_b64 s[4:5]
|
||||
; GFX10-NEXT: s_add_u32 s4, s4, void_func_void_clobber_s33@rel32@lo+4
|
||||
; GFX10-NEXT: s_addc_u32 s5, s5, void_func_void_clobber_s33@rel32@hi+12
|
||||
@ -613,7 +613,7 @@ define amdgpu_gfx void @test_call_void_func_void_clobber_s33() #0 {
|
||||
; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
|
||||
; GFX10-NEXT: v_readlane_b32 s4, v40, 0
|
||||
; GFX10-NEXT: v_readlane_b32 s5, v40, 1
|
||||
; GFX10-NEXT: s_sub_u32 s32, s32, 0x200
|
||||
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
|
||||
; GFX10-NEXT: v_readlane_b32 s33, v40, 2
|
||||
; GFX10-NEXT: s_or_saveexec_b32 s6, -1
|
||||
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
|
||||
@ -635,7 +635,7 @@ define amdgpu_gfx void @test_call_void_func_void_clobber_s34() #0 {
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s33, 2
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
|
||||
; GFX9-NEXT: s_mov_b32 s33, s32
|
||||
; GFX9-NEXT: s_add_u32 s32, s32, 0x400
|
||||
; GFX9-NEXT: s_addk_i32 s32, 0x400
|
||||
; GFX9-NEXT: s_getpc_b64 s[4:5]
|
||||
; GFX9-NEXT: s_add_u32 s4, s4, void_func_void_clobber_s34@rel32@lo+4
|
||||
; GFX9-NEXT: s_addc_u32 s5, s5, void_func_void_clobber_s34@rel32@hi+12
|
||||
@ -643,7 +643,7 @@ define amdgpu_gfx void @test_call_void_func_void_clobber_s34() #0 {
|
||||
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
|
||||
; GFX9-NEXT: v_readlane_b32 s4, v40, 0
|
||||
; GFX9-NEXT: v_readlane_b32 s5, v40, 1
|
||||
; GFX9-NEXT: s_sub_u32 s32, s32, 0x400
|
||||
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
|
||||
; GFX9-NEXT: v_readlane_b32 s33, v40, 2
|
||||
; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
|
||||
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
|
||||
@ -661,7 +661,7 @@ define amdgpu_gfx void @test_call_void_func_void_clobber_s34() #0 {
|
||||
; GFX10-NEXT: s_mov_b32 exec_lo, s4
|
||||
; GFX10-NEXT: v_writelane_b32 v40, s33, 2
|
||||
; GFX10-NEXT: s_mov_b32 s33, s32
|
||||
; GFX10-NEXT: s_add_u32 s32, s32, 0x200
|
||||
; GFX10-NEXT: s_addk_i32 s32, 0x200
|
||||
; GFX10-NEXT: s_getpc_b64 s[4:5]
|
||||
; GFX10-NEXT: s_add_u32 s4, s4, void_func_void_clobber_s34@rel32@lo+4
|
||||
; GFX10-NEXT: s_addc_u32 s5, s5, void_func_void_clobber_s34@rel32@hi+12
|
||||
@ -670,7 +670,7 @@ define amdgpu_gfx void @test_call_void_func_void_clobber_s34() #0 {
|
||||
; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
|
||||
; GFX10-NEXT: v_readlane_b32 s4, v40, 0
|
||||
; GFX10-NEXT: v_readlane_b32 s5, v40, 1
|
||||
; GFX10-NEXT: s_sub_u32 s32, s32, 0x200
|
||||
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
|
||||
; GFX10-NEXT: v_readlane_b32 s33, v40, 2
|
||||
; GFX10-NEXT: s_or_saveexec_b32 s6, -1
|
||||
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
|
||||
@ -693,7 +693,7 @@ define amdgpu_gfx void @callee_saved_sgpr_kernel() #1 {
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s40, 0
|
||||
; GFX9-NEXT: v_writelane_b32 v40, s30, 1
|
||||
; GFX9-NEXT: s_mov_b32 s33, s32
|
||||
; GFX9-NEXT: s_add_u32 s32, s32, 0x400
|
||||
; GFX9-NEXT: s_addk_i32 s32, 0x400
|
||||
; GFX9-NEXT: s_getpc_b64 s[4:5]
|
||||
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
|
||||
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12
|
||||
@ -708,7 +708,7 @@ define amdgpu_gfx void @callee_saved_sgpr_kernel() #1 {
|
||||
; GFX9-NEXT: ;;#ASMEND
|
||||
; GFX9-NEXT: v_readlane_b32 s5, v40, 2
|
||||
; GFX9-NEXT: v_readlane_b32 s40, v40, 0
|
||||
; GFX9-NEXT: s_sub_u32 s32, s32, 0x400
|
||||
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
|
||||
; GFX9-NEXT: v_readlane_b32 s33, v40, 3
|
||||
; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
|
||||
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
|
||||
@ -726,7 +726,7 @@ define amdgpu_gfx void @callee_saved_sgpr_kernel() #1 {
|
||||
; GFX10-NEXT: s_mov_b32 exec_lo, s4
|
||||
; GFX10-NEXT: v_writelane_b32 v40, s33, 3
|
||||
; GFX10-NEXT: s_mov_b32 s33, s32
|
||||
; GFX10-NEXT: s_add_u32 s32, s32, 0x200
|
||||
; GFX10-NEXT: s_addk_i32 s32, 0x200
|
||||
; GFX10-NEXT: s_getpc_b64 s[4:5]
|
||||
; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
|
||||
; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12
|
||||
@ -743,7 +743,7 @@ define amdgpu_gfx void @callee_saved_sgpr_kernel() #1 {
|
||||
; GFX10-NEXT: ;;#ASMEND
|
||||
; GFX10-NEXT: v_readlane_b32 s5, v40, 2
|
||||
; GFX10-NEXT: v_readlane_b32 s40, v40, 0
|
||||
; GFX10-NEXT: s_sub_u32 s32, s32, 0x200
|
||||
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
|
||||
; GFX10-NEXT: v_readlane_b32 s33, v40, 3
|
||||
; GFX10-NEXT: s_or_saveexec_b32 s6, -1
|
||||
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
|
||||
@ -768,7 +768,7 @@ define amdgpu_gfx void @callee_saved_sgpr_vgpr_kernel() #1 {
|
||||
; GFX9-NEXT: v_writelane_b32 v41, s40, 0
|
||||
; GFX9-NEXT: v_writelane_b32 v41, s30, 1
|
||||
; GFX9-NEXT: s_mov_b32 s33, s32
|
||||
; GFX9-NEXT: s_add_u32 s32, s32, 0x400
|
||||
; GFX9-NEXT: s_addk_i32 s32, 0x400
|
||||
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
|
||||
; GFX9-NEXT: ;;#ASMSTART
|
||||
; GFX9-NEXT: ; def s40
|
||||
@ -792,7 +792,7 @@ define amdgpu_gfx void @callee_saved_sgpr_vgpr_kernel() #1 {
|
||||
; GFX9-NEXT: v_readlane_b32 s4, v41, 1
|
||||
; GFX9-NEXT: v_readlane_b32 s5, v41, 2
|
||||
; GFX9-NEXT: v_readlane_b32 s40, v41, 0
|
||||
; GFX9-NEXT: s_sub_u32 s32, s32, 0x400
|
||||
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
|
||||
; GFX9-NEXT: v_readlane_b32 s33, v41, 3
|
||||
; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
|
||||
; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
|
||||
@ -810,7 +810,7 @@ define amdgpu_gfx void @callee_saved_sgpr_vgpr_kernel() #1 {
|
||||
; GFX10-NEXT: s_mov_b32 exec_lo, s4
|
||||
; GFX10-NEXT: v_writelane_b32 v41, s33, 3
|
||||
; GFX10-NEXT: s_mov_b32 s33, s32
|
||||
; GFX10-NEXT: s_add_u32 s32, s32, 0x200
|
||||
; GFX10-NEXT: s_addk_i32 s32, 0x200
|
||||
; GFX10-NEXT: s_getpc_b64 s[4:5]
|
||||
; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
|
||||
; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12
|
||||
@ -836,7 +836,7 @@ define amdgpu_gfx void @callee_saved_sgpr_vgpr_kernel() #1 {
|
||||
; GFX10-NEXT: v_readlane_b32 s4, v41, 1
|
||||
; GFX10-NEXT: v_readlane_b32 s5, v41, 2
|
||||
; GFX10-NEXT: v_readlane_b32 s40, v41, 0
|
||||
; GFX10-NEXT: s_sub_u32 s32, s32, 0x200
|
||||
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
|
||||
; GFX10-NEXT: v_readlane_b32 s33, v41, 3
|
||||
; GFX10-NEXT: s_or_saveexec_b32 s6, -1
|
||||
; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
|
||||
|
@ -1242,9 +1242,9 @@ define amdgpu_gfx void @call_512xi32() #0 {
|
||||
; GFX9: ; %bb.0: ; %entry
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: s_mov_b32 s8, s33
|
||||
; GFX9-NEXT: s_add_u32 s33, s32, 0x1ffc0
|
||||
; GFX9-NEXT: s_add_i32 s33, s32, 0x1ffc0
|
||||
; GFX9-NEXT: s_and_b32 s33, s33, 0xfffe0000
|
||||
; GFX9-NEXT: s_add_u32 s32, s32, 0x60000
|
||||
; GFX9-NEXT: s_add_i32 s32, s32, 0x60000
|
||||
; GFX9-NEXT: s_getpc_b64 s[6:7]
|
||||
; GFX9-NEXT: s_add_u32 s6, s6, return_512xi32@gotpcrel32@lo+4
|
||||
; GFX9-NEXT: s_addc_u32 s7, s7, return_512xi32@gotpcrel32@hi+12
|
||||
@ -1253,7 +1253,7 @@ define amdgpu_gfx void @call_512xi32() #0 {
|
||||
; GFX9-NEXT: s_mov_b64 s[4:5], s[30:31]
|
||||
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7]
|
||||
; GFX9-NEXT: s_sub_u32 s32, s32, 0x60000
|
||||
; GFX9-NEXT: s_add_i32 s32, s32, 0xfffa0000
|
||||
; GFX9-NEXT: s_mov_b32 s33, s8
|
||||
; GFX9-NEXT: s_setpc_b64 s[4:5]
|
||||
;
|
||||
@ -1262,10 +1262,10 @@ define amdgpu_gfx void @call_512xi32() #0 {
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-NEXT: s_mov_b32 s8, s33
|
||||
; GFX10-NEXT: s_add_u32 s33, s32, 0xffe0
|
||||
; GFX10-NEXT: s_add_i32 s33, s32, 0xffe0
|
||||
; GFX10-NEXT: s_mov_b64 s[4:5], s[30:31]
|
||||
; GFX10-NEXT: s_and_b32 s33, s33, 0xffff0000
|
||||
; GFX10-NEXT: s_add_u32 s32, s32, 0x30000
|
||||
; GFX10-NEXT: s_add_i32 s32, s32, 0x30000
|
||||
; GFX10-NEXT: s_getpc_b64 s[6:7]
|
||||
; GFX10-NEXT: s_add_u32 s6, s6, return_512xi32@gotpcrel32@lo+4
|
||||
; GFX10-NEXT: s_addc_u32 s7, s7, return_512xi32@gotpcrel32@hi+12
|
||||
@ -1273,7 +1273,7 @@ define amdgpu_gfx void @call_512xi32() #0 {
|
||||
; GFX10-NEXT: s_load_dwordx2 s[6:7], s[6:7], 0x0
|
||||
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7]
|
||||
; GFX10-NEXT: s_sub_u32 s32, s32, 0x30000
|
||||
; GFX10-NEXT: s_add_i32 s32, s32, 0xfffd0000
|
||||
; GFX10-NEXT: s_mov_b32 s33, s8
|
||||
; GFX10-NEXT: s_setpc_b64 s[4:5]
|
||||
entry:
|
||||
|
@ -77,7 +77,7 @@ define amdgpu_kernel void @test_indirect_call_sgpr_ptr() {
|
||||
; GCN-NEXT: ; %bb.0:
|
||||
; GCN-NEXT: s_mov_b32 s32, 0
|
||||
; GCN-NEXT: s_mov_b32 flat_scratch_lo, s13
|
||||
; GCN-NEXT: s_add_u32 s12, s12, s17
|
||||
; GCN-NEXT: s_add_i32 s12, s12, s17
|
||||
; GCN-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
|
||||
; GCN-NEXT: s_add_u32 s0, s0, s17
|
||||
; GCN-NEXT: s_addc_u32 s1, s1, 0
|
||||
@ -173,7 +173,7 @@ define amdgpu_kernel void @test_indirect_call_sgpr_ptr_arg() {
|
||||
; GCN-NEXT: ; %bb.0:
|
||||
; GCN-NEXT: s_mov_b32 s32, 0
|
||||
; GCN-NEXT: s_mov_b32 flat_scratch_lo, s13
|
||||
; GCN-NEXT: s_add_u32 s12, s12, s17
|
||||
; GCN-NEXT: s_add_i32 s12, s12, s17
|
||||
; GCN-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
|
||||
; GCN-NEXT: s_add_u32 s0, s0, s17
|
||||
; GCN-NEXT: s_addc_u32 s1, s1, 0
|
||||
@ -206,7 +206,7 @@ define void @test_indirect_call_vgpr_ptr(void()* %fptr) {
|
||||
; GCN-NEXT: s_mov_b64 exec, s[16:17]
|
||||
; GCN-NEXT: v_writelane_b32 v43, s33, 17
|
||||
; GCN-NEXT: s_mov_b32 s33, s32
|
||||
; GCN-NEXT: s_add_u32 s32, s32, 0x800
|
||||
; GCN-NEXT: s_addk_i32 s32, 0x800
|
||||
; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill
|
||||
@ -276,7 +276,7 @@ define void @test_indirect_call_vgpr_ptr(void()* %fptr) {
|
||||
; GCN-NEXT: buffer_load_dword v42, off, s[0:3], s33 ; 4-byte Folded Reload
|
||||
; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
|
||||
; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
|
||||
; GCN-NEXT: s_sub_u32 s32, s32, 0x800
|
||||
; GCN-NEXT: s_addk_i32 s32, 0xf800
|
||||
; GCN-NEXT: v_readlane_b32 s33, v43, 17
|
||||
; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
|
||||
; GCN-NEXT: buffer_load_dword v43, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
|
||||
@ -296,7 +296,7 @@ define void @test_indirect_call_vgpr_ptr_arg(void(i32)* %fptr) {
|
||||
; GCN-NEXT: s_mov_b64 exec, s[16:17]
|
||||
; GCN-NEXT: v_writelane_b32 v43, s33, 17
|
||||
; GCN-NEXT: s_mov_b32 s33, s32
|
||||
; GCN-NEXT: s_add_u32 s32, s32, 0x800
|
||||
; GCN-NEXT: s_addk_i32 s32, 0x800
|
||||
; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill
|
||||
@ -367,7 +367,7 @@ define void @test_indirect_call_vgpr_ptr_arg(void(i32)* %fptr) {
|
||||
; GCN-NEXT: buffer_load_dword v42, off, s[0:3], s33 ; 4-byte Folded Reload
|
||||
; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
|
||||
; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
|
||||
; GCN-NEXT: s_sub_u32 s32, s32, 0x800
|
||||
; GCN-NEXT: s_addk_i32 s32, 0xf800
|
||||
; GCN-NEXT: v_readlane_b32 s33, v43, 17
|
||||
; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
|
||||
; GCN-NEXT: buffer_load_dword v43, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
|
||||
@ -387,7 +387,7 @@ define i32 @test_indirect_call_vgpr_ptr_ret(i32()* %fptr) {
|
||||
; GCN-NEXT: s_mov_b64 exec, s[16:17]
|
||||
; GCN-NEXT: v_writelane_b32 v43, s33, 17
|
||||
; GCN-NEXT: s_mov_b32 s33, s32
|
||||
; GCN-NEXT: s_add_u32 s32, s32, 0x800
|
||||
; GCN-NEXT: s_addk_i32 s32, 0x800
|
||||
; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill
|
||||
@ -458,7 +458,7 @@ define i32 @test_indirect_call_vgpr_ptr_ret(i32()* %fptr) {
|
||||
; GCN-NEXT: buffer_load_dword v42, off, s[0:3], s33 ; 4-byte Folded Reload
|
||||
; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
|
||||
; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
|
||||
; GCN-NEXT: s_sub_u32 s32, s32, 0x800
|
||||
; GCN-NEXT: s_addk_i32 s32, 0xf800
|
||||
; GCN-NEXT: v_readlane_b32 s33, v43, 17
|
||||
; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
|
||||
; GCN-NEXT: buffer_load_dword v43, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
|
||||
@ -479,7 +479,7 @@ define void @test_indirect_call_vgpr_ptr_in_branch(void()* %fptr, i1 %cond) {
|
||||
; GCN-NEXT: s_mov_b64 exec, s[16:17]
|
||||
; GCN-NEXT: v_writelane_b32 v43, s33, 19
|
||||
; GCN-NEXT: s_mov_b32 s33, s32
|
||||
; GCN-NEXT: s_add_u32 s32, s32, 0x800
|
||||
; GCN-NEXT: s_addk_i32 s32, 0x800
|
||||
; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill
|
||||
@ -560,7 +560,7 @@ define void @test_indirect_call_vgpr_ptr_in_branch(void()* %fptr, i1 %cond) {
|
||||
; GCN-NEXT: buffer_load_dword v42, off, s[0:3], s33 ; 4-byte Folded Reload
|
||||
; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
|
||||
; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
|
||||
; GCN-NEXT: s_sub_u32 s32, s32, 0x800
|
||||
; GCN-NEXT: s_addk_i32 s32, 0xf800
|
||||
; GCN-NEXT: v_readlane_b32 s33, v43, 19
|
||||
; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
|
||||
; GCN-NEXT: buffer_load_dword v43, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
|
||||
@ -587,7 +587,7 @@ define void @test_indirect_call_vgpr_ptr_inreg_arg(void(i32)* %fptr) {
|
||||
; GCN-NEXT: s_mov_b64 exec, s[4:5]
|
||||
; GCN-NEXT: v_writelane_b32 v42, s33, 6
|
||||
; GCN-NEXT: s_mov_b32 s33, s32
|
||||
; GCN-NEXT: s_add_u32 s32, s32, 0x400
|
||||
; GCN-NEXT: s_addk_i32 s32, 0x400
|
||||
; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: v_writelane_b32 v42, s34, 0
|
||||
@ -618,7 +618,7 @@ define void @test_indirect_call_vgpr_ptr_inreg_arg(void(i32)* %fptr) {
|
||||
; GCN-NEXT: v_readlane_b32 s34, v42, 0
|
||||
; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload
|
||||
; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
|
||||
; GCN-NEXT: s_sub_u32 s32, s32, 0x400
|
||||
; GCN-NEXT: s_addk_i32 s32, 0xfc00
|
||||
; GCN-NEXT: v_readlane_b32 s33, v42, 6
|
||||
; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
|
||||
; GCN-NEXT: buffer_load_dword v42, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
|
||||
|
@ -70,7 +70,7 @@ define amdgpu_kernel void @local_stack_offset_uses_sp(i64 addrspace(1)* %out) {
|
||||
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
|
||||
; FLATSCR-NEXT: BB0_1: ; %loadstoreloop
|
||||
; FLATSCR-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; FLATSCR-NEXT: s_add_u32 s3, 0x3000, s2
|
||||
; FLATSCR-NEXT: s_add_i32 s3, s2, 0x3000
|
||||
; FLATSCR-NEXT: s_add_i32 s2, s2, 1
|
||||
; FLATSCR-NEXT: s_cmpk_lt_u32 s2, 0x2120
|
||||
; FLATSCR-NEXT: scratch_store_byte off, v0, s3
|
||||
@ -78,7 +78,7 @@ define amdgpu_kernel void @local_stack_offset_uses_sp(i64 addrspace(1)* %out) {
|
||||
; FLATSCR-NEXT: s_cbranch_scc1 BB0_1
|
||||
; FLATSCR-NEXT: ; %bb.2: ; %split
|
||||
; FLATSCR-NEXT: s_movk_i32 s2, 0x2000
|
||||
; FLATSCR-NEXT: s_add_u32 s2, 0x3000, s2
|
||||
; FLATSCR-NEXT: s_addk_i32 s2, 0x3000
|
||||
; FLATSCR-NEXT: scratch_load_dwordx2 v[0:1], off, s2 offset:208 glc
|
||||
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
|
||||
; FLATSCR-NEXT: s_movk_i32 s2, 0x3000
|
||||
@ -111,14 +111,14 @@ define void @func_local_stack_offset_uses_sp(i64 addrspace(1)* %out) {
|
||||
; MUBUF: ; %bb.0: ; %entry
|
||||
; MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; MUBUF-NEXT: s_mov_b32 s5, s33
|
||||
; MUBUF-NEXT: s_add_u32 s33, s32, 0x7ffc0
|
||||
; MUBUF-NEXT: s_add_i32 s33, s32, 0x7ffc0
|
||||
; MUBUF-NEXT: s_and_b32 s33, s33, 0xfff80000
|
||||
; MUBUF-NEXT: v_lshrrev_b32_e64 v3, 6, s33
|
||||
; MUBUF-NEXT: v_add_u32_e32 v3, 0x1000, v3
|
||||
; MUBUF-NEXT: v_mov_b32_e32 v4, 0
|
||||
; MUBUF-NEXT: v_add_u32_e32 v2, 64, v3
|
||||
; MUBUF-NEXT: s_mov_b32 s4, 0
|
||||
; MUBUF-NEXT: s_add_u32 s32, s32, 0x180000
|
||||
; MUBUF-NEXT: s_add_i32 s32, s32, 0x180000
|
||||
; MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], s33
|
||||
; MUBUF-NEXT: s_waitcnt vmcnt(0)
|
||||
; MUBUF-NEXT: BB1_1: ; %loadstoreloop
|
||||
@ -141,7 +141,7 @@ define void @func_local_stack_offset_uses_sp(i64 addrspace(1)* %out) {
|
||||
; MUBUF-NEXT: s_waitcnt vmcnt(0)
|
||||
; MUBUF-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen offset:4 glc
|
||||
; MUBUF-NEXT: s_waitcnt vmcnt(0)
|
||||
; MUBUF-NEXT: s_sub_u32 s32, s32, 0x180000
|
||||
; MUBUF-NEXT: s_add_i32 s32, s32, 0xffe80000
|
||||
; MUBUF-NEXT: s_mov_b32 s33, s5
|
||||
; MUBUF-NEXT: v_add_co_u32_e32 v2, vcc, v4, v6
|
||||
; MUBUF-NEXT: v_addc_co_u32_e32 v3, vcc, v5, v7, vcc
|
||||
@ -153,17 +153,17 @@ define void @func_local_stack_offset_uses_sp(i64 addrspace(1)* %out) {
|
||||
; FLATSCR: ; %bb.0: ; %entry
|
||||
; FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; FLATSCR-NEXT: s_mov_b32 s2, s33
|
||||
; FLATSCR-NEXT: s_add_u32 s33, s32, 0x1fff
|
||||
; FLATSCR-NEXT: s_add_i32 s33, s32, 0x1fff
|
||||
; FLATSCR-NEXT: s_and_b32 s33, s33, 0xffffe000
|
||||
; FLATSCR-NEXT: v_mov_b32_e32 v2, 0
|
||||
; FLATSCR-NEXT: s_mov_b32 s0, 0
|
||||
; FLATSCR-NEXT: s_add_u32 s32, s32, 0x6000
|
||||
; FLATSCR-NEXT: s_addk_i32 s32, 0x6000
|
||||
; FLATSCR-NEXT: scratch_store_dword off, v2, s33
|
||||
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
|
||||
; FLATSCR-NEXT: BB1_1: ; %loadstoreloop
|
||||
; FLATSCR-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; FLATSCR-NEXT: s_add_u32 vcc_hi, s33, 0x1000
|
||||
; FLATSCR-NEXT: s_add_u32 s1, vcc_hi, s0
|
||||
; FLATSCR-NEXT: s_add_i32 vcc_hi, s33, 0x1000
|
||||
; FLATSCR-NEXT: s_add_i32 s1, s0, vcc_hi
|
||||
; FLATSCR-NEXT: s_add_i32 s0, s0, 1
|
||||
; FLATSCR-NEXT: s_cmpk_lt_u32 s0, 0x2120
|
||||
; FLATSCR-NEXT: scratch_store_byte off, v2, s1
|
||||
@ -171,14 +171,14 @@ define void @func_local_stack_offset_uses_sp(i64 addrspace(1)* %out) {
|
||||
; FLATSCR-NEXT: s_cbranch_scc1 BB1_1
|
||||
; FLATSCR-NEXT: ; %bb.2: ; %split
|
||||
; FLATSCR-NEXT: s_movk_i32 s0, 0x2000
|
||||
; FLATSCR-NEXT: s_add_u32 s1, s33, 0x1000
|
||||
; FLATSCR-NEXT: s_add_u32 s0, s1, s0
|
||||
; FLATSCR-NEXT: s_add_i32 s1, s33, 0x1000
|
||||
; FLATSCR-NEXT: s_add_i32 s0, s0, s1
|
||||
; FLATSCR-NEXT: scratch_load_dwordx2 v[2:3], off, s0 offset:208 glc
|
||||
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
|
||||
; FLATSCR-NEXT: s_add_u32 s0, s33, 0x1000
|
||||
; FLATSCR-NEXT: s_add_i32 s0, s33, 0x1000
|
||||
; FLATSCR-NEXT: scratch_load_dwordx2 v[4:5], off, s0 offset:64 glc
|
||||
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
|
||||
; FLATSCR-NEXT: s_sub_u32 s32, s32, 0x6000
|
||||
; FLATSCR-NEXT: s_addk_i32 s32, 0xa000
|
||||
; FLATSCR-NEXT: s_mov_b32 s33, s2
|
||||
; FLATSCR-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4
|
||||
; FLATSCR-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v5, vcc
|
||||
@ -286,7 +286,7 @@ define amdgpu_kernel void @local_stack_offset_uses_sp_flat(<3 x i64> addrspace(1
|
||||
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
|
||||
; FLATSCR-NEXT: BB2_1: ; %loadstoreloop
|
||||
; FLATSCR-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; FLATSCR-NEXT: s_add_u32 s3, 0x2000, s2
|
||||
; FLATSCR-NEXT: s_add_i32 s3, s2, 0x2000
|
||||
; FLATSCR-NEXT: s_add_i32 s2, s2, 1
|
||||
; FLATSCR-NEXT: s_cmpk_lt_u32 s2, 0x2120
|
||||
; FLATSCR-NEXT: scratch_store_byte off, v0, s3
|
||||
@ -294,7 +294,7 @@ define amdgpu_kernel void @local_stack_offset_uses_sp_flat(<3 x i64> addrspace(1
|
||||
; FLATSCR-NEXT: s_cbranch_scc1 BB2_1
|
||||
; FLATSCR-NEXT: ; %bb.2: ; %split
|
||||
; FLATSCR-NEXT: s_movk_i32 s2, 0x1000
|
||||
; FLATSCR-NEXT: s_add_u32 s2, 0x2000, s2
|
||||
; FLATSCR-NEXT: s_addk_i32 s2, 0x2000
|
||||
; FLATSCR-NEXT: scratch_load_dwordx2 v[8:9], off, s2 offset:720 glc
|
||||
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
|
||||
; FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 offset:704 glc
|
||||
|
@ -193,7 +193,7 @@ define void @slsr1_1(i32 %b.arg, i32 %s.arg) #0 {
|
||||
; GFX9-NEXT: s_mov_b64 exec, s[4:5]
|
||||
; GFX9-NEXT: v_writelane_b32 v43, s33, 4
|
||||
; GFX9-NEXT: s_mov_b32 s33, s32
|
||||
; GFX9-NEXT: s_add_u32 s32, s32, 0x800
|
||||
; GFX9-NEXT: s_addk_i32 s32, 0x800
|
||||
; GFX9-NEXT: v_writelane_b32 v43, s34, 0
|
||||
; GFX9-NEXT: s_getpc_b64 s[4:5]
|
||||
; GFX9-NEXT: s_add_u32 s4, s4, foo@gotpcrel32@lo+4
|
||||
@ -223,7 +223,7 @@ define void @slsr1_1(i32 %b.arg, i32 %s.arg) #0 {
|
||||
; GFX9-NEXT: v_readlane_b32 s5, v43, 3
|
||||
; GFX9-NEXT: v_readlane_b32 s35, v43, 1
|
||||
; GFX9-NEXT: v_readlane_b32 s34, v43, 0
|
||||
; GFX9-NEXT: s_sub_u32 s32, s32, 0x800
|
||||
; GFX9-NEXT: s_addk_i32 s32, 0xf800
|
||||
; GFX9-NEXT: v_readlane_b32 s33, v43, 4
|
||||
; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
|
||||
; GFX9-NEXT: buffer_load_dword v43, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
|
||||
|
@ -8,11 +8,11 @@ define hidden fastcc void @callee_has_fp() #1 {
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: s_mov_b32 s4, s33
|
||||
; CHECK-NEXT: s_mov_b32 s33, s32
|
||||
; CHECK-NEXT: s_add_u32 s32, s32, 0x200
|
||||
; CHECK-NEXT: s_addk_i32 s32, 0x200
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, 1
|
||||
; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: s_sub_u32 s32, s32, 0x200
|
||||
; CHECK-NEXT: s_addk_i32 s32, 0xfe00
|
||||
; CHECK-NEXT: s_mov_b32 s33, s4
|
||||
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||||
%alloca = alloca i32, addrspace(5)
|
||||
@ -29,7 +29,7 @@ define internal fastcc void @csr_vgpr_spill_fp_callee() #0 {
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: s_mov_b32 s8, s33
|
||||
; CHECK-NEXT: s_mov_b32 s33, s32
|
||||
; CHECK-NEXT: s_add_u32 s32, s32, 0x400
|
||||
; CHECK-NEXT: s_addk_i32 s32, 0x400
|
||||
; CHECK-NEXT: s_getpc_b64 s[4:5]
|
||||
; CHECK-NEXT: s_add_u32 s4, s4, callee_has_fp@rel32@lo+4
|
||||
; CHECK-NEXT: s_addc_u32 s5, s5, callee_has_fp@rel32@hi+12
|
||||
@ -40,7 +40,7 @@ define internal fastcc void @csr_vgpr_spill_fp_callee() #0 {
|
||||
; CHECK-NEXT: ; clobber csr v40
|
||||
; CHECK-NEXT: ;;#ASMEND
|
||||
; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
|
||||
; CHECK-NEXT: s_sub_u32 s32, s32, 0x400
|
||||
; CHECK-NEXT: s_addk_i32 s32, 0xfc00
|
||||
; CHECK-NEXT: s_mov_b32 s33, s8
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: s_setpc_b64 s[6:7]
|
||||
|
@ -16,7 +16,7 @@ declare void @external_void_func_i32(i32) #0
|
||||
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]]
|
||||
; GCN-DAG: v_writelane_b32 v40, s33, 2
|
||||
; GCN-DAG: s_mov_b32 s33, s32
|
||||
; GCN-DAG: s_add_u32 s32, s32, 0x400
|
||||
; GCN-DAG: s_addk_i32 s32, 0x400
|
||||
; GCN-DAG: v_writelane_b32 v40, s30, 0
|
||||
; GCN-DAG: v_writelane_b32 v40, s31, 1
|
||||
|
||||
@ -25,7 +25,7 @@ declare void @external_void_func_i32(i32) #0
|
||||
; GCN: v_readlane_b32 s4, v40, 0
|
||||
; GCN: v_readlane_b32 s5, v40, 1
|
||||
|
||||
; GCN-NEXT: s_sub_u32 s32, s32, 0x400
|
||||
; GCN-NEXT: s_addk_i32 s32, 0xfc00
|
||||
; GCN-NEXT: v_readlane_b32 s33, v40, 2
|
||||
; GCN: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
|
||||
; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
|
||||
@ -40,10 +40,10 @@ define void @test_func_call_external_void_func_i32_imm() #0 {
|
||||
; GCN-LABEL: {{^}}test_func_call_external_void_func_i32_imm_stack_use:
|
||||
; GCN: s_waitcnt
|
||||
; GCN: s_mov_b32 s33, s32
|
||||
; GCN-DAG: s_add_u32 s32, s32, 0x1400{{$}}
|
||||
; GCN-DAG: s_addk_i32 s32, 0x1400{{$}}
|
||||
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s33 offset:
|
||||
; GCN: s_swappc_b64
|
||||
; GCN: s_sub_u32 s32, s32, 0x1400{{$}}
|
||||
; GCN: s_addk_i32 s32, 0xec00{{$}}
|
||||
; GCN: s_setpc_b64
|
||||
define void @test_func_call_external_void_func_i32_imm_stack_use() #0 {
|
||||
%alloca = alloca [16 x i32], align 4, addrspace(5)
|
||||
|
@ -65,16 +65,14 @@ define amdgpu_kernel void @kernel_non_entry_block_static_alloca_uniformly_reache
|
||||
; FLATSCR-NEXT: s_cmp_lg_u32 s5, 0
|
||||
; FLATSCR-NEXT: s_cbranch_scc1 BB0_3
|
||||
; FLATSCR-NEXT: ; %bb.2: ; %bb.1
|
||||
; FLATSCR-NEXT: s_mov_b32 s2, s32
|
||||
; FLATSCR-NEXT: s_add_i32 s3, s2, 0x1000
|
||||
; FLATSCR-NEXT: s_add_i32 s2, s32, 0x1000
|
||||
; FLATSCR-NEXT: v_mov_b32_e32 v1, 0
|
||||
; FLATSCR-NEXT: s_add_u32 s2, s2, 0x1000
|
||||
; FLATSCR-NEXT: v_mov_b32_e32 v2, 1
|
||||
; FLATSCR-NEXT: s_lshl_b32 s3, s6, 2
|
||||
; FLATSCR-NEXT: s_mov_b32 s32, s2
|
||||
; FLATSCR-NEXT: scratch_store_dwordx2 off, v[1:2], s2
|
||||
; FLATSCR-NEXT: s_lshl_b32 s2, s6, 2
|
||||
; FLATSCR-NEXT: s_mov_b32 s32, s3
|
||||
; FLATSCR-NEXT: s_add_i32 s3, s3, s2
|
||||
; FLATSCR-NEXT: scratch_load_dword v2, off, s3
|
||||
; FLATSCR-NEXT: s_add_i32 s2, s2, s3
|
||||
; FLATSCR-NEXT: scratch_load_dword v2, off, s2
|
||||
; FLATSCR-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
|
||||
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
|
||||
; FLATSCR-NEXT: v_add_u32_e32 v0, v2, v0
|
||||
@ -221,7 +219,7 @@ define void @func_non_entry_block_static_alloca_align4(i32 addrspace(1)* %out, i
|
||||
; MUBUF-NEXT: s_mov_b32 s7, s33
|
||||
; MUBUF-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
|
||||
; MUBUF-NEXT: s_mov_b32 s33, s32
|
||||
; MUBUF-NEXT: s_add_u32 s32, s32, 0x400
|
||||
; MUBUF-NEXT: s_addk_i32 s32, 0x400
|
||||
; MUBUF-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; MUBUF-NEXT: s_cbranch_execz BB2_3
|
||||
; MUBUF-NEXT: ; %bb.1: ; %bb.0
|
||||
@ -247,17 +245,17 @@ define void @func_non_entry_block_static_alloca_align4(i32 addrspace(1)* %out, i
|
||||
; MUBUF-NEXT: v_mov_b32_e32 v0, 0
|
||||
; MUBUF-NEXT: global_store_dword v[0:1], v0, off
|
||||
; MUBUF-NEXT: s_waitcnt vmcnt(0)
|
||||
; MUBUF-NEXT: s_sub_u32 s32, s32, 0x400
|
||||
; MUBUF-NEXT: s_addk_i32 s32, 0xfc00
|
||||
; MUBUF-NEXT: s_mov_b32 s33, s7
|
||||
; MUBUF-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; FLATSCR-LABEL: func_non_entry_block_static_alloca_align4:
|
||||
; FLATSCR: ; %bb.0: ; %entry
|
||||
; FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; FLATSCR-NEXT: s_mov_b32 s4, s33
|
||||
; FLATSCR-NEXT: s_mov_b32 s3, s33
|
||||
; FLATSCR-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
|
||||
; FLATSCR-NEXT: s_mov_b32 s33, s32
|
||||
; FLATSCR-NEXT: s_add_u32 s32, s32, 16
|
||||
; FLATSCR-NEXT: s_add_i32 s32, s32, 16
|
||||
; FLATSCR-NEXT: s_and_saveexec_b64 s[0:1], vcc
|
||||
; FLATSCR-NEXT: s_cbranch_execz BB2_3
|
||||
; FLATSCR-NEXT: ; %bb.1: ; %bb.0
|
||||
@ -265,16 +263,14 @@ define void @func_non_entry_block_static_alloca_align4(i32 addrspace(1)* %out, i
|
||||
; FLATSCR-NEXT: s_and_b64 exec, exec, vcc
|
||||
; FLATSCR-NEXT: s_cbranch_execz BB2_3
|
||||
; FLATSCR-NEXT: ; %bb.2: ; %bb.1
|
||||
; FLATSCR-NEXT: s_mov_b32 s2, s32
|
||||
; FLATSCR-NEXT: s_add_i32 s3, s2, 0x1000
|
||||
; FLATSCR-NEXT: s_add_u32 s2, s2, 0x1000
|
||||
; FLATSCR-NEXT: s_add_i32 s2, s32, 0x1000
|
||||
; FLATSCR-NEXT: v_mov_b32_e32 v2, 0
|
||||
; FLATSCR-NEXT: v_mov_b32_e32 v3, 1
|
||||
; FLATSCR-NEXT: scratch_store_dwordx2 off, v[2:3], s2
|
||||
; FLATSCR-NEXT: v_lshl_add_u32 v2, v4, 2, s3
|
||||
; FLATSCR-NEXT: v_lshl_add_u32 v2, v4, 2, s2
|
||||
; FLATSCR-NEXT: scratch_load_dword v2, v2, off
|
||||
; FLATSCR-NEXT: v_and_b32_e32 v3, 0x3ff, v5
|
||||
; FLATSCR-NEXT: s_mov_b32 s32, s3
|
||||
; FLATSCR-NEXT: s_mov_b32 s32, s2
|
||||
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
|
||||
; FLATSCR-NEXT: v_add_u32_e32 v2, v2, v3
|
||||
; FLATSCR-NEXT: global_store_dword v[0:1], v2, off
|
||||
@ -283,8 +279,8 @@ define void @func_non_entry_block_static_alloca_align4(i32 addrspace(1)* %out, i
|
||||
; FLATSCR-NEXT: v_mov_b32_e32 v0, 0
|
||||
; FLATSCR-NEXT: global_store_dword v[0:1], v0, off
|
||||
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
|
||||
; FLATSCR-NEXT: s_sub_u32 s32, s32, 16
|
||||
; FLATSCR-NEXT: s_mov_b32 s33, s4
|
||||
; FLATSCR-NEXT: s_add_i32 s32, s32, -16
|
||||
; FLATSCR-NEXT: s_mov_b32 s33, s3
|
||||
; FLATSCR-NEXT: s_setpc_b64 s[30:31]
|
||||
|
||||
entry:
|
||||
@ -319,10 +315,10 @@ define void @func_non_entry_block_static_alloca_align64(i32 addrspace(1)* %out,
|
||||
; MUBUF: ; %bb.0: ; %entry
|
||||
; MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; MUBUF-NEXT: s_mov_b32 s7, s33
|
||||
; MUBUF-NEXT: s_add_u32 s33, s32, 0xfc0
|
||||
; MUBUF-NEXT: s_add_i32 s33, s32, 0xfc0
|
||||
; MUBUF-NEXT: s_and_b32 s33, s33, 0xfffff000
|
||||
; MUBUF-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
|
||||
; MUBUF-NEXT: s_add_u32 s32, s32, 0x2000
|
||||
; MUBUF-NEXT: s_addk_i32 s32, 0x2000
|
||||
; MUBUF-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; MUBUF-NEXT: s_cbranch_execz BB3_2
|
||||
; MUBUF-NEXT: ; %bb.1: ; %bb.0
|
||||
@ -345,7 +341,7 @@ define void @func_non_entry_block_static_alloca_align64(i32 addrspace(1)* %out,
|
||||
; MUBUF-NEXT: v_mov_b32_e32 v0, 0
|
||||
; MUBUF-NEXT: global_store_dword v[0:1], v0, off
|
||||
; MUBUF-NEXT: s_waitcnt vmcnt(0)
|
||||
; MUBUF-NEXT: s_sub_u32 s32, s32, 0x2000
|
||||
; MUBUF-NEXT: s_addk_i32 s32, 0xe000
|
||||
; MUBUF-NEXT: s_mov_b32 s33, s7
|
||||
; MUBUF-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
@ -353,10 +349,10 @@ define void @func_non_entry_block_static_alloca_align64(i32 addrspace(1)* %out,
|
||||
; FLATSCR: ; %bb.0: ; %entry
|
||||
; FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; FLATSCR-NEXT: s_mov_b32 s3, s33
|
||||
; FLATSCR-NEXT: s_add_u32 s33, s32, 63
|
||||
; FLATSCR-NEXT: s_add_i32 s33, s32, 63
|
||||
; FLATSCR-NEXT: s_andn2_b32 s33, s33, 63
|
||||
; FLATSCR-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
|
||||
; FLATSCR-NEXT: s_add_u32 s32, s32, 0x80
|
||||
; FLATSCR-NEXT: s_addk_i32 s32, 0x80
|
||||
; FLATSCR-NEXT: s_and_saveexec_b64 s[0:1], vcc
|
||||
; FLATSCR-NEXT: s_cbranch_execz BB3_2
|
||||
; FLATSCR-NEXT: ; %bb.1: ; %bb.0
|
||||
@ -377,7 +373,7 @@ define void @func_non_entry_block_static_alloca_align64(i32 addrspace(1)* %out,
|
||||
; FLATSCR-NEXT: v_mov_b32_e32 v0, 0
|
||||
; FLATSCR-NEXT: global_store_dword v[0:1], v0, off
|
||||
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
|
||||
; FLATSCR-NEXT: s_sub_u32 s32, s32, 0x80
|
||||
; FLATSCR-NEXT: s_addk_i32 s32, 0xff80
|
||||
; FLATSCR-NEXT: s_mov_b32 s33, s3
|
||||
; FLATSCR-NEXT: s_setpc_b64 s[30:31]
|
||||
entry:
|
||||
|
@ -29,25 +29,25 @@ body: |
|
||||
; CHECK-LABEL: name: scavenge_sgpr_pei_no_sgprs
|
||||
; CHECK: liveins: $vgpr1, $vgpr2
|
||||
; CHECK: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK: $sgpr6 = S_ADD_U32 $sgpr32, 524544, implicit-def $scc
|
||||
; CHECK: $sgpr6 = S_ADD_I32 $sgpr32, 524544, implicit-def $scc
|
||||
; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.3, addrspace 5)
|
||||
; CHECK: $exec = S_MOV_B64 killed $sgpr4_sgpr5
|
||||
; CHECK: $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2
|
||||
; CHECK: $sgpr33 = frame-setup S_ADD_U32 $sgpr32, 524224, implicit-def $scc
|
||||
; CHECK: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
|
||||
; CHECK: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def $scc
|
||||
; CHECK: $sgpr32 = frame-setup S_ADD_U32 $sgpr32, 1572864, implicit-def $scc
|
||||
; CHECK: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 1572864, implicit-def $scc
|
||||
; CHECK: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
|
||||
; CHECK: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
|
||||
; CHECK: $sgpr33 = S_LSHR_B32 $sgpr33, 6, implicit-def $scc
|
||||
; CHECK: $sgpr33 = S_ADD_U32 killed $sgpr33, 8192, implicit-def $scc
|
||||
; CHECK: $sgpr33 = S_ADD_I32 killed $sgpr33, 8192, implicit-def $scc
|
||||
; CHECK: $vgpr3 = COPY killed $sgpr33
|
||||
; CHECK: $sgpr33 = S_SUB_U32 killed $sgpr33, 8192, implicit-def $scc
|
||||
; CHECK: $sgpr33 = S_ADD_I32 killed $sgpr33, -8192, implicit-def $scc
|
||||
; CHECK: $sgpr33 = S_LSHL_B32 $sgpr33, 6, implicit-def $scc
|
||||
; CHECK: $vgpr0 = V_OR_B32_e32 killed $vgpr3, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31
|
||||
; CHECK: $sgpr32 = frame-destroy S_SUB_U32 $sgpr32, 1572864, implicit-def $scc
|
||||
; CHECK: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -1572864, implicit-def $scc
|
||||
; CHECK: $sgpr33 = V_READLANE_B32 $vgpr2, 0
|
||||
; CHECK: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK: $sgpr6 = S_ADD_U32 $sgpr32, 524544, implicit-def $scc
|
||||
; CHECK: $sgpr6 = S_ADD_I32 $sgpr32, 524544, implicit-def $scc
|
||||
; CHECK: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.3, addrspace 5)
|
||||
; CHECK: $exec = S_MOV_B64 killed $sgpr4_sgpr5
|
||||
; CHECK: S_ENDPGM 0, implicit $vcc
|
||||
@ -81,18 +81,18 @@ body: |
|
||||
; CHECK-LABEL: name: scavenge_sgpr_pei_one_sgpr
|
||||
; CHECK: liveins: $sgpr29, $vgpr1
|
||||
; CHECK: $sgpr29 = frame-setup COPY $sgpr33
|
||||
; CHECK: $sgpr33 = frame-setup S_ADD_U32 $sgpr32, 524224, implicit-def $scc
|
||||
; CHECK: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
|
||||
; CHECK: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def $scc
|
||||
; CHECK: $sgpr32 = frame-setup S_ADD_U32 $sgpr32, 1572864, implicit-def $scc
|
||||
; CHECK: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 1572864, implicit-def $scc
|
||||
; CHECK: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
|
||||
; CHECK: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
|
||||
; CHECK: $sgpr33 = S_LSHR_B32 $sgpr33, 6, implicit-def $scc
|
||||
; CHECK: $sgpr33 = S_ADD_U32 killed $sgpr33, 8192, implicit-def $scc
|
||||
; CHECK: $sgpr33 = S_ADD_I32 killed $sgpr33, 8192, implicit-def $scc
|
||||
; CHECK: $vgpr2 = COPY killed $sgpr33
|
||||
; CHECK: $sgpr33 = S_SUB_U32 killed $sgpr33, 8192, implicit-def $scc
|
||||
; CHECK: $sgpr33 = S_ADD_I32 killed $sgpr33, -8192, implicit-def $scc
|
||||
; CHECK: $sgpr33 = S_LSHL_B32 $sgpr33, 6, implicit-def $scc
|
||||
; CHECK: $vgpr0 = V_OR_B32_e32 killed $vgpr2, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr31
|
||||
; CHECK: $sgpr32 = frame-destroy S_SUB_U32 $sgpr32, 1572864, implicit-def $scc
|
||||
; CHECK: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -1572864, implicit-def $scc
|
||||
; CHECK: $sgpr33 = frame-destroy COPY $sgpr29
|
||||
; CHECK: S_ENDPGM 0, implicit $vcc
|
||||
S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
|
||||
@ -125,16 +125,16 @@ body: |
|
||||
; CHECK-LABEL: name: scavenge_sgpr_pei_one_sgpr_64
|
||||
; CHECK: liveins: $sgpr28, $vgpr1
|
||||
; CHECK: $sgpr28 = frame-setup COPY $sgpr33
|
||||
; CHECK: $sgpr33 = frame-setup S_ADD_U32 $sgpr32, 524224, implicit-def $scc
|
||||
; CHECK: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
|
||||
; CHECK: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def $scc
|
||||
; CHECK: $sgpr32 = frame-setup S_ADD_U32 $sgpr32, 1572864, implicit-def $scc
|
||||
; CHECK: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 1572864, implicit-def $scc
|
||||
; CHECK: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
|
||||
; CHECK: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
|
||||
; CHECK: $sgpr29 = S_LSHR_B32 $sgpr33, 6, implicit-def $scc
|
||||
; CHECK: $sgpr29 = S_ADD_U32 killed $sgpr29, 8192, implicit-def $scc
|
||||
; CHECK: $sgpr29 = S_ADD_I32 killed $sgpr29, 8192, implicit-def $scc
|
||||
; CHECK: $vgpr2 = COPY killed $sgpr29
|
||||
; CHECK: $vgpr0 = V_OR_B32_e32 killed $vgpr2, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr31
|
||||
; CHECK: $sgpr32 = frame-destroy S_SUB_U32 $sgpr32, 1572864, implicit-def $scc
|
||||
; CHECK: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -1572864, implicit-def $scc
|
||||
; CHECK: $sgpr33 = frame-destroy COPY $sgpr28
|
||||
; CHECK: S_ENDPGM 0, implicit $vcc
|
||||
S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
|
||||
@ -166,16 +166,16 @@ body: |
|
||||
; CHECK-LABEL: name: scavenge_sgpr_pei_prefer_vcc
|
||||
; CHECK: liveins: $sgpr28, $vgpr1
|
||||
; CHECK: $sgpr28 = frame-setup COPY $sgpr33
|
||||
; CHECK: $sgpr33 = frame-setup S_ADD_U32 $sgpr32, 524224, implicit-def $scc
|
||||
; CHECK: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
|
||||
; CHECK: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def $scc
|
||||
; CHECK: $sgpr32 = frame-setup S_ADD_U32 $sgpr32, 1572864, implicit-def $scc
|
||||
; CHECK: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 1572864, implicit-def $scc
|
||||
; CHECK: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr30, implicit-def $sgpr31
|
||||
; CHECK: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
|
||||
; CHECK: $vgpr2 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
|
||||
; CHECK: $vcc_lo = S_MOV_B32 8192
|
||||
; CHECK: $vgpr2, dead $vcc = V_ADD_CO_U32_e64 killed $vcc_lo, killed $vgpr2, 0, implicit $exec
|
||||
; CHECK: $vgpr0 = V_OR_B32_e32 killed $vgpr2, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr31
|
||||
; CHECK: $sgpr32 = frame-destroy S_SUB_U32 $sgpr32, 1572864, implicit-def $scc
|
||||
; CHECK: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -1572864, implicit-def $scc
|
||||
; CHECK: $sgpr33 = frame-destroy COPY $sgpr28
|
||||
; CHECK: S_ENDPGM 0
|
||||
S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr30, implicit-def $sgpr31
|
||||
|
@ -25,44 +25,44 @@ body: |
|
||||
; MUBUF-LABEL: name: scavenge_sgpr_pei_no_sgprs
|
||||
; MUBUF: liveins: $vgpr1, $vgpr2
|
||||
; MUBUF: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; MUBUF: $sgpr6 = S_ADD_U32 $sgpr32, 524544, implicit-def $scc
|
||||
; MUBUF: $sgpr6 = S_ADD_I32 $sgpr32, 524544, implicit-def $scc
|
||||
; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.3, addrspace 5)
|
||||
; MUBUF: $exec = S_MOV_B64 killed $sgpr4_sgpr5
|
||||
; MUBUF: $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2
|
||||
; MUBUF: $sgpr33 = frame-setup S_ADD_U32 $sgpr32, 524224, implicit-def $scc
|
||||
; MUBUF: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
|
||||
; MUBUF: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def $scc
|
||||
; MUBUF: $sgpr32 = frame-setup S_ADD_U32 $sgpr32, 1572864, implicit-def $scc
|
||||
; MUBUF: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 1572864, implicit-def $scc
|
||||
; MUBUF: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
|
||||
; MUBUF: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
|
||||
; MUBUF: $vgpr3 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
|
||||
; MUBUF: $vgpr3 = V_ADD_U32_e32 8192, killed $vgpr3, implicit $exec
|
||||
; MUBUF: $vgpr0 = V_OR_B32_e32 killed $vgpr3, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31
|
||||
; MUBUF: $sgpr32 = frame-destroy S_SUB_U32 $sgpr32, 1572864, implicit-def $scc
|
||||
; MUBUF: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -1572864, implicit-def $scc
|
||||
; MUBUF: $sgpr33 = V_READLANE_B32 $vgpr2, 0
|
||||
; MUBUF: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; MUBUF: $sgpr6 = S_ADD_U32 $sgpr32, 524544, implicit-def $scc
|
||||
; MUBUF: $sgpr6 = S_ADD_I32 $sgpr32, 524544, implicit-def $scc
|
||||
; MUBUF: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.3, addrspace 5)
|
||||
; MUBUF: $exec = S_MOV_B64 killed $sgpr4_sgpr5
|
||||
; MUBUF: S_ENDPGM 0, implicit $vcc
|
||||
; FLATSCR-LABEL: name: scavenge_sgpr_pei_no_sgprs
|
||||
; FLATSCR: liveins: $vgpr1, $vgpr2
|
||||
; FLATSCR: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; FLATSCR: $sgpr6 = S_ADD_U32 $sgpr32, 8196, implicit-def $scc
|
||||
; FLATSCR: $sgpr6 = S_ADD_I32 $sgpr32, 8196, implicit-def $scc
|
||||
; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, killed $sgpr6, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.3, addrspace 5)
|
||||
; FLATSCR: $exec = S_MOV_B64 killed $sgpr4_sgpr5
|
||||
; FLATSCR: $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2
|
||||
; FLATSCR: $sgpr33 = frame-setup S_ADD_U32 $sgpr32, 8191, implicit-def $scc
|
||||
; FLATSCR: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc
|
||||
; FLATSCR: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def $scc
|
||||
; FLATSCR: $sgpr32 = frame-setup S_ADD_U32 $sgpr32, 24576, implicit-def $scc
|
||||
; FLATSCR: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 24576, implicit-def $scc
|
||||
; FLATSCR: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
|
||||
; FLATSCR: $vgpr0 = V_MOV_B32_e32 $sgpr33, implicit $exec
|
||||
; FLATSCR: $sgpr33 = S_ADD_U32 $sgpr33, 8192, implicit-def $scc
|
||||
; FLATSCR: $sgpr33 = S_ADD_I32 $sgpr33, 8192, implicit-def $scc
|
||||
; FLATSCR: $vgpr0 = V_OR_B32_e32 $sgpr33, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31
|
||||
; FLATSCR: $sgpr33 = S_SUB_U32 $sgpr33, 8192, implicit-def $scc
|
||||
; FLATSCR: $sgpr32 = frame-destroy S_SUB_U32 $sgpr32, 24576, implicit-def $scc
|
||||
; FLATSCR: $sgpr33 = S_ADD_I32 $sgpr33, -8192, implicit-def $scc
|
||||
; FLATSCR: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -24576, implicit-def $scc
|
||||
; FLATSCR: $sgpr33 = V_READLANE_B32 $vgpr2, 0
|
||||
; FLATSCR: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; FLATSCR: $sgpr6 = S_ADD_U32 $sgpr32, 8196, implicit-def $scc
|
||||
; FLATSCR: $sgpr6 = S_ADD_I32 $sgpr32, 8196, implicit-def $scc
|
||||
; FLATSCR: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR killed $sgpr6, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.3, addrspace 5)
|
||||
; FLATSCR: $exec = S_MOV_B64 killed $sgpr4_sgpr5
|
||||
; FLATSCR: S_ENDPGM 0, implicit $vcc
|
||||
|
@ -27,13 +27,13 @@ body: |
|
||||
; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec :: (store 4 into %stack.2, addrspace 5)
|
||||
; CHECK: $exec = S_MOV_B64 killed $sgpr4_sgpr5
|
||||
; CHECK: $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2
|
||||
; CHECK: $sgpr33 = frame-setup S_ADD_U32 $sgpr32, 262080, implicit-def $scc
|
||||
; CHECK: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 262080, implicit-def $scc
|
||||
; CHECK: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294705152, implicit-def $scc
|
||||
; CHECK: $sgpr32 = frame-setup S_ADD_U32 $sgpr32, 524288, implicit-def $scc
|
||||
; CHECK: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 524288, implicit-def $scc
|
||||
; CHECK: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
|
||||
; CHECK: $vgpr3 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
|
||||
; CHECK: $vgpr0 = V_OR_B32_e32 killed $vgpr3, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31
|
||||
; CHECK: $sgpr32 = frame-destroy S_SUB_U32 $sgpr32, 524288, implicit-def $scc
|
||||
; CHECK: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -524288, implicit-def $scc
|
||||
; CHECK: $sgpr33 = V_READLANE_B32 $vgpr2, 0
|
||||
; CHECK: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec :: (load 4 from %stack.2, addrspace 5)
|
||||
|
@ -26,71 +26,71 @@ body: |
|
||||
; GFX8-LABEL: name: pei_scavenge_vgpr_spill
|
||||
; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr2
|
||||
; GFX8: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX8: $sgpr6 = S_ADD_U32 $sgpr32, 524544, implicit-def $scc
|
||||
; GFX8: $sgpr6 = S_ADD_I32 $sgpr32, 524544, implicit-def $scc
|
||||
; GFX8: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.3, addrspace 5)
|
||||
; GFX8: $exec = S_MOV_B64 killed $sgpr4_sgpr5
|
||||
; GFX8: $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2
|
||||
; GFX8: $sgpr33 = frame-setup S_ADD_U32 $sgpr32, 524224, implicit-def $scc
|
||||
; GFX8: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
|
||||
; GFX8: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def $scc
|
||||
; GFX8: $sgpr32 = frame-setup S_ADD_U32 $sgpr32, 1572864, implicit-def $scc
|
||||
; GFX8: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 1572864, implicit-def $scc
|
||||
; GFX8: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
|
||||
; GFX8: $sgpr7 = S_ADD_U32 $sgpr33, 524800, implicit-def $scc
|
||||
; GFX8: $sgpr7 = S_ADD_I32 $sgpr33, 524800, implicit-def $scc
|
||||
; GFX8: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr7, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.4, addrspace 5)
|
||||
; GFX8: $vgpr3 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
|
||||
; GFX8: $vcc_lo = S_MOV_B32 8192
|
||||
; GFX8: $vgpr3, dead $vcc = V_ADD_CO_U32_e64 killed $vcc_lo, killed $vgpr3, 0, implicit $exec
|
||||
; GFX8: $vgpr0 = V_OR_B32_e32 killed $vgpr3, $vgpr1, implicit $exec
|
||||
; GFX8: $sgpr32 = frame-destroy S_SUB_U32 $sgpr32, 1572864, implicit-def $scc
|
||||
; GFX8: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -1572864, implicit-def $scc
|
||||
; GFX8: $sgpr33 = V_READLANE_B32 $vgpr2, 0
|
||||
; GFX8: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX8: $sgpr6 = S_ADD_U32 $sgpr32, 524544, implicit-def $scc
|
||||
; GFX8: $sgpr6 = S_ADD_I32 $sgpr32, 524544, implicit-def $scc
|
||||
; GFX8: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.3, addrspace 5)
|
||||
; GFX8: $exec = S_MOV_B64 killed $sgpr4_sgpr5
|
||||
; GFX8: $sgpr4 = S_ADD_U32 $sgpr33, 524800, implicit-def $scc
|
||||
; GFX8: $sgpr4 = S_ADD_I32 $sgpr33, 524800, implicit-def $scc
|
||||
; GFX8: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr4, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.4, addrspace 5)
|
||||
; GFX8: S_ENDPGM 0, csr_amdgpu_allvgprs
|
||||
; GFX9-LABEL: name: pei_scavenge_vgpr_spill
|
||||
; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr2
|
||||
; GFX9: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX9: $sgpr6 = S_ADD_U32 $sgpr32, 524544, implicit-def $scc
|
||||
; GFX9: $sgpr6 = S_ADD_I32 $sgpr32, 524544, implicit-def $scc
|
||||
; GFX9: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.3, addrspace 5)
|
||||
; GFX9: $exec = S_MOV_B64 killed $sgpr4_sgpr5
|
||||
; GFX9: $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2
|
||||
; GFX9: $sgpr33 = frame-setup S_ADD_U32 $sgpr32, 524224, implicit-def $scc
|
||||
; GFX9: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
|
||||
; GFX9: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def $scc
|
||||
; GFX9: $sgpr32 = frame-setup S_ADD_U32 $sgpr32, 1572864, implicit-def $scc
|
||||
; GFX9: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 1572864, implicit-def $scc
|
||||
; GFX9: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
|
||||
; GFX9: $sgpr7 = S_ADD_U32 $sgpr33, 524800, implicit-def $scc
|
||||
; GFX9: $sgpr7 = S_ADD_I32 $sgpr33, 524800, implicit-def $scc
|
||||
; GFX9: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr7, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.4, addrspace 5)
|
||||
; GFX9: $vgpr3 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
|
||||
; GFX9: $vgpr3 = V_ADD_U32_e32 8192, killed $vgpr3, implicit $exec
|
||||
; GFX9: $vgpr0 = V_OR_B32_e32 killed $vgpr3, $vgpr1, implicit $exec
|
||||
; GFX9: $sgpr32 = frame-destroy S_SUB_U32 $sgpr32, 1572864, implicit-def $scc
|
||||
; GFX9: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -1572864, implicit-def $scc
|
||||
; GFX9: $sgpr33 = V_READLANE_B32 $vgpr2, 0
|
||||
; GFX9: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX9: $sgpr6 = S_ADD_U32 $sgpr32, 524544, implicit-def $scc
|
||||
; GFX9: $sgpr6 = S_ADD_I32 $sgpr32, 524544, implicit-def $scc
|
||||
; GFX9: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.3, addrspace 5)
|
||||
; GFX9: $exec = S_MOV_B64 killed $sgpr4_sgpr5
|
||||
; GFX9: $sgpr4 = S_ADD_U32 $sgpr33, 524800, implicit-def $scc
|
||||
; GFX9: $sgpr4 = S_ADD_I32 $sgpr33, 524800, implicit-def $scc
|
||||
; GFX9: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr4, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.4, addrspace 5)
|
||||
; GFX9: S_ENDPGM 0, csr_amdgpu_allvgprs
|
||||
; GFX9-FLATSCR-LABEL: name: pei_scavenge_vgpr_spill
|
||||
; GFX9-FLATSCR: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, 
$vgpr2
|
||||
; GFX9-FLATSCR: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX9-FLATSCR: $sgpr6 = S_ADD_U32 $sgpr32, 8196, implicit-def $scc
|
||||
; GFX9-FLATSCR: $sgpr6 = S_ADD_I32 $sgpr32, 8196, implicit-def $scc
|
||||
; GFX9-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, killed $sgpr6, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.3, addrspace 5)
|
||||
; GFX9-FLATSCR: $exec = S_MOV_B64 killed $sgpr4_sgpr5
|
||||
; GFX9-FLATSCR: $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2
|
||||
; GFX9-FLATSCR: $sgpr33 = frame-setup S_ADD_U32 $sgpr32, 8191, implicit-def $scc
|
||||
; GFX9-FLATSCR: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc
|
||||
; GFX9-FLATSCR: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def $scc
|
||||
; GFX9-FLATSCR: $sgpr32 = frame-setup S_ADD_U32 $sgpr32, 24576, implicit-def $scc
|
||||
; GFX9-FLATSCR: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 24576, implicit-def $scc
|
||||
; GFX9-FLATSCR: $vgpr0 = V_MOV_B32_e32 $sgpr33, implicit $exec
|
||||
; GFX9-FLATSCR: $vcc_hi = S_ADD_U32 $sgpr33, 8192, implicit-def $scc
|
||||
; GFX9-FLATSCR: $vcc_hi = S_ADD_I32 $sgpr33, 8192, implicit-def $scc
|
||||
; GFX9-FLATSCR: $vgpr0 = V_OR_B32_e32 killed $vcc_hi, $vgpr1, implicit $exec
|
||||
; GFX9-FLATSCR: $sgpr32 = frame-destroy S_SUB_U32 $sgpr32, 24576, implicit-def $scc
|
||||
; GFX9-FLATSCR: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -24576, implicit-def $scc
|
||||
; GFX9-FLATSCR: $sgpr33 = V_READLANE_B32 $vgpr2, 0
|
||||
; GFX9-FLATSCR: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX9-FLATSCR: $sgpr6 = S_ADD_U32 $sgpr32, 8196, implicit-def $scc
|
||||
; GFX9-FLATSCR: $sgpr6 = S_ADD_I32 $sgpr32, 8196, implicit-def $scc
|
||||
; GFX9-FLATSCR: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR killed $sgpr6, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.3, addrspace 5)
|
||||
; GFX9-FLATSCR: $exec = S_MOV_B64 killed $sgpr4_sgpr5
|
||||
; GFX9-FLATSCR: S_ENDPGM 0, csr_amdgpu_allvgprs
|
||||
|
@ -602,7 +602,7 @@ body: |
|
||||
; GCN64-MUBUF: $exec = S_MOV_B64 1, implicit-def $vgpr0
|
||||
; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5)
|
||||
; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0
|
||||
; GCN64-MUBUF: $sgpr2 = S_ADD_U32 $sgpr33, 262144, implicit-def $scc
|
||||
; GCN64-MUBUF: $sgpr2 = S_ADD_I32 $sgpr33, 262144, implicit-def $scc
|
||||
; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, killed $sgpr2, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, align 4096, addrspace 5)
|
||||
; GCN64-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5)
|
||||
; GCN64-MUBUF: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
|
||||
@ -764,7 +764,7 @@ body: |
|
||||
; GCN32-MUBUF: $exec_lo = S_MOV_B32 1, implicit-def $vgpr0
|
||||
; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5)
|
||||
; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0
|
||||
; GCN32-MUBUF: $sgpr1 = S_ADD_U32 $sgpr33, 131072, implicit-def $scc
|
||||
; GCN32-MUBUF: $sgpr1 = S_ADD_I32 $sgpr33, 131072, implicit-def $scc
|
||||
; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, killed $sgpr1, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, align 4096, addrspace 5)
|
||||
; GCN32-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5)
|
||||
; GCN32-MUBUF: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
|
||||
@ -922,7 +922,7 @@ body: |
|
||||
; GCN64-FLATSCR: $exec = S_MOV_B64 1, implicit-def $vgpr0
|
||||
; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %fixed-stack.0, align 16, addrspace 5)
|
||||
; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0
|
||||
; GCN64-FLATSCR: $sgpr9 = S_ADD_U32 $sgpr33, 4096, implicit-def $scc
|
||||
; GCN64-FLATSCR: $sgpr9 = S_ADD_I32 $sgpr33, 4096, implicit-def $scc
|
||||
; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, killed $sgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, align 4096, addrspace 5)
|
||||
; GCN64-FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %fixed-stack.0, align 16, addrspace 5)
|
||||
; GCN64-FLATSCR: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr0
|
||||
@ -1129,7 +1129,7 @@ body: |
|
||||
; GCN64-MUBUF: $sgpr0_sgpr1 = S_MOV_B64 $exec
|
||||
; GCN64-MUBUF: $exec = S_MOV_B64 1, implicit-def $vgpr0
|
||||
; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5)
|
||||
; GCN64-MUBUF: $sgpr2 = S_ADD_U32 $sgpr33, 262144, implicit-def $scc
|
||||
; GCN64-MUBUF: $sgpr2 = S_ADD_I32 $sgpr33, 262144, implicit-def $scc
|
||||
; GCN64-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, killed $sgpr2, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.8, align 4096, addrspace 5)
|
||||
; GCN64-MUBUF: $sgpr12 = V_READLANE_B32 killed $vgpr0, 0
|
||||
; GCN64-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5)
|
||||
@ -1265,7 +1265,7 @@ body: |
|
||||
; GCN32-MUBUF: $sgpr0 = S_MOV_B32 $exec_lo
|
||||
; GCN32-MUBUF: $exec_lo = S_MOV_B32 1, implicit-def $vgpr0
|
||||
; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5)
|
||||
; GCN32-MUBUF: $sgpr1 = S_ADD_U32 $sgpr33, 131072, implicit-def $scc
|
||||
; GCN32-MUBUF: $sgpr1 = S_ADD_I32 $sgpr33, 131072, implicit-def $scc
|
||||
; GCN32-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, killed $sgpr1, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.8, align 4096, addrspace 5)
|
||||
; GCN32-MUBUF: $sgpr12 = V_READLANE_B32 killed $vgpr0, 0
|
||||
; GCN32-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5)
|
||||
@ -1397,7 +1397,7 @@ body: |
|
||||
; GCN64-FLATSCR: $sgpr2_sgpr3 = S_MOV_B64 $exec
|
||||
; GCN64-FLATSCR: $exec = S_MOV_B64 1, implicit-def $vgpr0
|
||||
; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %fixed-stack.0, align 16, addrspace 5)
|
||||
; GCN64-FLATSCR: $sgpr9 = S_ADD_U32 $sgpr33, 4096, implicit-def $scc
|
||||
; GCN64-FLATSCR: $sgpr9 = S_ADD_I32 $sgpr33, 4096, implicit-def $scc
|
||||
; GCN64-FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR killed $sgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.8, align 4096, addrspace 5)
|
||||
; GCN64-FLATSCR: $sgpr12 = V_READLANE_B32 killed $vgpr0, 0
|
||||
; GCN64-FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %fixed-stack.0, align 16, addrspace 5)
|
||||
|
@ -200,7 +200,7 @@ entry:
|
||||
; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: s_mov_b64 exec
|
||||
; GCN: s_mov_b32 s33, s32
|
||||
; GCN-DAG: s_add_u32 s32, s32, 0x400
|
||||
; GCN-DAG: s_addk_i32 s32, 0x400
|
||||
|
||||
; GCN-DAG: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
|
||||
; GCN-DAG: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill
|
||||
@ -224,7 +224,7 @@ entry:
|
||||
; GCN-DAG: v_readlane_b32 s34, v42, 0
|
||||
; GCN-DAG: v_readlane_b32 s35, v42, 1
|
||||
|
||||
; GCN: s_sub_u32 s32, s32, 0x400
|
||||
; GCN: s_addk_i32 s32, 0xfc00
|
||||
; GCN-NEXT: v_readlane_b32 s33,
|
||||
; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
|
||||
; GCN-NEXT: buffer_load_dword v42, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
|
||||
|
@ -78,10 +78,10 @@ entry:
|
||||
; 0x40000 / 64 = 4096 (for wave64)
|
||||
%a = load volatile i32, i32 addrspace(5)* %aptr
|
||||
|
||||
; MUBUF: s_add_u32 s32, s32, 0x40000
|
||||
; MUBUF: s_add_i32 s32, s32, 0x40000
|
||||
; MUBUF: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s32 ; 4-byte Folded Spill
|
||||
; MUBUF: s_sub_u32 s32, s32, 0x40000
|
||||
; FLATSCR: s_add_u32 [[SOFF:s[0-9]+]], s32, 0x1000
|
||||
; MUBUF: s_add_i32 s32, s32, 0xfffc0000
|
||||
; FLATSCR: s_add_i32 [[SOFF:s[0-9]+]], s32, 0x1000
|
||||
; FLATSCR: scratch_store_dword off, v{{[0-9]+}}, [[SOFF]] ; 4-byte Folded Spill
|
||||
call void asm sideeffect "", "s,s,s,s,s,s,s,s,v"(i32 %asm0.0, i32 %asm1.0, i32 %asm2.0, i32 %asm3.0, i32 %asm4.0, i32 %asm5.0, i32 %asm6.0, i32 %asm7.0, i32 %a)
|
||||
|
||||
@ -97,10 +97,10 @@ entry:
|
||||
|
||||
call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}"() #0
|
||||
|
||||
; MUBUF: s_add_u32 s32, s32, 0x40000
|
||||
; MUBUF: s_add_i32 s32, s32, 0x40000
|
||||
; MUBUF: buffer_load_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s32 ; 4-byte Folded Reload
|
||||
; MUBUF: s_sub_u32 s32, s32, 0x40000
|
||||
; FLATSCR: s_add_u32 [[SOFF:s[0-9]+]], s32, 0x1000
|
||||
; MUBUF: s_add_i32 s32, s32, 0xfffc0000
|
||||
; FLATSCR: s_add_i32 [[SOFF:s[0-9]+]], s32, 0x1000
|
||||
; FLATSCR: scratch_load_dword v{{[0-9]+}}, off, [[SOFF]] ; 4-byte Folded Reload
|
||||
|
||||
; Force %a to spill with no free SGPRs
|
||||
@ -202,9 +202,9 @@ entry:
|
||||
|
||||
%aptr = getelementptr i32, i32 addrspace(5)* %buf, i32 1
|
||||
; 0x40000 / 64 = 4096 (for wave64)
|
||||
; MUBUF: s_add_u32 s4, s32, 0x40000
|
||||
; MUBUF: s_add_i32 s4, s32, 0x40000
|
||||
; MUBUF: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s4 ; 4-byte Folded Spill
|
||||
; FLATSCR: s_add_u32 s0, s32, 0x1000
|
||||
; FLATSCR: s_add_i32 s0, s32, 0x1000
|
||||
; FLATSCR: scratch_store_dword off, v{{[0-9]+}}, s0 ; 4-byte Folded Spill
|
||||
%a = load volatile i32, i32 addrspace(5)* %aptr
|
||||
|
||||
@ -257,7 +257,7 @@ entry:
|
||||
%bufv2 = bitcast i8 addrspace(5)* %alloca to <2 x i32> addrspace(5)*
|
||||
|
||||
; 0x3ff00 / 64 = 4092 (for wave64)
|
||||
; MUBUF: s_add_u32 s4, s32, 0x3ff00
|
||||
; MUBUF: s_add_i32 s4, s32, 0x3ff00
|
||||
; MUBUF: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s4 ; 4-byte Folded Spill
|
||||
; MUBUF: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s4 offset:4 ; 4-byte Folded Spill
|
||||
; FLATSCR: scratch_store_dwordx2 off, v[{{[0-9:]+}}], s32 offset:4092 ; 8-byte Folded Spill
|
||||
|
@ -45,11 +45,12 @@ entry:
|
||||
}
|
||||
|
||||
; CHECK-LABEL: test_limited_sgpr
|
||||
; GFX6: s_add_u32 s32, s32, 0x[[OFFSET:[0-9a-f]+]]
|
||||
; GFX6: s_add_u32 s32, s32, 0x[[OFFSET:[0-9a-f]+]]
|
||||
; GFX6: s_add_i32 s32, s32, 0x[[OFFSET:[0-9a-f]+]]
|
||||
; GFX6: s_add_i32 s32, s32, 0x[[OFFSET:[0-9a-f]+]]
|
||||
; GFX6: s_add_i32 s32, s32, 0x[[OFFSET:[0-9a-f]+]]
|
||||
; GFX6-NEXT: s_waitcnt expcnt(0)
|
||||
; GFX6-NEXT: buffer_load_dword v{{[0-9]+}}, off, s[{{[0-9:]+}}], s32
|
||||
; GFX6-NEXT: s_sub_u32 s32, s32, 0x[[OFFSET:[0-9a-f]+]]
|
||||
; GFX6-NEXT: s_add_i32 s32, s32, 0x[[OFFSET:[0-9a-f]+]]
|
||||
; GFX6: NumSgprs: 48
|
||||
; GFX6: ScratchSize: 8608
|
||||
|
||||
|
@ -6,7 +6,7 @@
|
||||
define amdgpu_kernel void @max_alignment_128() #0 {
|
||||
; VI-LABEL: max_alignment_128:
|
||||
; VI: ; %bb.0:
|
||||
; VI-NEXT: s_add_u32 s4, s4, s7
|
||||
; VI-NEXT: s_add_i32 s4, s4, s7
|
||||
; VI-NEXT: s_lshr_b32 flat_scratch_hi, s4, 8
|
||||
; VI-NEXT: s_add_u32 s0, s0, s7
|
||||
; VI-NEXT: s_addc_u32 s1, s1, 0
|
||||
@ -110,7 +110,7 @@ define amdgpu_kernel void @max_alignment_128() #0 {
|
||||
define amdgpu_kernel void @stackrealign_attr() #1 {
|
||||
; VI-LABEL: stackrealign_attr:
|
||||
; VI: ; %bb.0:
|
||||
; VI-NEXT: s_add_u32 s4, s4, s7
|
||||
; VI-NEXT: s_add_i32 s4, s4, s7
|
||||
; VI-NEXT: s_lshr_b32 flat_scratch_hi, s4, 8
|
||||
; VI-NEXT: s_add_u32 s0, s0, s7
|
||||
; VI-NEXT: s_addc_u32 s1, s1, 0
|
||||
@ -214,7 +214,7 @@ define amdgpu_kernel void @stackrealign_attr() #1 {
|
||||
define amdgpu_kernel void @alignstack_attr() #2 {
|
||||
; VI-LABEL: alignstack_attr:
|
||||
; VI: ; %bb.0:
|
||||
; VI-NEXT: s_add_u32 s4, s4, s7
|
||||
; VI-NEXT: s_add_i32 s4, s4, s7
|
||||
; VI-NEXT: s_lshr_b32 flat_scratch_hi, s4, 8
|
||||
; VI-NEXT: s_add_u32 s0, s0, s7
|
||||
; VI-NEXT: s_addc_u32 s1, s1, 0
|
||||
|
@ -32,17 +32,17 @@ define void @needs_align16_default_stack_align(i32 %idx) #0 {
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}needs_align16_stack_align4:
|
||||
; GCN: s_add_u32 [[SCRATCH_REG:s[0-9]+]], s32, 0x3c0{{$}}
|
||||
; GCN: s_add_i32 [[SCRATCH_REG:s[0-9]+]], s32, 0x3c0{{$}}
|
||||
; GCN: s_and_b32 s33, [[SCRATCH_REG]], 0xfffffc00
|
||||
|
||||
; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen
|
||||
; GCN: v_or_b32_e32 v{{[0-9]+}}, 12
|
||||
; GCN: s_add_u32 s32, s32, 0x2800{{$}}
|
||||
; GCN: s_addk_i32 s32, 0x2800{{$}}
|
||||
; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen
|
||||
; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen
|
||||
; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen
|
||||
|
||||
; GCN: s_sub_u32 s32, s32, 0x2800
|
||||
; GCN: s_addk_i32 s32, 0xd800
|
||||
|
||||
; GCN: ; ScratchSize: 160
|
||||
define void @needs_align16_stack_align4(i32 %idx) #2 {
|
||||
@ -53,17 +53,17 @@ define void @needs_align16_stack_align4(i32 %idx) #2 {
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}needs_align32:
|
||||
; GCN: s_add_u32 [[SCRATCH_REG:s[0-9]+]], s32, 0x7c0{{$}}
|
||||
; GCN: s_add_i32 [[SCRATCH_REG:s[0-9]+]], s32, 0x7c0{{$}}
|
||||
; GCN: s_and_b32 s33, [[SCRATCH_REG]], 0xfffff800
|
||||
|
||||
; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen
|
||||
; GCN: v_or_b32_e32 v{{[0-9]+}}, 12
|
||||
; GCN: s_add_u32 s32, s32, 0x3000{{$}}
|
||||
; GCN: s_addk_i32 s32, 0x3000{{$}}
|
||||
; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen
|
||||
; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen
|
||||
; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen
|
||||
|
||||
; GCN: s_sub_u32 s32, s32, 0x3000
|
||||
; GCN: s_addk_i32 s32, 0xd000
|
||||
|
||||
; GCN: ; ScratchSize: 192
|
||||
define void @needs_align32(i32 %idx) #0 {
|
||||
@ -74,12 +74,12 @@ define void @needs_align32(i32 %idx) #0 {
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}force_realign4:
|
||||
; GCN: s_add_u32 [[SCRATCH_REG:s[0-9]+]], s32, 0xc0{{$}}
|
||||
; GCN: s_add_i32 [[SCRATCH_REG:s[0-9]+]], s32, 0xc0{{$}}
|
||||
; GCN: s_and_b32 s33, [[SCRATCH_REG]], 0xffffff00
|
||||
; GCN: s_add_u32 s32, s32, 0xd00{{$}}
|
||||
; GCN: s_addk_i32 s32, 0xd00{{$}}
|
||||
|
||||
; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen
|
||||
; GCN: s_sub_u32 s32, s32, 0xd00
|
||||
; GCN: s_addk_i32 s32, 0xf300
|
||||
|
||||
; GCN: ; ScratchSize: 52
|
||||
define void @force_realign4(i32 %idx) #1 {
|
||||
@ -125,12 +125,12 @@ define amdgpu_kernel void @kernel_call_align4_from_5() {
|
||||
|
||||
; GCN-LABEL: {{^}}default_realign_align128:
|
||||
; GCN: s_mov_b32 [[FP_COPY:s[0-9]+]], s33
|
||||
; GCN-NEXT: s_add_u32 s33, s32, 0x1fc0
|
||||
; GCN-NEXT: s_add_i32 s33, s32, 0x1fc0
|
||||
; GCN-NEXT: s_and_b32 s33, s33, 0xffffe000
|
||||
; GCN-NEXT: s_add_u32 s32, s32, 0x4000
|
||||
; GCN-NEXT: s_addk_i32 s32, 0x4000
|
||||
; GCN-NOT: s33
|
||||
; GCN: buffer_store_dword v0, off, s[0:3], s33{{$}}
|
||||
; GCN: s_sub_u32 s32, s32, 0x4000
|
||||
; GCN: s_addk_i32 s32, 0xc000
|
||||
; GCN: s_mov_b32 s33, [[FP_COPY]]
|
||||
define void @default_realign_align128(i32 %idx) #0 {
|
||||
%alloca.align = alloca i32, align 128, addrspace(5)
|
||||
@ -159,7 +159,7 @@ define void @func_call_align1024_bp_gets_vgpr_spill(<32 x i32> %a, i32 %b) #0 {
|
||||
; GCN: buffer_store_dword [[VGPR_REG:v[0-9]+]], off, s[0:3], s32 offset:1028 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: s_mov_b64 exec, s[4:5]
|
||||
; GCN-NEXT: v_writelane_b32 [[VGPR_REG]], s33, 2
|
||||
; GCN-DAG: s_add_u32 [[SCRATCH_REG:s[0-9]+]], s32, 0xffc0
|
||||
; GCN-DAG: s_add_i32 [[SCRATCH_REG:s[0-9]+]], s32, 0xffc0
|
||||
; GCN-DAG: v_writelane_b32 [[VGPR_REG]], s34, 3
|
||||
; GCN: s_and_b32 s33, [[SCRATCH_REG]], 0xffff0000
|
||||
; GCN: s_mov_b32 s34, s32
|
||||
@ -167,11 +167,11 @@ define void @func_call_align1024_bp_gets_vgpr_spill(<32 x i32> %a, i32 %b) #0 {
|
||||
; GCN: buffer_store_dword v32, off, s[0:3], s33 offset:1024
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: buffer_load_dword v{{[0-9]+}}, off, s[0:3], s34
|
||||
; GCN-DAG: s_add_u32 s32, s32, 0x30000
|
||||
; GCN-DAG: s_add_i32 s32, s32, 0x30000
|
||||
; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s32
|
||||
; GCN: s_swappc_b64 s[30:31], s[4:5]
|
||||
|
||||
; GCN: s_sub_u32 s32, s32, 0x30000
|
||||
; GCN: s_add_i32 s32, s32, 0xfffd0000
|
||||
; GCN-NEXT: v_readlane_b32 s33, [[VGPR_REG]], 2
|
||||
; GCN-NEXT: v_readlane_b32 s34, [[VGPR_REG]], 3
|
||||
; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
|
||||
@ -193,17 +193,17 @@ define i32 @needs_align1024_stack_args_used_inside_loop(%struct.Data addrspace(5
|
||||
|
||||
; GCN-LABEL: needs_align1024_stack_args_used_inside_loop:
|
||||
; GCN: s_mov_b32 [[FP_COPY:s[0-9]+]], s33
|
||||
; GCN-NEXT: s_add_u32 s33, s32, 0xffc0
|
||||
; GCN-NEXT: s_add_i32 s33, s32, 0xffc0
|
||||
; GCN-NEXT: s_mov_b32 [[BP_COPY:s[0-9]+]], s34
|
||||
; GCN-NEXT: s_mov_b32 s34, s32
|
||||
; GCN-NEXT: s_and_b32 s33, s33, 0xffff0000
|
||||
; GCN-NEXT: v_mov_b32_e32 v{{[0-9]+}}, 0
|
||||
; GCN-NEXT: v_lshrrev_b32_e64 [[VGPR_REG:v[0-9]+]], 6, s34
|
||||
; GCN: s_add_u32 s32, s32, 0x30000
|
||||
; GCN: s_add_i32 s32, s32, 0x30000
|
||||
; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s33 offset:1024
|
||||
; GCN: buffer_load_dword v{{[0-9]+}}, [[VGPR_REG]], s[0:3], 0 offen
|
||||
; GCN: v_add_u32_e32 [[VGPR_REG]], vcc, 4, [[VGPR_REG]]
|
||||
; GCN: s_sub_u32 s32, s32, 0x30000
|
||||
; GCN: s_add_i32 s32, s32, 0xfffd0000
|
||||
; GCN-NEXT: s_mov_b32 s33, [[FP_COPY]]
|
||||
; GCN-NEXT: s_mov_b32 s34, [[BP_COPY]]
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
@ -290,16 +290,16 @@ define void @spill_bp_to_memory_scratch_reg_needed_mubuf_offset(<32 x i32> %a, i
|
||||
|
||||
; GCN-LABEL: spill_bp_to_memory_scratch_reg_needed_mubuf_offset
|
||||
; GCN: s_or_saveexec_b64 s[4:5], -1
|
||||
; GCN-NEXT: s_add_u32 s6, s32, 0x42100
|
||||
; GCN-NEXT: s_add_i32 s6, s32, 0x42100
|
||||
; GCN-NEXT: buffer_store_dword v39, off, s[0:3], s6 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: s_mov_b64 exec, s[4:5]
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, s33
|
||||
; GCN-NOT: v_mov_b32_e32 v0, 0x1088
|
||||
; GCN-NEXT: s_add_u32 s6, s32, 0x42200
|
||||
; GCN-NEXT: s_add_i32 s6, s32, 0x42200
|
||||
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, s34
|
||||
; GCN-NOT: v_mov_b32_e32 v0, 0x108c
|
||||
; GCN-NEXT: s_add_u32 s6, s32, 0x42300
|
||||
; GCN-NEXT: s_add_i32 s6, s32, 0x42300
|
||||
; GCN-NEXT: s_mov_b32 s34, s32
|
||||
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill
|
||||
%local_val = alloca i32, align 128, addrspace(5)
|
||||
|
@ -11,7 +11,7 @@ define hidden void @widget() {
|
||||
; GCN-NEXT: s_mov_b64 exec, s[4:5]
|
||||
; GCN-NEXT: v_writelane_b32 v40, s33, 2
|
||||
; GCN-NEXT: s_mov_b32 s33, s32
|
||||
; GCN-NEXT: s_add_u32 s32, s32, 0x400
|
||||
; GCN-NEXT: s_addk_i32 s32, 0x400
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GCN-NEXT: flat_load_dword v0, v[0:1]
|
||||
@ -53,7 +53,7 @@ define hidden void @widget() {
|
||||
; GCN-NEXT: BB0_7: ; %UnifiedReturnBlock
|
||||
; GCN-NEXT: v_readlane_b32 s4, v40, 0
|
||||
; GCN-NEXT: v_readlane_b32 s5, v40, 1
|
||||
; GCN-NEXT: s_sub_u32 s32, s32, 0x400
|
||||
; GCN-NEXT: s_addk_i32 s32, 0xfc00
|
||||
; GCN-NEXT: v_readlane_b32 s33, v40, 2
|
||||
; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
|
||||
; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
|
||||
@ -191,7 +191,7 @@ define hidden void @blam() {
|
||||
; GCN-NEXT: s_mov_b64 exec, s[4:5]
|
||||
; GCN-NEXT: v_writelane_b32 v43, s33, 4
|
||||
; GCN-NEXT: s_mov_b32 s33, s32
|
||||
; GCN-NEXT: s_add_u32 s32, s32, 0x800
|
||||
; GCN-NEXT: s_addk_i32 s32, 0x800
|
||||
; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill
|
||||
|
@ -1129,8 +1129,8 @@ declare void @external_void_func_void() #1
|
||||
|
||||
; GCN-NEXT: v_writelane_b32 v40, s33, 2
|
||||
; GCN: s_mov_b32 s33, s32
|
||||
; GFX1064: s_add_u32 s32, s32, 0x400
|
||||
; GFX1032: s_add_u32 s32, s32, 0x200
|
||||
; GFX1064: s_addk_i32 s32, 0x400
|
||||
; GFX1032: s_addk_i32 s32, 0x200
|
||||
|
||||
|
||||
; GCN-DAG: v_writelane_b32 v40, s30, 0
|
||||
@ -1140,8 +1140,8 @@ declare void @external_void_func_void() #1
|
||||
; GCN-DAG: v_readlane_b32 s5, v40, 1
|
||||
|
||||
|
||||
; GFX1064: s_sub_u32 s32, s32, 0x400
|
||||
; GFX1032: s_sub_u32 s32, s32, 0x200
|
||||
; GFX1064: s_addk_i32 s32, 0xfc00
|
||||
; GFX1032: s_addk_i32 s32, 0xfe00
|
||||
; GCN: v_readlane_b32 s33, v40, 2
|
||||
; GFX1064: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
|
||||
; GFX1032: s_or_saveexec_b32 [[COPY_EXEC1:s[0-9]]], -1{{$}}
|
||||
|
@ -354,7 +354,7 @@ define amdgpu_gfx void @strict_wwm_call(<4 x i32> inreg %tmp14, i32 inreg %arg)
|
||||
; GFX9-O0-NEXT: s_mov_b64 exec, s[10:11]
|
||||
; GFX9-O0-NEXT: v_writelane_b32 v3, s33, 7
|
||||
; GFX9-O0-NEXT: s_mov_b32 s33, s32
|
||||
; GFX9-O0-NEXT: s_add_u32 s32, s32, 0x400
|
||||
; GFX9-O0-NEXT: s_addk_i32 s32, 0x400
|
||||
; GFX9-O0-NEXT: v_writelane_b32 v3, s30, 0
|
||||
; GFX9-O0-NEXT: v_writelane_b32 v3, s31, 1
|
||||
; GFX9-O0-NEXT: v_writelane_b32 v3, s8, 2
|
||||
@ -395,7 +395,7 @@ define amdgpu_gfx void @strict_wwm_call(<4 x i32> inreg %tmp14, i32 inreg %arg)
|
||||
; GFX9-O0-NEXT: s_mov_b64 exec, s[10:11]
|
||||
; GFX9-O0-NEXT: v_mov_b32_e32 v0, v1
|
||||
; GFX9-O0-NEXT: buffer_store_dword v0, off, s[4:7], s8 offset:4
|
||||
; GFX9-O0-NEXT: s_sub_u32 s32, s32, 0x400
|
||||
; GFX9-O0-NEXT: s_addk_i32 s32, 0xfc00
|
||||
; GFX9-O0-NEXT: v_readlane_b32 s33, v3, 7
|
||||
; GFX9-O0-NEXT: s_or_saveexec_b64 s[4:5], -1
|
||||
; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
|
||||
@ -414,7 +414,7 @@ define amdgpu_gfx void @strict_wwm_call(<4 x i32> inreg %tmp14, i32 inreg %arg)
|
||||
; GFX9-O3-NEXT: s_mov_b64 exec, s[10:11]
|
||||
; GFX9-O3-NEXT: s_mov_b32 s14, s33
|
||||
; GFX9-O3-NEXT: s_mov_b32 s33, s32
|
||||
; GFX9-O3-NEXT: s_add_u32 s32, s32, 0x400
|
||||
; GFX9-O3-NEXT: s_addk_i32 s32, 0x400
|
||||
; GFX9-O3-NEXT: s_mov_b64 s[10:11], s[30:31]
|
||||
; GFX9-O3-NEXT: v_mov_b32_e32 v2, s8
|
||||
; GFX9-O3-NEXT: s_not_b64 exec, exec
|
||||
@ -431,7 +431,7 @@ define amdgpu_gfx void @strict_wwm_call(<4 x i32> inreg %tmp14, i32 inreg %arg)
|
||||
; GFX9-O3-NEXT: s_mov_b64 exec, s[8:9]
|
||||
; GFX9-O3-NEXT: v_mov_b32_e32 v0, v1
|
||||
; GFX9-O3-NEXT: buffer_store_dword v0, off, s[4:7], 0 offset:4
|
||||
; GFX9-O3-NEXT: s_sub_u32 s32, s32, 0x400
|
||||
; GFX9-O3-NEXT: s_addk_i32 s32, 0xfc00
|
||||
; GFX9-O3-NEXT: s_mov_b32 s33, s14
|
||||
; GFX9-O3-NEXT: s_or_saveexec_b64 s[4:5], -1
|
||||
; GFX9-O3-NEXT: buffer_load_dword v2, off, s[0:3], s32 ; 4-byte Folded Reload
|
||||
@ -555,7 +555,7 @@ define amdgpu_gfx void @strict_wwm_call_i64(<4 x i32> inreg %tmp14, i64 inreg %a
|
||||
; GFX9-O0-NEXT: s_mov_b64 exec, s[10:11]
|
||||
; GFX9-O0-NEXT: v_writelane_b32 v11, s33, 9
|
||||
; GFX9-O0-NEXT: s_mov_b32 s33, s32
|
||||
; GFX9-O0-NEXT: s_add_u32 s32, s32, 0xc00
|
||||
; GFX9-O0-NEXT: s_addk_i32 s32, 0xc00
|
||||
; GFX9-O0-NEXT: v_writelane_b32 v11, s30, 0
|
||||
; GFX9-O0-NEXT: v_writelane_b32 v11, s31, 1
|
||||
; GFX9-O0-NEXT: v_writelane_b32 v11, s9, 2
|
||||
@ -621,7 +621,7 @@ define amdgpu_gfx void @strict_wwm_call_i64(<4 x i32> inreg %tmp14, i64 inreg %a
|
||||
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v3
|
||||
; GFX9-O0-NEXT: s_mov_b32 s8, 0
|
||||
; GFX9-O0-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], s8 offset:4
|
||||
; GFX9-O0-NEXT: s_sub_u32 s32, s32, 0xc00
|
||||
; GFX9-O0-NEXT: s_addk_i32 s32, 0xf400
|
||||
; GFX9-O0-NEXT: v_readlane_b32 s33, v11, 9
|
||||
; GFX9-O0-NEXT: s_or_saveexec_b64 s[4:5], -1
|
||||
; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
|
||||
@ -663,7 +663,7 @@ define amdgpu_gfx void @strict_wwm_call_i64(<4 x i32> inreg %tmp14, i64 inreg %a
|
||||
; GFX9-O3-NEXT: s_mov_b64 exec, s[10:11]
|
||||
; GFX9-O3-NEXT: s_mov_b32 s14, s33
|
||||
; GFX9-O3-NEXT: s_mov_b32 s33, s32
|
||||
; GFX9-O3-NEXT: s_add_u32 s32, s32, 0x800
|
||||
; GFX9-O3-NEXT: s_addk_i32 s32, 0x800
|
||||
; GFX9-O3-NEXT: s_mov_b64 s[10:11], s[30:31]
|
||||
; GFX9-O3-NEXT: v_mov_b32_e32 v6, s8
|
||||
; GFX9-O3-NEXT: v_mov_b32_e32 v7, s9
|
||||
@ -688,7 +688,7 @@ define amdgpu_gfx void @strict_wwm_call_i64(<4 x i32> inreg %tmp14, i64 inreg %a
|
||||
; GFX9-O3-NEXT: v_mov_b32_e32 v0, v2
|
||||
; GFX9-O3-NEXT: v_mov_b32_e32 v1, v3
|
||||
; GFX9-O3-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 offset:4
|
||||
; GFX9-O3-NEXT: s_sub_u32 s32, s32, 0x800
|
||||
; GFX9-O3-NEXT: s_addk_i32 s32, 0xf800
|
||||
; GFX9-O3-NEXT: s_mov_b32 s33, s14
|
||||
; GFX9-O3-NEXT: s_or_saveexec_b64 s[4:5], -1
|
||||
; GFX9-O3-NEXT: buffer_load_dword v6, off, s[0:3], s32 ; 4-byte Folded Reload
|
||||
|
Loading…
x
Reference in New Issue
Block a user