
[AMDGPU] Use s_add_i32 for address additions

This allows the add instruction to be converted to s_addk_i32, and to
v_add_nc_u32 instead of the carry-writing v_add_co_u32 when it is
converted to a VALU instruction.
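
For illustration (not part of the change itself): s_addk_i32 is the SOPK
form with a 16-bit signed immediate, so a prologue adjustment and its
negated epilogue counterpart (e.g. 0x400 and 0xfc00 in the tests below)
can both be shrunk, while adjustments that do not fit a signed 16-bit
value (e.g. 0x100000 or 0x40300) keep the 32-bit s_add_i32 form. Below is
a minimal standalone C++ sketch of that encoding check; the helper name
fitsSImm16 is hypothetical, not an LLVM API.

// Illustrative only: why the signed add shrinks where the old unsigned
// add/sub pair could not. s_addk_i32 (SOPK) encodes a 16-bit signed
// immediate, so small positive and negative frame adjustments both fit;
// larger ones keep s_add_i32 with a 32-bit literal.
#include <cstdint>
#include <cstdio>

static bool fitsSImm16(int64_t Imm) { return Imm >= -32768 && Imm <= 32767; }

int main() {
  // Frame adjustments that appear in the updated tests in this commit.
  const int64_t Adjustments[] = {0x400, -0x400, 0x4000, 0x100000, -0x40300};
  for (int64_t A : Adjustments)
    std::printf("%lld -> %s\n", static_cast<long long>(A),
                fitsSImm16(A) ? "s_addk_i32" : "s_add_i32 (32-bit literal)");
  return 0;
}

Running it prints which form each of the adjustments seen in the updated
tests would use.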

Differential Revision: https://reviews.llvm.org/D103322
Sebastian Neubauer 2021-06-07 16:09:48 +02:00
parent a86aa7478c
commit 38d0179c03
43 changed files with 1018 additions and 1018 deletions


@ -1894,7 +1894,7 @@ static SDValue SelectSAddrFI(SelectionDAG *CurDAG, SDValue SAddr) {
auto FI = cast<FrameIndexSDNode>(SAddr.getOperand(0));
SDValue TFI = CurDAG->getTargetFrameIndex(FI->getIndex(),
FI->getValueType(0));
SAddr = SDValue(CurDAG->getMachineNode(AMDGPU::S_ADD_U32, SDLoc(SAddr),
SAddr = SDValue(CurDAG->getMachineNode(AMDGPU::S_ADD_I32, SDLoc(SAddr),
MVT::i32, TFI, SAddr.getOperand(1)),
0);
}
@ -1936,8 +1936,9 @@ bool AMDGPUDAGToDAGISel::SelectScratchSAddr(SDNode *Parent, SDValue Addr,
SAddr.getOpcode() == ISD::TargetFrameIndex
? getMaterializedScalarImm32(Lo_32(RemainderOffset), DL)
: CurDAG->getTargetConstant(RemainderOffset, DL, MVT::i32);
SAddr = SDValue(CurDAG->getMachineNode(AMDGPU::S_ADD_U32, DL, MVT::i32,
SAddr, AddOffset), 0);
SAddr = SDValue(CurDAG->getMachineNode(AMDGPU::S_ADD_I32, DL, MVT::i32,
SAddr, AddOffset),
0);
}
Offset = CurDAG->getTargetConstant(COffsetVal, DL, MVT::i16);


@ -3694,9 +3694,9 @@ AMDGPUInstructionSelector::selectScratchSAddr(MachineOperand &Root) const {
const DebugLoc &DL = I.getDebugLoc();
SAddr = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), SAddr)
.addFrameIndex(FI)
.addReg(RHSDef->Reg);
BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_I32), SAddr)
.addFrameIndex(FI)
.addReg(RHSDef->Reg);
}
}


@ -307,9 +307,9 @@ void SIFrameLowering::emitEntryFunctionFlatScratchInit(
// Add wave offset in bytes to private base offset.
// See comment in AMDKernelCodeT.h for enable_sgpr_flat_scratch_init.
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo)
.addReg(FlatScrInitLo)
.addReg(ScratchWaveOffsetReg);
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), FlatScrInitLo)
.addReg(FlatScrInitLo)
.addReg(ScratchWaveOffsetReg);
// Convert offset to 256-byte units.
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_LSHR_B32), AMDGPU::FLAT_SCR_HI)
@ -909,9 +909,9 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
LiveRegs.addLiveIns(MBB);
}
// s_add_u32 s33, s32, NumBytes
// s_add_i32 s33, s32, NumBytes
// s_and_b32 s33, s33, 0b111...0000
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_U32), FramePtrReg)
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), FramePtrReg)
.addReg(StackPtrReg)
.addImm((Alignment - 1) * getScratchScaleFactor(ST))
.setMIFlag(MachineInstr::FrameSetup);
@ -937,7 +937,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
}
if (HasFP && RoundedSize != 0) {
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_U32), StackPtrReg)
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), StackPtrReg)
.addReg(StackPtrReg)
.addImm(RoundedSize * getScratchScaleFactor(ST))
.setMIFlag(MachineInstr::FrameSetup);
@ -988,10 +988,10 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
Optional<int> BPSaveIndex = FuncInfo->BasePointerSaveIndex;
if (RoundedSize != 0 && hasFP(MF)) {
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_SUB_U32), StackPtrReg)
.addReg(StackPtrReg)
.addImm(RoundedSize * getScratchScaleFactor(ST))
.setMIFlag(MachineInstr::FrameDestroy);
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), StackPtrReg)
.addReg(StackPtrReg)
.addImm(-static_cast<int64_t>(RoundedSize * getScratchScaleFactor(ST)))
.setMIFlag(MachineInstr::FrameDestroy);
}
if (FuncInfo->SGPRForFPSaveRestoreCopy) {
@ -1294,10 +1294,12 @@ MachineBasicBlock::iterator SIFrameLowering::eliminateCallFramePseudoInstr(
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
Register SPReg = MFI->getStackPtrOffsetReg();
unsigned Op = IsDestroy ? AMDGPU::S_SUB_U32 : AMDGPU::S_ADD_U32;
BuildMI(MBB, I, DL, TII->get(Op), SPReg)
.addReg(SPReg)
.addImm(Amount * getScratchScaleFactor(ST));
Amount *= getScratchScaleFactor(ST);
if (IsDestroy)
Amount = -Amount;
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), SPReg)
.addReg(SPReg)
.addImm(Amount);
} else if (CalleePopAmount != 0) {
llvm_unreachable("is this used?");
}


@ -703,7 +703,7 @@ Register SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
.addFrameIndex(FrameIdx);
if (ST.enableFlatScratch() ) {
BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_ADD_U32), BaseReg)
BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_ADD_I32), BaseReg)
.addReg(OffsetReg, RegState::Kill)
.addReg(FIReg);
return BaseReg;
@ -1113,7 +1113,7 @@ void SIRegisterInfo::buildSpillLoadStore(
if (ScratchOffsetReg == AMDGPU::NoRegister) {
BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), SOffset).addImm(Offset);
} else {
BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), SOffset)
BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), SOffset)
.addReg(ScratchOffsetReg)
.addImm(Offset);
}
@ -1262,9 +1262,9 @@ void SIRegisterInfo::buildSpillLoadStore(
if (ScratchOffsetRegDelta != 0) {
// Subtract the offset we added to the ScratchOffset register.
BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), SOffset)
BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), SOffset)
.addReg(SOffset)
.addImm(ScratchOffsetRegDelta);
.addImm(-ScratchOffsetRegDelta);
}
}
@ -1707,9 +1707,9 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
FIOp.setIsKill(false);
}
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), TmpSReg)
.addReg(FrameReg)
.addImm(Offset);
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), TmpSReg)
.addReg(FrameReg)
.addImm(Offset);
if (!UseSGPR)
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
@ -1717,10 +1717,10 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
if (TmpSReg == FrameReg) {
// Undo frame register modification.
BuildMI(*MBB, std::next(MI), DL, TII->get(AMDGPU::S_SUB_U32),
BuildMI(*MBB, std::next(MI), DL, TII->get(AMDGPU::S_ADD_I32),
FrameReg)
.addReg(FrameReg)
.addImm(Offset);
.addReg(FrameReg)
.addImm(-Offset);
}
return;
@ -1794,17 +1794,17 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHR_B32), ScaledReg)
.addReg(FrameReg)
.addImm(ST.getWavefrontSizeLog2());
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), ScaledReg)
.addReg(ScaledReg, RegState::Kill)
.addImm(Offset);
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), ScaledReg)
.addReg(ScaledReg, RegState::Kill)
.addImm(Offset);
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), ResultReg)
.addReg(ScaledReg, RegState::Kill);
// If there were truly no free SGPRs, we need to undo everything.
if (!TmpScaledReg.isValid()) {
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), ScaledReg)
.addReg(ScaledReg, RegState::Kill)
.addImm(Offset);
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), ScaledReg)
.addReg(ScaledReg, RegState::Kill)
.addImm(-Offset);
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHL_B32), ScaledReg)
.addReg(FrameReg)
.addImm(ST.getWavefrontSizeLog2());


@ -55,7 +55,7 @@ define void @func_dynamic_stackalloc_sgpr_align4() {
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_mov_b32 s6, s33
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_add_u32 s32, s32, 0x400
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, gv@gotpcrel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, gv@gotpcrel32@hi+12
@ -71,7 +71,7 @@ define void @func_dynamic_stackalloc_sgpr_align4() {
; GFX9-NEXT: s_add_u32 s4, s32, s4
; GFX9-NEXT: v_mov_b32_e32 v1, s4
; GFX9-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
; GFX9-NEXT: s_sub_u32 s32, s32, 0x400
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
@ -81,7 +81,7 @@ define void @func_dynamic_stackalloc_sgpr_align4() {
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s6, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_add_u32 s32, s32, 0x200
; GFX10-NEXT: s_addk_i32 s32, 0x200
; GFX10-NEXT: s_getpc_b64 s[4:5]
; GFX10-NEXT: s_add_u32 s4, s4, gv@gotpcrel32@lo+4
; GFX10-NEXT: s_addc_u32 s5, s5, gv@gotpcrel32@hi+12
@ -95,7 +95,7 @@ define void @func_dynamic_stackalloc_sgpr_align4() {
; GFX10-NEXT: s_and_b32 s4, s4, -16
; GFX10-NEXT: s_lshl_b32 s4, s4, 5
; GFX10-NEXT: s_add_u32 s4, s32, s4
; GFX10-NEXT: s_sub_u32 s32, s32, 0x200
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: v_mov_b32_e32 v1, s4
; GFX10-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
@ -157,7 +157,7 @@ define void @func_dynamic_stackalloc_sgpr_align16() {
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_mov_b32 s6, s33
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_add_u32 s32, s32, 0x400
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, gv@gotpcrel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, gv@gotpcrel32@hi+12
@ -173,7 +173,7 @@ define void @func_dynamic_stackalloc_sgpr_align16() {
; GFX9-NEXT: s_add_u32 s4, s32, s4
; GFX9-NEXT: v_mov_b32_e32 v1, s4
; GFX9-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
; GFX9-NEXT: s_sub_u32 s32, s32, 0x400
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
@ -183,7 +183,7 @@ define void @func_dynamic_stackalloc_sgpr_align16() {
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s6, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_add_u32 s32, s32, 0x200
; GFX10-NEXT: s_addk_i32 s32, 0x200
; GFX10-NEXT: s_getpc_b64 s[4:5]
; GFX10-NEXT: s_add_u32 s4, s4, gv@gotpcrel32@lo+4
; GFX10-NEXT: s_addc_u32 s5, s5, gv@gotpcrel32@hi+12
@ -197,7 +197,7 @@ define void @func_dynamic_stackalloc_sgpr_align16() {
; GFX10-NEXT: s_and_b32 s4, s4, -16
; GFX10-NEXT: s_lshl_b32 s4, s4, 5
; GFX10-NEXT: s_add_u32 s4, s32, s4
; GFX10-NEXT: s_sub_u32 s32, s32, 0x200
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: v_mov_b32_e32 v1, s4
; GFX10-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
@ -260,9 +260,9 @@ define void @func_dynamic_stackalloc_sgpr_align32(i32 addrspace(1)* %out) {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_mov_b32 s6, s33
; GFX9-NEXT: s_add_u32 s33, s32, 0x7c0
; GFX9-NEXT: s_add_i32 s33, s32, 0x7c0
; GFX9-NEXT: s_and_b32 s33, s33, 0xfffff800
; GFX9-NEXT: s_add_u32 s32, s32, 0x1000
; GFX9-NEXT: s_addk_i32 s32, 0x1000
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, gv@gotpcrel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, gv@gotpcrel32@hi+12
@ -279,7 +279,7 @@ define void @func_dynamic_stackalloc_sgpr_align32(i32 addrspace(1)* %out) {
; GFX9-NEXT: s_and_b32 s4, s4, 0xfffff800
; GFX9-NEXT: v_mov_b32_e32 v1, s4
; GFX9-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
; GFX9-NEXT: s_sub_u32 s32, s32, 0x1000
; GFX9-NEXT: s_addk_i32 s32, 0xf000
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
@ -288,10 +288,10 @@ define void @func_dynamic_stackalloc_sgpr_align32(i32 addrspace(1)* %out) {
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s6, s33
; GFX10-NEXT: s_add_u32 s33, s32, 0x3e0
; GFX10-NEXT: s_add_i32 s33, s32, 0x3e0
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: s_and_b32 s33, s33, 0xfffffc00
; GFX10-NEXT: s_add_u32 s32, s32, 0x800
; GFX10-NEXT: s_addk_i32 s32, 0x800
; GFX10-NEXT: s_getpc_b64 s[4:5]
; GFX10-NEXT: s_add_u32 s4, s4, gv@gotpcrel32@lo+4
; GFX10-NEXT: s_addc_u32 s5, s5, gv@gotpcrel32@hi+12
@ -305,7 +305,7 @@ define void @func_dynamic_stackalloc_sgpr_align32(i32 addrspace(1)* %out) {
; GFX10-NEXT: s_lshl_b32 s4, s4, 5
; GFX10-NEXT: s_add_u32 s4, s32, s4
; GFX10-NEXT: s_and_b32 s4, s4, 0xfffffc00
; GFX10-NEXT: s_sub_u32 s32, s32, 0x800
; GFX10-NEXT: s_addk_i32 s32, 0xf800
; GFX10-NEXT: v_mov_b32_e32 v1, s4
; GFX10-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0


@ -9,7 +9,7 @@ define i32 @v_extract_v64i32_varidx(<64 x i32> addrspace(1)* %ptr, i32 %idx) {
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: s_mov_b32 s6, s33
; GCN-NEXT: s_add_u32 s33, s32, 0x3fc0
; GCN-NEXT: s_add_i32 s33, s32, 0x3fc0
; GCN-NEXT: s_and_b32 s33, s33, 0xffffc000
; GCN-NEXT: v_add_co_u32_e32 v3, vcc, 64, v0
; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill
@ -56,8 +56,8 @@ define i32 @v_extract_v64i32_varidx(<64 x i32> addrspace(1)* %ptr, i32 %idx) {
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT: v_add_u32_e32 v1, 0x100, v1
; GCN-NEXT: v_add_u32_e32 v0, v1, v0
; GCN-NEXT: s_add_u32 s32, s32, 0x10000
; GCN-NEXT: s_sub_u32 s32, s32, 0x10000
; GCN-NEXT: s_add_i32 s32, s32, 0x10000
; GCN-NEXT: s_add_i32 s32, s32, 0xffff0000
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:640 ; 4-byte Folded Spill
; GCN-NEXT: s_waitcnt vmcnt(0)
@ -270,7 +270,7 @@ define i16 @v_extract_v128i16_varidx(<128 x i16> addrspace(1)* %ptr, i32 %idx) {
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: s_mov_b32 s6, s33
; GCN-NEXT: s_add_u32 s33, s32, 0x3fc0
; GCN-NEXT: s_add_i32 s33, s32, 0x3fc0
; GCN-NEXT: s_and_b32 s33, s33, 0xffffc000
; GCN-NEXT: v_add_co_u32_e32 v3, vcc, 64, v0
; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill
@ -317,8 +317,8 @@ define i16 @v_extract_v128i16_varidx(<128 x i16> addrspace(1)* %ptr, i32 %idx) {
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT: v_and_b32_e32 v1, 1, v2
; GCN-NEXT: v_lshlrev_b32_e32 v1, 4, v1
; GCN-NEXT: s_add_u32 s32, s32, 0x10000
; GCN-NEXT: s_sub_u32 s32, s32, 0x10000
; GCN-NEXT: s_add_i32 s32, s32, 0x10000
; GCN-NEXT: s_add_i32 s32, s32, 0xffff0000
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:640 ; 4-byte Folded Spill
; GCN-NEXT: s_waitcnt vmcnt(0)
@ -536,7 +536,7 @@ define i64 @v_extract_v32i64_varidx(<32 x i64> addrspace(1)* %ptr, i32 %idx) {
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: s_mov_b32 s6, s33
; GCN-NEXT: s_add_u32 s33, s32, 0x3fc0
; GCN-NEXT: s_add_i32 s33, s32, 0x3fc0
; GCN-NEXT: s_and_b32 s33, s33, 0xffffc000
; GCN-NEXT: v_add_co_u32_e32 v3, vcc, 64, v0
; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill
@ -583,8 +583,8 @@ define i64 @v_extract_v32i64_varidx(<32 x i64> addrspace(1)* %ptr, i32 %idx) {
; GCN-NEXT: v_lshlrev_b32_e32 v0, 3, v0
; GCN-NEXT: v_add_u32_e32 v2, 0x100, v2
; GCN-NEXT: v_add_u32_e32 v1, v2, v0
; GCN-NEXT: s_add_u32 s32, s32, 0x10000
; GCN-NEXT: s_sub_u32 s32, s32, 0x10000
; GCN-NEXT: s_add_i32 s32, s32, 0x10000
; GCN-NEXT: s_add_i32 s32, s32, 0xffff0000
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:640 ; 4-byte Folded Spill
; GCN-NEXT: s_waitcnt vmcnt(0)


@ -13,10 +13,10 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
; GFX9-NEXT: s_lshl_b32 s1, s0, 2
; GFX9-NEXT: s_and_b32 s0, s0, 15
; GFX9-NEXT: s_lshl_b32 s0, s0, 2
; GFX9-NEXT: s_add_u32 s1, 4, s1
; GFX9-NEXT: s_add_i32 s1, s1, 4
; GFX9-NEXT: scratch_store_dword off, v0, s1
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_add_u32 s0, 4, s0
; GFX9-NEXT: s_add_i32 s0, s0, 4
; GFX9-NEXT: scratch_load_dword v0, off, s0 glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_endpgm
@ -33,8 +33,8 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
; GFX10-NEXT: s_and_b32 s1, s0, 15
; GFX10-NEXT: s_lshl_b32 s0, s0, 2
; GFX10-NEXT: s_lshl_b32 s1, s1, 2
; GFX10-NEXT: s_add_u32 s0, 4, s0
; GFX10-NEXT: s_add_u32 s1, 4, s1
; GFX10-NEXT: s_add_i32 s0, s0, 4
; GFX10-NEXT: s_add_i32 s1, s1, 4
; GFX10-NEXT: scratch_store_dword off, v0, s0
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: scratch_load_dword v0, off, s1 glc dlc
@ -185,10 +185,10 @@ define amdgpu_kernel void @store_load_sindex_small_offset_kernel(i32 %idx) {
; GFX9-NEXT: s_and_b32 s0, s0, 15
; GFX9-NEXT: s_lshl_b32 s0, s0, 2
; GFX9-NEXT: v_mov_b32_e32 v0, 15
; GFX9-NEXT: s_add_u32 s1, 0x104, s1
; GFX9-NEXT: s_addk_i32 s1, 0x104
; GFX9-NEXT: scratch_store_dword off, v0, s1
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_add_u32 s0, 0x104, s0
; GFX9-NEXT: s_addk_i32 s0, 0x104
; GFX9-NEXT: scratch_load_dword v0, off, s0 glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_endpgm
@ -207,8 +207,8 @@ define amdgpu_kernel void @store_load_sindex_small_offset_kernel(i32 %idx) {
; GFX10-NEXT: s_and_b32 s1, s0, 15
; GFX10-NEXT: s_lshl_b32 s0, s0, 2
; GFX10-NEXT: s_lshl_b32 s1, s1, 2
; GFX10-NEXT: s_add_u32 s0, 0x104, s0
; GFX10-NEXT: s_add_u32 s1, 0x104, s1
; GFX10-NEXT: s_addk_i32 s0, 0x104
; GFX10-NEXT: s_addk_i32 s1, 0x104
; GFX10-NEXT: scratch_store_dword off, v0, s0
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: scratch_load_dword v0, off, s1 glc dlc
@ -295,7 +295,7 @@ define void @store_load_vindex_small_offset_foo(i32 %idx) {
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: scratch_load_dword v1, off, s32 glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_add_u32 vcc_hi, s32, 0x100
; GFX9-NEXT: s_add_i32 vcc_hi, s32, 0x100
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 2, v0
; GFX9-NEXT: v_and_b32_e32 v0, 15, v0
; GFX9-NEXT: v_mov_b32_e32 v2, vcc_hi
@ -314,7 +314,7 @@ define void @store_load_vindex_small_offset_foo(i32 %idx) {
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_and_b32_e32 v1, 15, v0
; GFX10-NEXT: s_add_u32 vcc_lo, s32, 0x100
; GFX10-NEXT: s_add_i32 vcc_lo, s32, 0x100
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX10-NEXT: v_mov_b32_e32 v2, vcc_lo
; GFX10-NEXT: v_mov_b32_e32 v3, 15
@ -357,10 +357,10 @@ define amdgpu_kernel void @store_load_sindex_large_offset_kernel(i32 %idx) {
; GFX9-NEXT: s_and_b32 s0, s0, 15
; GFX9-NEXT: s_lshl_b32 s0, s0, 2
; GFX9-NEXT: v_mov_b32_e32 v0, 15
; GFX9-NEXT: s_add_u32 s1, 0x4004, s1
; GFX9-NEXT: s_addk_i32 s1, 0x4004
; GFX9-NEXT: scratch_store_dword off, v0, s1
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_add_u32 s0, 0x4004, s0
; GFX9-NEXT: s_addk_i32 s0, 0x4004
; GFX9-NEXT: scratch_load_dword v0, off, s0 glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_endpgm
@ -379,8 +379,8 @@ define amdgpu_kernel void @store_load_sindex_large_offset_kernel(i32 %idx) {
; GFX10-NEXT: s_and_b32 s1, s0, 15
; GFX10-NEXT: s_lshl_b32 s0, s0, 2
; GFX10-NEXT: s_lshl_b32 s1, s1, 2
; GFX10-NEXT: s_add_u32 s0, 0x4004, s0
; GFX10-NEXT: s_add_u32 s1, 0x4004, s1
; GFX10-NEXT: s_addk_i32 s0, 0x4004
; GFX10-NEXT: s_addk_i32 s1, 0x4004
; GFX10-NEXT: scratch_store_dword off, v0, s0
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: scratch_load_dword v0, off, s1 glc dlc
@ -467,7 +467,7 @@ define void @store_load_vindex_large_offset_foo(i32 %idx) {
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: scratch_load_dword v1, off, s32 glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_add_u32 vcc_hi, s32, 0x4000
; GFX9-NEXT: s_add_i32 vcc_hi, s32, 0x4000
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 2, v0
; GFX9-NEXT: v_and_b32_e32 v0, 15, v0
; GFX9-NEXT: v_mov_b32_e32 v2, vcc_hi
@ -486,7 +486,7 @@ define void @store_load_vindex_large_offset_foo(i32 %idx) {
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_and_b32_e32 v1, 15, v0
; GFX10-NEXT: s_add_u32 vcc_lo, s32, 0x4000
; GFX10-NEXT: s_add_i32 vcc_lo, s32, 0x4000
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX10-NEXT: v_mov_b32_e32 v2, vcc_lo
; GFX10-NEXT: v_mov_b32_e32 v3, 15
@ -527,7 +527,7 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
; GFX9-NEXT: scratch_store_dword off, v0, vcc_hi offset:4
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, 15
; GFX9-NEXT: s_add_u32 s0, 4, s0
; GFX9-NEXT: s_add_i32 s0, s0, 4
; GFX9-NEXT: scratch_store_dword off, v0, s0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: scratch_load_dword v0, off, s0 glc
@ -543,7 +543,7 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
; GFX10-NEXT: v_mov_b32_e32 v0, 13
; GFX10-NEXT: v_mov_b32_e32 v1, 15
; GFX10-NEXT: s_movk_i32 s0, 0x3e80
; GFX10-NEXT: s_add_u32 s0, 4, s0
; GFX10-NEXT: s_add_i32 s0, s0, 4
; GFX10-NEXT: scratch_store_dword off, v0, off offset:4
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: scratch_store_dword off, v1, s0
@ -571,7 +571,7 @@ define void @store_load_large_imm_offset_foo() {
; GFX9-NEXT: scratch_store_dword off, v0, s32
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, 15
; GFX9-NEXT: s_add_u32 s0, s32, s0
; GFX9-NEXT: s_add_i32 s0, s0, s32
; GFX9-NEXT: scratch_store_dword off, v0, s0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: scratch_load_dword v0, off, s0 glc
@ -585,7 +585,7 @@ define void @store_load_large_imm_offset_foo() {
; GFX10-NEXT: v_mov_b32_e32 v0, 13
; GFX10-NEXT: v_mov_b32_e32 v1, 15
; GFX10-NEXT: s_movk_i32 s0, 0x3e80
; GFX10-NEXT: s_add_u32 s0, s32, s0
; GFX10-NEXT: s_add_i32 s0, s0, s32
; GFX10-NEXT: scratch_store_dword off, v0, s32
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: scratch_store_dword off, v1, s0


@ -155,7 +155,7 @@ define void @func_non_entry_block_static_alloca_align4(i32 addrspace(1)* %out, i
; GCN-NEXT: s_mov_b32 s7, s33
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
; GCN-NEXT: s_mov_b32 s33, s32
; GCN-NEXT: s_add_u32 s32, s32, 0x400
; GCN-NEXT: s_addk_i32 s32, 0x400
; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GCN-NEXT: s_cbranch_execz BB2_3
; GCN-NEXT: ; %bb.1: ; %bb.0
@ -181,7 +181,7 @@ define void @func_non_entry_block_static_alloca_align4(i32 addrspace(1)* %out, i
; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: global_store_dword v[0:1], v0, off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_sub_u32 s32, s32, 0x400
; GCN-NEXT: s_addk_i32 s32, 0xfc00
; GCN-NEXT: s_mov_b32 s33, s7
; GCN-NEXT: s_setpc_b64 s[30:31]
@ -217,10 +217,10 @@ define void @func_non_entry_block_static_alloca_align64(i32 addrspace(1)* %out,
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: s_mov_b32 s7, s33
; GCN-NEXT: s_add_u32 s33, s32, 0xfc0
; GCN-NEXT: s_add_i32 s33, s32, 0xfc0
; GCN-NEXT: s_and_b32 s33, s33, 0xfffff000
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
; GCN-NEXT: s_add_u32 s32, s32, 0x2000
; GCN-NEXT: s_addk_i32 s32, 0x2000
; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GCN-NEXT: s_cbranch_execz BB3_2
; GCN-NEXT: ; %bb.1: ; %bb.0
@ -243,7 +243,7 @@ define void @func_non_entry_block_static_alloca_align64(i32 addrspace(1)* %out,
; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: global_store_dword v[0:1], v0, off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_sub_u32 s32, s32, 0x2000
; GCN-NEXT: s_addk_i32 s32, 0xe000
; GCN-NEXT: s_mov_b32 s33, s7
; GCN-NEXT: s_setpc_b64 s[30:31]
entry:


@ -347,7 +347,7 @@ end:
; Check for prologue initializing special SGPRs pointing to scratch.
; HSA-LABEL: {{^}}store_flat_scratch:
; CI-DAG: s_mov_b32 flat_scratch_lo, s9
; CI-DAG: s_add_u32 [[ADD:s[0-9]+]], s8, s11
; CI-DAG: s_add_i32 [[ADD:s[0-9]+]], s8, s11
; CI-DAG: s_lshr_b32 flat_scratch_hi, [[ADD]], 8
; GFX9: s_add_u32 flat_scratch_lo, s6, s9


@ -42,7 +42,7 @@
; HSA-ALLOCA: .end_amd_kernel_code_t
; HSA-ALLOCA: s_mov_b32 flat_scratch_lo, s7
; HSA-ALLOCA: s_add_u32 s6, s6, s9
; HSA-ALLOCA: s_add_i32 s6, s6, s9
; HSA-ALLOCA: s_lshr_b32 flat_scratch_hi, s6, 8
; SI-ALLOCA: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen ; encoding: [0x00,0x10,0x70,0xe0


@ -5,7 +5,7 @@
; GCN-LABEL: {{^}}test_call_undef:
; SDAG: s_mov_b32 flat_scratch_lo, s13
; SDAG: s_add_u32 s12, s12, s17
; SDAG: s_add_i32 s12, s12, s17
; SDAG: s_lshr_b32
; GCN: s_endpgm
define amdgpu_kernel void @test_call_undef() #0 {
@ -27,7 +27,7 @@ define i32 @test_tail_call_undef() #0 {
; GCN-LABEL: {{^}}test_call_null:
; SDAG: s_mov_b32 flat_scratch_lo, s13
; SDAG: s_add_u32 s12, s12, s17
; SDAG: s_add_i32 s12, s12, s17
; SDAG: s_lshr_b32
; GISEL: s_swappc_b64 s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}


@ -59,8 +59,8 @@ define void @test_func_call_external_void_func_void_clobber_s30_s31_call_externa
; GCN: v_writelane_b32 v40, s33, 4
; GCN: s_mov_b32 s33, s32
; MUBUF: s_add_u32 s32, s32, 0x400
; FLATSCR: s_add_u32 s32, s32, 16
; MUBUF: s_addk_i32 s32, 0x400
; FLATSCR: s_add_i32 s32, s32, 16
; GCN: s_swappc_b64
; GCN-NEXT: s_swappc_b64


@ -52,14 +52,14 @@ define void @callee_with_stack() #0 {
; MUBUF-NEXT: s_mov_b32 [[FP_COPY:s4]], s33
; FLATSCR-NEXT: s_mov_b32 [[FP_COPY:s0]], s33
; GCN-NEXT: s_mov_b32 s33, s32
; MUBUF-NEXT: s_add_u32 s32, s32, 0x200
; FLATSCR-NEXT: s_add_u32 s32, s32, 8
; MUBUF-NEXT: s_addk_i32 s32, 0x200
; FLATSCR-NEXT: s_add_i32 s32, s32, 8
; GCN-NEXT: v_mov_b32_e32 v0, 0{{$}}
; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4{{$}}
; FLATSCR-NEXT: scratch_store_dword off, v0, s33 offset:4{{$}}
; GCN-NEXT: s_waitcnt vmcnt(0)
; MUBUF-NEXT: s_sub_u32 s32, s32, 0x200
; FLATSCR-NEXT: s_sub_u32 s32, s32, 8
; MUBUF-NEXT: s_addk_i32 s32, 0xfe00
; FLATSCR-NEXT: s_add_i32 s32, s32, -8
; GCN-NEXT: s_mov_b32 s33, [[FP_COPY]]
; GCN-NEXT: s_setpc_b64
define void @callee_with_stack_no_fp_elim_all() #1 {
@ -91,8 +91,8 @@ define void @callee_with_stack_no_fp_elim_non_leaf() #2 {
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]]
; GCN: v_writelane_b32 [[CSR_VGPR]], s33, 2
; GCN-DAG: s_mov_b32 s33, s32
; MUBUF-DAG: s_add_u32 s32, s32, 0x400{{$}}
; FLATSCR-DAG: s_add_u32 s32, s32, 16{{$}}
; MUBUF-DAG: s_addk_i32 s32, 0x400{{$}}
; FLATSCR-DAG: s_add_i32 s32, s32, 16{{$}}
; GCN-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s30,
; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s31,
@ -107,8 +107,8 @@ define void @callee_with_stack_no_fp_elim_non_leaf() #2 {
; FLATSCR-DAG: v_readlane_b32 s0, [[CSR_VGPR]]
; FLATSCR-DAG: v_readlane_b32 s1, [[CSR_VGPR]]
; MUBUF: s_sub_u32 s32, s32, 0x400{{$}}
; FLATSCR: s_sub_u32 s32, s32, 16{{$}}
; MUBUF: s_addk_i32 s32, 0xfc00{{$}}
; FLATSCR: s_add_i32 s32, s32, -16{{$}}
; GCN-NEXT: v_readlane_b32 s33, [[CSR_VGPR]], 2
; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
; MUBUF-NEXT: buffer_load_dword [[CSR_VGPR]], off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
@ -136,8 +136,8 @@ define void @callee_with_stack_and_call() #0 {
; MUBUF-NEXT: buffer_store_dword [[CSR_VGPR:v[0-9]+]], off, s[0:3], s32 ; 4-byte Folded Spill
; FLATSCR-NEXT: scratch_store_dword off, [[CSR_VGPR:v[0-9]+]], s32 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]]
; MUBUF-DAG: s_add_u32 s32, s32, 0x400
; FLATSCR-DAG: s_add_u32 s32, s32, 16
; MUBUF-DAG: s_addk_i32 s32, 0x400
; FLATSCR-DAG: s_add_i32 s32, s32, 16
; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s33, [[FP_SPILL_LANE:[0-9]+]]
; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s30, 0
@ -149,8 +149,8 @@ define void @callee_with_stack_and_call() #0 {
; FLATSCR-DAG: v_readlane_b32 s0, v40, 0
; FLATSCR-DAG: v_readlane_b32 s1, v40, 1
; MUBUF: s_sub_u32 s32, s32, 0x400
; FLATSCR: s_sub_u32 s32, s32, 16
; MUBUF: s_addk_i32 s32, 0xfc00
; FLATSCR: s_add_i32 s32, s32, -16
; GCN-NEXT: v_readlane_b32 s33, [[CSR_VGPR]], [[FP_SPILL_LANE]]
; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
; MUBUF-NEXT: buffer_load_dword [[CSR_VGPR]], off, s[0:3], s32 ; 4-byte Folded Reload
@ -251,11 +251,11 @@ define void @spill_only_csr_sgpr() {
; MUBUF: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload
; FLATSCR: scratch_load_dword v41, off, s33 ; 4-byte Folded Reload
; MUBUF: s_add_u32 s32, s32, 0x300
; MUBUF-NEXT: s_sub_u32 s32, s32, 0x300
; MUBUF: s_addk_i32 s32, 0x300
; MUBUF-NEXT: s_addk_i32 s32, 0xfd00
; MUBUF-NEXT: s_mov_b32 s33, s4
; FLATSCR: s_add_u32 s32, s32, 12
; FLATSCR-NEXT: s_sub_u32 s32, s32, 12
; FLATSCR: s_add_i32 s32, s32, 12
; FLATSCR-NEXT: s_add_i32 s32, s32, -12
; FLATSCR-NEXT: s_mov_b32 s33, s0
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
@ -284,10 +284,10 @@ define void @callee_with_stack_no_fp_elim_csr_vgpr() #1 {
; GCN: ;;#ASMSTART
; GCN: v_writelane_b32 v1
; MUBUF: s_add_u32 s32, s32, 0x400
; MUBUF: s_sub_u32 s32, s32, 0x400
; FLATSCR: s_add_u32 s32, s32, 16
; FLATSCR: s_sub_u32 s32, s32, 16
; MUBUF: s_addk_i32 s32, 0x400
; MUBUF: s_addk_i32 s32, 0xfc00
; FLATSCR: s_add_i32 s32, s32, 16
; FLATSCR: s_add_i32 s32, s32, -16
; GCN-NEXT: v_readlane_b32 s33, v1, 63
; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
; MUBUF-NEXT: buffer_load_dword [[CSR_VGPR]], off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
@ -330,11 +330,11 @@ define void @last_lane_vgpr_for_fp_csr() #1 {
; GCN: v_writelane_b32 v1,
; MUBUF: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload
; FLATSCR: scratch_load_dword v41, off, s33 ; 4-byte Folded Reload
; MUBUF: s_add_u32 s32, s32, 0x400
; FLATSCR: s_add_u32 s32, s32, 16
; MUBUF: s_addk_i32 s32, 0x400
; FLATSCR: s_add_i32 s32, s32, 16
; GCN-COUNT-64: v_readlane_b32 s{{[0-9]+}}, v1
; MUBUF-NEXT: s_sub_u32 s32, s32, 0x400
; FLATSCR-NEXT: s_sub_u32 s32, s32, 16
; MUBUF-NEXT: s_addk_i32 s32, 0xfc00
; FLATSCR-NEXT: s_add_i32 s32, s32, -16
; GCN-NEXT: s_mov_b32 s33, [[FP_COPY]]
; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
; MUBUF-NEXT: buffer_load_dword [[CSR_VGPR]], off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
@ -362,18 +362,18 @@ define void @no_new_vgpr_for_fp_csr() #1 {
; GCN: s_waitcnt
; MUBUF-NEXT: s_mov_b32 [[FP_COPY:s4]], s33
; FLATSCR-NEXT: s_mov_b32 [[FP_COPY:s0]], s33
; MUBUF-NEXT: s_add_u32 s33, s32, 0x7ffc0
; FLATSCR-NEXT: s_add_u32 s33, s32, 0x1fff
; MUBUF-NEXT: s_add_i32 s33, s32, 0x7ffc0
; FLATSCR-NEXT: s_add_i32 s33, s32, 0x1fff
; MUBUF-NEXT: s_and_b32 s33, s33, 0xfff80000
; FLATSCR-NEXT: s_and_b32 s33, s33, 0xffffe000
; MUBUF-NEXT: s_add_u32 s32, s32, 0x100000
; FLATSCR-NEXT: s_add_u32 s32, s32, 0x4000
; MUBUF-NEXT: s_add_i32 s32, s32, 0x100000
; FLATSCR-NEXT: s_addk_i32 s32, 0x4000
; GCN-NEXT: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0
; MUBUF-NEXT: buffer_store_dword [[ZERO]], off, s[0:3], s33
; FLATSCR-NEXT: scratch_store_dword off, [[ZERO]], s33
; GCN-NEXT: s_waitcnt vmcnt(0)
; MUBUF-NEXT: s_sub_u32 s32, s32, 0x100000
; FLATSCR-NEXT: s_sub_u32 s32, s32, 0x4000
; MUBUF-NEXT: s_add_i32 s32, s32, 0xfff00000
; FLATSCR-NEXT: s_addk_i32 s32, 0xc000
; GCN-NEXT: s_mov_b32 s33, [[FP_COPY]]
; GCN-NEXT: s_setpc_b64
define void @realign_stack_no_fp_elim() #1 {
@ -397,14 +397,14 @@ define void @realign_stack_no_fp_elim() #1 {
; FLATSCR: scratch_store_dword off, [[ZERO]], s33 offset:4
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN: ;;#ASMSTART
; MUBUF: s_add_u32 s32, s32, 0x300
; MUBUF: s_addk_i32 s32, 0x300
; MUBUF-NEXT: v_readlane_b32 s4, v1, 0
; MUBUF-NEXT: v_readlane_b32 s5, v1, 1
; FLATSCR: s_add_u32 s32, s32, 12
; FLATSCR: s_add_i32 s32, s32, 12
; FLATSCR-NEXT: v_readlane_b32 s0, v1, 0
; FLATSCR-NEXT: v_readlane_b32 s1, v1, 1
; MUBUF-NEXT: s_sub_u32 s32, s32, 0x300
; FLATSCR-NEXT: s_sub_u32 s32, s32, 12
; MUBUF-NEXT: s_addk_i32 s32, 0xfd00
; FLATSCR-NEXT: s_add_i32 s32, s32, -12
; GCN-NEXT: v_readlane_b32 s33, v1, 2
; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
; MUBUF-NEXT: buffer_load_dword [[CSR_VGPR]], off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
@ -441,16 +441,16 @@ define void @no_unused_non_csr_sgpr_for_fp() #1 {
; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s31, 1
; MUBUF-DAG: buffer_store_dword
; FLATSCR-DAG: scratch_store_dword
; MUBUF: s_add_u32 s32, s32, 0x300{{$}}
; FLATSCR: s_add_u32 s32, s32, 12{{$}}
; MUBUF: s_addk_i32 s32, 0x300{{$}}
; FLATSCR: s_add_i32 s32, s32, 12{{$}}
; MUBUF: v_readlane_b32 s4, [[CSR_VGPR]], 0
; FLATSCR: v_readlane_b32 s0, [[CSR_VGPR]], 0
; GCN: ;;#ASMSTART
; MUBUF: v_readlane_b32 s5, [[CSR_VGPR]], 1
; FLATSCR: v_readlane_b32 s1, [[CSR_VGPR]], 1
; MUBUF-NEXT: s_sub_u32 s32, s32, 0x300{{$}}
; FLATSCR-NEXT: s_sub_u32 s32, s32, 12{{$}}
; MUBUF-NEXT: s_addk_i32 s32, 0xfd00{{$}}
; FLATSCR-NEXT: s_add_i32 s32, s32, -12{{$}}
; GCN-NEXT: v_readlane_b32 s33, [[CSR_VGPR]], 2
; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
; MUBUF-NEXT: buffer_load_dword [[CSR_VGPR]], off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
@ -483,17 +483,17 @@ define void @no_unused_non_csr_sgpr_for_fp_no_scratch_vgpr() #1 {
; GCN-LABEL: {{^}}scratch_reg_needed_mubuf_offset:
; GCN: s_waitcnt
; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}}
; MUBUF-NEXT: s_add_u32 [[SCRATCH_SGPR:s[0-9]+]], s32, 0x40200
; MUBUF-NEXT: s_add_i32 [[SCRATCH_SGPR:s[0-9]+]], s32, 0x40200
; MUBUF-NEXT: buffer_store_dword [[CSR_VGPR:v[0-9]+]], off, s[0:3], [[SCRATCH_SGPR]] ; 4-byte Folded Spill
; FLATSCR-NEXT: s_add_u32 [[SCRATCH_SGPR:s[0-9]+]], s32, 0x1008
; FLATSCR-NEXT: s_add_i32 [[SCRATCH_SGPR:s[0-9]+]], s32, 0x1008
; FLATSCR-NEXT: scratch_store_dword off, [[CSR_VGPR:v[0-9]+]], [[SCRATCH_SGPR]] ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]]
; GCN-NEXT: v_writelane_b32 [[CSR_VGPR]], s33, 2
; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s30, 0
; GCN-DAG: s_mov_b32 s33, s32
; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s31, 1
; MUBUF-DAG: s_add_u32 s32, s32, 0x40300{{$}}
; FLATSCR-DAG: s_add_u32 s32, s32, 0x100c{{$}}
; MUBUF-DAG: s_add_i32 s32, s32, 0x40300{{$}}
; FLATSCR-DAG: s_addk_i32 s32, 0x100c{{$}}
; MUBUF-DAG: buffer_store_dword
; FLATSCR-DAG: scratch_store_dword
@ -502,13 +502,13 @@ define void @no_unused_non_csr_sgpr_for_fp_no_scratch_vgpr() #1 {
; GCN: ;;#ASMSTART
; MUBUF: v_readlane_b32 s5, [[CSR_VGPR]], 1
; FLATSCR: v_readlane_b32 s1, [[CSR_VGPR]], 1
; MUBUF-NEXT: s_sub_u32 s32, s32, 0x40300{{$}}
; FLATSCR-NEXT: s_sub_u32 s32, s32, 0x100c{{$}}
; MUBUF-NEXT: s_add_i32 s32, s32, 0xfffbfd00{{$}}
; FLATSCR-NEXT: s_addk_i32 s32, 0xeff4{{$}}
; GCN-NEXT: v_readlane_b32 s33, [[CSR_VGPR]], 2
; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
; MUBUF-NEXT: s_add_u32 [[SCRATCH_SGPR:s[0-9]+]], s32, 0x40200
; MUBUF-NEXT: s_add_i32 [[SCRATCH_SGPR:s[0-9]+]], s32, 0x40200
; MUBUF-NEXT: buffer_load_dword [[CSR_VGPR]], off, s[0:3], [[SCRATCH_SGPR]] ; 4-byte Folded Reload
; FLATSCR-NEXT: s_add_u32 [[SCRATCH_SGPR:s[0-9]+]], s32, 0x1008
; FLATSCR-NEXT: s_add_i32 [[SCRATCH_SGPR:s[0-9]+]], s32, 0x1008
; FLATSCR-NEXT: scratch_load_dword [[CSR_VGPR]], off, [[SCRATCH_SGPR]] ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]]
; GCN-NEXT: s_waitcnt vmcnt(0)
@ -546,13 +546,13 @@ define internal void @local_empty_func() #0 {
; GCN-LABEL: {{^}}ipra_call_with_stack:
; GCN: s_mov_b32 [[FP_COPY:s[0-9]+]], s33
; GCN: s_mov_b32 s33, s32
; MUBUF: s_add_u32 s32, s32, 0x400
; FLATSCR: s_add_u32 s32, s32, 16
; MUBUF: s_addk_i32 s32, 0x400
; FLATSCR: s_add_i32 s32, s32, 16
; MUBUF: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s33{{$}}
; FLATSCR: scratch_store_dword off, v{{[0-9]+}}, s33{{$}}
; GCN: s_swappc_b64
; MUBUF: s_sub_u32 s32, s32, 0x400
; FLATSCR: s_sub_u32 s32, s32, 16
; MUBUF: s_addk_i32 s32, 0xfc00
; FLATSCR: s_add_i32 s32, s32, -16
; GCN: s_mov_b32 s33, [[FP_COPY:s[0-9]+]]
define void @ipra_call_with_stack() #0 {
%alloca = alloca i32, addrspace(5)
@ -666,13 +666,13 @@ define void @callee_need_to_spill_fp_to_reg() #1 {
; scratch VGPR to hold the offset.
; GCN-LABEL: {{^}}spill_fp_to_memory_scratch_reg_needed_mubuf_offset
; MUBUF: s_or_saveexec_b64 s[4:5], -1
; MUBUF-NEXT: s_add_u32 [[SCRATCH_SGPR:s[0-9]+]], s32, 0x40200
; MUBUF-NEXT: s_add_i32 [[SCRATCH_SGPR:s[0-9]+]], s32, 0x40200
; MUBUF-NEXT: buffer_store_dword v39, off, s[0:3], [[SCRATCH_SGPR]] ; 4-byte Folded Spill
; MUBUF: v_mov_b32_e32 v0, s33
; GCN-NOT: v_mov_b32_e32 v0, 0x100c
; MUBUF-NEXT: s_add_u32 [[SCRATCH_SGPR:s[0-9]+]], s32, 0x40300
; MUBUF-NEXT: s_add_i32 [[SCRATCH_SGPR:s[0-9]+]], s32, 0x40300
; MUBUF: buffer_store_dword v0, off, s[0:3], [[SCRATCH_SGPR]] ; 4-byte Folded Spill
; FLATSCR: s_add_u32 [[SOFF:s[0-9]+]], s33, 0x1004
; FLATSCR: s_add_i32 [[SOFF:s[0-9]+]], s33, 0x1004
; FLATSCR: v_mov_b32_e32 v0, 0
; FLATSCR: scratch_store_dword off, v0, [[SOFF]]
define void @spill_fp_to_memory_scratch_reg_needed_mubuf_offset([4096 x i8] addrspace(5)* byval([4096 x i8]) align 4 %arg) #3 {


@ -522,7 +522,7 @@ define hidden void @func_use_every_sgpr_input_call_use_workgroup_id_xyz() #1 {
; GCN-LABEL: {{^}}func_use_every_sgpr_input_call_use_workgroup_id_xyz_spill:
; GCN-DAG: s_mov_b32 s33, s32
; GCN-DAG: s_add_u32 s32, s32, 0x400
; GCN-DAG: s_addk_i32 s32, 0x400
; GCN-DAG: s_mov_b64 s{{\[}}[[LO_X:[0-9]+]]{{\:}}[[HI_X:[0-9]+]]{{\]}}, s[4:5]
; GCN-DAG: s_mov_b64 s{{\[}}[[LO_Y:[0-9]+]]{{\:}}[[HI_Y:[0-9]+]]{{\]}}, s[6:7]


@ -403,7 +403,7 @@ define void @func_call_too_many_args_use_workitem_id_x(i32 %arg0) #1 {
; Requires loading and storing to stack slot.
; GCN-LABEL: {{^}}too_many_args_call_too_many_args_use_workitem_id_x:
; GCN-DAG: s_add_u32 s32, s32, 0x400{{$}}
; GCN-DAG: s_addk_i32 s32, 0x400{{$}}
; GCN-DAG: buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
; GCN-DAG: buffer_load_dword v32, off, s[0:3], s33{{$}}
@ -411,7 +411,7 @@ define void @func_call_too_many_args_use_workitem_id_x(i32 %arg0) #1 {
; GCN: s_swappc_b64
; GCN: s_sub_u32 s32, s32, 0x400{{$}}
; GCN: s_addk_i32 s32, 0xfc00{{$}}
; GCN: buffer_load_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
; GCN: s_setpc_b64
define void @too_many_args_call_too_many_args_use_workitem_id_x(


@ -509,7 +509,7 @@ define void @func_call_too_many_args_use_workitem_id_x(i32 %arg0) #1 {
; Requires loading and storing to stack slot.
; GCN-LABEL: {{^}}too_many_args_call_too_many_args_use_workitem_id_x:
; GCN-DAG: s_add_u32 s32, s32, 0x400{{$}}
; GCN-DAG: s_addk_i32 s32, 0x400{{$}}
; GCN-DAG: buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
; GCN-DAG: buffer_load_dword v32, off, s[0:3], s33{{$}}
@ -517,7 +517,7 @@ define void @func_call_too_many_args_use_workitem_id_x(i32 %arg0) #1 {
; GCN: s_swappc_b64
; GCN: s_sub_u32 s32, s32, 0x400{{$}}
; GCN: s_addk_i32 s32, 0xfc00{{$}}
; GCN: buffer_load_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
; GCN: s_setpc_b64
define void @too_many_args_call_too_many_args_use_workitem_id_x(


@ -22,7 +22,7 @@ entry:
define amdgpu_kernel void @test_kern_stack() local_unnamed_addr #0 {
; GFX803-LABEL: test_kern_stack:
; GFX803: ; %bb.0: ; %entry
; GFX803-NEXT: s_add_u32 s4, s4, s7
; GFX803-NEXT: s_add_i32 s4, s4, s7
; GFX803-NEXT: s_lshr_b32 flat_scratch_hi, s4, 8
; GFX803-NEXT: s_add_u32 s0, s0, s7
; GFX803-NEXT: s_addc_u32 s1, s1, 0
@ -64,7 +64,7 @@ entry:
define amdgpu_kernel void @test_kern_call() local_unnamed_addr #0 {
; GFX803-LABEL: test_kern_call:
; GFX803: ; %bb.0: ; %entry
; GFX803-NEXT: s_add_u32 s4, s4, s7
; GFX803-NEXT: s_add_i32 s4, s4, s7
; GFX803-NEXT: s_lshr_b32 flat_scratch_hi, s4, 8
; GFX803-NEXT: s_add_u32 s0, s0, s7
; GFX803-NEXT: s_addc_u32 s1, s1, 0
@ -111,7 +111,7 @@ entry:
define amdgpu_kernel void @test_kern_stack_and_call() local_unnamed_addr #0 {
; GFX803-LABEL: test_kern_stack_and_call:
; GFX803: ; %bb.0: ; %entry
; GFX803-NEXT: s_add_u32 s4, s4, s7
; GFX803-NEXT: s_add_i32 s4, s4, s7
; GFX803-NEXT: s_lshr_b32 flat_scratch_hi, s4, 8
; GFX803-NEXT: s_add_u32 s0, s0, s7
; GFX803-NEXT: s_addc_u32 s1, s1, 0
@ -188,7 +188,7 @@ entry:
define amdgpu_kernel void @test_force_fp_kern_stack() local_unnamed_addr #2 {
; GFX803-LABEL: test_force_fp_kern_stack:
; GFX803: ; %bb.0: ; %entry
; GFX803-NEXT: s_add_u32 s4, s4, s7
; GFX803-NEXT: s_add_i32 s4, s4, s7
; GFX803-NEXT: s_lshr_b32 flat_scratch_hi, s4, 8
; GFX803-NEXT: s_add_u32 s0, s0, s7
; GFX803-NEXT: s_mov_b32 s33, 0
@ -233,7 +233,7 @@ entry:
define amdgpu_kernel void @test_force_fp_kern_call() local_unnamed_addr #2 {
; GFX803-LABEL: test_force_fp_kern_call:
; GFX803: ; %bb.0: ; %entry
; GFX803-NEXT: s_add_u32 s4, s4, s7
; GFX803-NEXT: s_add_i32 s4, s4, s7
; GFX803-NEXT: s_lshr_b32 flat_scratch_hi, s4, 8
; GFX803-NEXT: s_add_u32 s0, s0, s7
; GFX803-NEXT: s_addc_u32 s1, s1, 0
@ -283,7 +283,7 @@ entry:
define amdgpu_kernel void @test_force_fp_kern_stack_and_call() local_unnamed_addr #2 {
; GFX803-LABEL: test_force_fp_kern_stack_and_call:
; GFX803: ; %bb.0: ; %entry
; GFX803-NEXT: s_add_u32 s4, s4, s7
; GFX803-NEXT: s_add_i32 s4, s4, s7
; GFX803-NEXT: s_lshr_b32 flat_scratch_hi, s4, 8
; GFX803-NEXT: s_add_u32 s0, s0, s7
; GFX803-NEXT: s_mov_b32 s33, 0
@ -344,7 +344,7 @@ entry:
define amdgpu_kernel void @test_sgpr_offset_kernel() #1 {
; GFX803-LABEL: test_sgpr_offset_kernel:
; GFX803: ; %bb.0: ; %entry
; GFX803-NEXT: s_add_u32 s4, s4, s7
; GFX803-NEXT: s_add_i32 s4, s4, s7
; GFX803-NEXT: s_lshr_b32 flat_scratch_hi, s4, 8
; GFX803-NEXT: s_add_u32 s0, s0, s7
; GFX803-NEXT: s_addc_u32 s1, s1, 0


@ -33,7 +33,7 @@ define float @call_split_type_used_outside_block_v2f32() #0 {
; GCN-NEXT: v_writelane_b32 v40, s33, 2
; GCN-NEXT: v_writelane_b32 v40, s30, 0
; GCN-NEXT: s_mov_b32 s33, s32
; GCN-NEXT: s_add_u32 s32, s32, 0x400
; GCN-NEXT: s_addk_i32 s32, 0x400
; GCN-NEXT: s_getpc_b64 s[4:5]
; GCN-NEXT: s_add_u32 s4, s4, func_v2f32@rel32@lo+4
; GCN-NEXT: s_addc_u32 s5, s5, func_v2f32@rel32@hi+12
@ -41,7 +41,7 @@ define float @call_split_type_used_outside_block_v2f32() #0 {
; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GCN-NEXT: v_readlane_b32 s4, v40, 0
; GCN-NEXT: v_readlane_b32 s5, v40, 1
; GCN-NEXT: s_sub_u32 s32, s32, 0x400
; GCN-NEXT: s_addk_i32 s32, 0xfc00
; GCN-NEXT: v_readlane_b32 s33, v40, 2
; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
@ -67,7 +67,7 @@ define float @call_split_type_used_outside_block_v3f32() #0 {
; GCN-NEXT: v_writelane_b32 v40, s33, 2
; GCN-NEXT: v_writelane_b32 v40, s30, 0
; GCN-NEXT: s_mov_b32 s33, s32
; GCN-NEXT: s_add_u32 s32, s32, 0x400
; GCN-NEXT: s_addk_i32 s32, 0x400
; GCN-NEXT: s_getpc_b64 s[4:5]
; GCN-NEXT: s_add_u32 s4, s4, func_v3f32@rel32@lo+4
; GCN-NEXT: s_addc_u32 s5, s5, func_v3f32@rel32@hi+12
@ -75,7 +75,7 @@ define float @call_split_type_used_outside_block_v3f32() #0 {
; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GCN-NEXT: v_readlane_b32 s4, v40, 0
; GCN-NEXT: v_readlane_b32 s5, v40, 1
; GCN-NEXT: s_sub_u32 s32, s32, 0x400
; GCN-NEXT: s_addk_i32 s32, 0xfc00
; GCN-NEXT: v_readlane_b32 s33, v40, 2
; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
@ -101,7 +101,7 @@ define half @call_split_type_used_outside_block_v4f16() #0 {
; GCN-NEXT: v_writelane_b32 v40, s33, 2
; GCN-NEXT: v_writelane_b32 v40, s30, 0
; GCN-NEXT: s_mov_b32 s33, s32
; GCN-NEXT: s_add_u32 s32, s32, 0x400
; GCN-NEXT: s_addk_i32 s32, 0x400
; GCN-NEXT: s_getpc_b64 s[4:5]
; GCN-NEXT: s_add_u32 s4, s4, func_v4f16@rel32@lo+4
; GCN-NEXT: s_addc_u32 s5, s5, func_v4f16@rel32@hi+12
@ -109,7 +109,7 @@ define half @call_split_type_used_outside_block_v4f16() #0 {
; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GCN-NEXT: v_readlane_b32 s4, v40, 0
; GCN-NEXT: v_readlane_b32 s5, v40, 1
; GCN-NEXT: s_sub_u32 s32, s32, 0x400
; GCN-NEXT: s_addk_i32 s32, 0xfc00
; GCN-NEXT: v_readlane_b32 s33, v40, 2
; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
@ -135,7 +135,7 @@ define { i32, half } @call_split_type_used_outside_block_struct() #0 {
; GCN-NEXT: v_writelane_b32 v40, s33, 2
; GCN-NEXT: v_writelane_b32 v40, s30, 0
; GCN-NEXT: s_mov_b32 s33, s32
; GCN-NEXT: s_add_u32 s32, s32, 0x400
; GCN-NEXT: s_addk_i32 s32, 0x400
; GCN-NEXT: s_getpc_b64 s[4:5]
; GCN-NEXT: s_add_u32 s4, s4, func_struct@rel32@lo+4
; GCN-NEXT: s_addc_u32 s5, s5, func_struct@rel32@hi+12
@ -144,7 +144,7 @@ define { i32, half } @call_split_type_used_outside_block_struct() #0 {
; GCN-NEXT: v_readlane_b32 s4, v40, 0
; GCN-NEXT: v_mov_b32_e32 v1, v4
; GCN-NEXT: v_readlane_b32 s5, v40, 1
; GCN-NEXT: s_sub_u32 s32, s32, 0x400
; GCN-NEXT: s_addk_i32 s32, 0xfc00
; GCN-NEXT: v_readlane_b32 s33, v40, 2
; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload


@ -228,10 +228,10 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
; GFX9-NEXT: s_lshl_b32 s1, s0, 2
; GFX9-NEXT: s_and_b32 s0, s0, 15
; GFX9-NEXT: s_lshl_b32 s0, s0, 2
; GFX9-NEXT: s_add_u32 s1, 4, s1
; GFX9-NEXT: s_add_i32 s1, s1, 4
; GFX9-NEXT: scratch_store_dword off, v0, s1
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_add_u32 s0, 4, s0
; GFX9-NEXT: s_add_i32 s0, s0, 4
; GFX9-NEXT: scratch_load_dword v0, off, s0 glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_endpgm
@ -248,8 +248,8 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
; GFX10-NEXT: s_and_b32 s1, s0, 15
; GFX10-NEXT: s_lshl_b32 s0, s0, 2
; GFX10-NEXT: s_lshl_b32 s1, s1, 2
; GFX10-NEXT: s_add_u32 s0, 4, s0
; GFX10-NEXT: s_add_u32 s1, 4, s1
; GFX10-NEXT: s_add_i32 s0, s0, 4
; GFX10-NEXT: s_add_i32 s1, s1, 4
; GFX10-NEXT: scratch_store_dword off, v0, s0
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: scratch_load_dword v0, off, s1 glc dlc
@ -270,10 +270,10 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
; GFX9-PAL-NEXT: s_lshl_b32 s1, s0, 2
; GFX9-PAL-NEXT: s_and_b32 s0, s0, 15
; GFX9-PAL-NEXT: s_lshl_b32 s0, s0, 2
; GFX9-PAL-NEXT: s_add_u32 s1, 4, s1
; GFX9-PAL-NEXT: s_add_i32 s1, s1, 4
; GFX9-PAL-NEXT: scratch_store_dword off, v0, s1
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX9-PAL-NEXT: s_add_u32 s0, 4, s0
; GFX9-PAL-NEXT: s_add_i32 s0, s0, 4
; GFX9-PAL-NEXT: scratch_load_dword v0, off, s0 glc
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX9-PAL-NEXT: s_endpgm
@ -295,8 +295,8 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
; GFX10-PAL-NEXT: s_and_b32 s1, s0, 15
; GFX10-PAL-NEXT: s_lshl_b32 s0, s0, 2
; GFX10-PAL-NEXT: s_lshl_b32 s1, s1, 2
; GFX10-PAL-NEXT: s_add_u32 s0, 4, s0
; GFX10-PAL-NEXT: s_add_u32 s1, 4, s1
; GFX10-PAL-NEXT: s_add_i32 s0, s0, 4
; GFX10-PAL-NEXT: s_add_i32 s1, s1, 4
; GFX10-PAL-NEXT: scratch_store_dword off, v0, s0
; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-PAL-NEXT: scratch_load_dword v0, off, s1 glc dlc
@ -321,13 +321,13 @@ define amdgpu_ps void @store_load_sindex_foo(i32 inreg %idx) {
; GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s3
; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0
; GFX9-NEXT: s_lshl_b32 s0, s2, 2
; GFX9-NEXT: s_add_u32 s0, 4, s0
; GFX9-NEXT: s_add_i32 s0, s0, 4
; GFX9-NEXT: v_mov_b32_e32 v0, 15
; GFX9-NEXT: scratch_store_dword off, v0, s0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_and_b32 s0, s2, 15
; GFX9-NEXT: s_lshl_b32 s0, s0, 2
; GFX9-NEXT: s_add_u32 s0, 4, s0
; GFX9-NEXT: s_add_i32 s0, s0, 4
; GFX9-NEXT: scratch_load_dword v0, off, s0 glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_endpgm
@ -342,8 +342,8 @@ define amdgpu_ps void @store_load_sindex_foo(i32 inreg %idx) {
; GFX10-NEXT: v_mov_b32_e32 v0, 15
; GFX10-NEXT: s_lshl_b32 s1, s2, 2
; GFX10-NEXT: s_lshl_b32 s0, s0, 2
; GFX10-NEXT: s_add_u32 s1, 4, s1
; GFX10-NEXT: s_add_u32 s0, 4, s0
; GFX10-NEXT: s_add_i32 s1, s1, 4
; GFX10-NEXT: s_add_i32 s0, s0, 4
; GFX10-NEXT: scratch_store_dword off, v0, s1
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: scratch_load_dword v0, off, s0 glc dlc
@ -363,10 +363,10 @@ define amdgpu_ps void @store_load_sindex_foo(i32 inreg %idx) {
; GFX9-PAL-NEXT: s_lshl_b32 s1, s0, 2
; GFX9-PAL-NEXT: s_and_b32 s0, s0, 15
; GFX9-PAL-NEXT: s_lshl_b32 s0, s0, 2
; GFX9-PAL-NEXT: s_add_u32 s1, 4, s1
; GFX9-PAL-NEXT: s_add_i32 s1, s1, 4
; GFX9-PAL-NEXT: scratch_store_dword off, v0, s1
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX9-PAL-NEXT: s_add_u32 s0, 4, s0
; GFX9-PAL-NEXT: s_add_i32 s0, s0, 4
; GFX9-PAL-NEXT: scratch_load_dword v0, off, s0 glc
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX9-PAL-NEXT: s_endpgm
@ -386,8 +386,8 @@ define amdgpu_ps void @store_load_sindex_foo(i32 inreg %idx) {
; GFX10-PAL-NEXT: v_mov_b32_e32 v0, 15
; GFX10-PAL-NEXT: s_lshl_b32 s0, s0, 2
; GFX10-PAL-NEXT: s_lshl_b32 s1, s1, 2
; GFX10-PAL-NEXT: s_add_u32 s0, 4, s0
; GFX10-PAL-NEXT: s_add_u32 s1, 4, s1
; GFX10-PAL-NEXT: s_add_i32 s0, s0, 4
; GFX10-PAL-NEXT: s_add_i32 s1, s1, 4
; GFX10-PAL-NEXT: scratch_store_dword off, v0, s0
; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-PAL-NEXT: scratch_load_dword v0, off, s1 glc dlc
@ -857,10 +857,10 @@ define amdgpu_kernel void @store_load_sindex_small_offset_kernel(i32 %idx) {
; GFX9-NEXT: s_and_b32 s0, s0, 15
; GFX9-NEXT: s_lshl_b32 s0, s0, 2
; GFX9-NEXT: v_mov_b32_e32 v0, 15
; GFX9-NEXT: s_add_u32 s1, 0x104, s1
; GFX9-NEXT: s_addk_i32 s1, 0x104
; GFX9-NEXT: scratch_store_dword off, v0, s1
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_add_u32 s0, 0x104, s0
; GFX9-NEXT: s_addk_i32 s0, 0x104
; GFX9-NEXT: scratch_load_dword v0, off, s0 glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_endpgm
@ -879,8 +879,8 @@ define amdgpu_kernel void @store_load_sindex_small_offset_kernel(i32 %idx) {
; GFX10-NEXT: s_and_b32 s1, s0, 15
; GFX10-NEXT: s_lshl_b32 s0, s0, 2
; GFX10-NEXT: s_lshl_b32 s1, s1, 2
; GFX10-NEXT: s_add_u32 s0, 0x104, s0
; GFX10-NEXT: s_add_u32 s1, 0x104, s1
; GFX10-NEXT: s_addk_i32 s0, 0x104
; GFX10-NEXT: s_addk_i32 s1, 0x104
; GFX10-NEXT: scratch_store_dword off, v0, s0
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: scratch_load_dword v0, off, s1 glc dlc
@ -904,10 +904,10 @@ define amdgpu_kernel void @store_load_sindex_small_offset_kernel(i32 %idx) {
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX9-PAL-NEXT: s_lshl_b32 s0, s0, 2
; GFX9-PAL-NEXT: v_mov_b32_e32 v0, 15
; GFX9-PAL-NEXT: s_add_u32 s1, 0x104, s1
; GFX9-PAL-NEXT: s_addk_i32 s1, 0x104
; GFX9-PAL-NEXT: scratch_store_dword off, v0, s1
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX9-PAL-NEXT: s_add_u32 s0, 0x104, s0
; GFX9-PAL-NEXT: s_addk_i32 s0, 0x104
; GFX9-PAL-NEXT: scratch_load_dword v0, off, s0 glc
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX9-PAL-NEXT: s_endpgm
@ -932,8 +932,8 @@ define amdgpu_kernel void @store_load_sindex_small_offset_kernel(i32 %idx) {
; GFX1010-PAL-NEXT: s_and_b32 s1, s0, 15
; GFX1010-PAL-NEXT: s_lshl_b32 s0, s0, 2
; GFX1010-PAL-NEXT: s_lshl_b32 s1, s1, 2
; GFX1010-PAL-NEXT: s_add_u32 s0, 0x104, s0
; GFX1010-PAL-NEXT: s_add_u32 s1, 0x104, s1
; GFX1010-PAL-NEXT: s_addk_i32 s0, 0x104
; GFX1010-PAL-NEXT: s_addk_i32 s1, 0x104
; GFX1010-PAL-NEXT: scratch_store_dword off, v0, s0
; GFX1010-PAL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1010-PAL-NEXT: scratch_load_dword v0, off, s1 glc dlc
@ -959,8 +959,8 @@ define amdgpu_kernel void @store_load_sindex_small_offset_kernel(i32 %idx) {
; GFX1030-PAL-NEXT: s_and_b32 s1, s0, 15
; GFX1030-PAL-NEXT: s_lshl_b32 s0, s0, 2
; GFX1030-PAL-NEXT: s_lshl_b32 s1, s1, 2
; GFX1030-PAL-NEXT: s_add_u32 s0, 0x104, s0
; GFX1030-PAL-NEXT: s_add_u32 s1, 0x104, s1
; GFX1030-PAL-NEXT: s_addk_i32 s0, 0x104
; GFX1030-PAL-NEXT: s_addk_i32 s1, 0x104
; GFX1030-PAL-NEXT: scratch_store_dword off, v0, s0
; GFX1030-PAL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1030-PAL-NEXT: scratch_load_dword v0, off, s1 glc dlc
@ -991,13 +991,13 @@ define amdgpu_ps void @store_load_sindex_small_offset_foo(i32 inreg %idx) {
; GFX9-NEXT: s_lshl_b32 s0, s2, 2
; GFX9-NEXT: scratch_load_dword v0, off, vcc_hi offset:4 glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_add_u32 s0, 0x104, s0
; GFX9-NEXT: s_addk_i32 s0, 0x104
; GFX9-NEXT: v_mov_b32_e32 v0, 15
; GFX9-NEXT: scratch_store_dword off, v0, s0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_and_b32 s0, s2, 15
; GFX9-NEXT: s_lshl_b32 s0, s0, 2
; GFX9-NEXT: s_add_u32 s0, 0x104, s0
; GFX9-NEXT: s_addk_i32 s0, 0x104
; GFX9-NEXT: scratch_load_dword v0, off, s0 glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_endpgm
@ -1014,8 +1014,8 @@ define amdgpu_ps void @store_load_sindex_small_offset_foo(i32 inreg %idx) {
; GFX10-NEXT: v_mov_b32_e32 v0, 15
; GFX10-NEXT: s_lshl_b32 s1, s2, 2
; GFX10-NEXT: s_lshl_b32 s0, s0, 2
; GFX10-NEXT: s_add_u32 s1, 0x104, s1
; GFX10-NEXT: s_add_u32 s0, 0x104, s0
; GFX10-NEXT: s_addk_i32 s1, 0x104
; GFX10-NEXT: s_addk_i32 s0, 0x104
; GFX10-NEXT: scratch_store_dword off, v0, s1
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: scratch_load_dword v0, off, s0 glc dlc
@ -1037,11 +1037,11 @@ define amdgpu_ps void @store_load_sindex_small_offset_foo(i32 inreg %idx) {
; GFX9-PAL-NEXT: scratch_load_dword v0, off, vcc_hi offset:4 glc
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX9-PAL-NEXT: s_lshl_b32 s0, s0, 2
; GFX9-PAL-NEXT: s_add_u32 s1, 0x104, s1
; GFX9-PAL-NEXT: s_addk_i32 s1, 0x104
; GFX9-PAL-NEXT: v_mov_b32_e32 v0, 15
; GFX9-PAL-NEXT: scratch_store_dword off, v0, s1
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX9-PAL-NEXT: s_add_u32 s0, 0x104, s0
; GFX9-PAL-NEXT: s_addk_i32 s0, 0x104
; GFX9-PAL-NEXT: scratch_load_dword v0, off, s0 glc
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX9-PAL-NEXT: s_endpgm
@ -1064,8 +1064,8 @@ define amdgpu_ps void @store_load_sindex_small_offset_foo(i32 inreg %idx) {
; GFX1010-PAL-NEXT: v_mov_b32_e32 v0, 15
; GFX1010-PAL-NEXT: s_lshl_b32 s0, s0, 2
; GFX1010-PAL-NEXT: s_lshl_b32 s1, s1, 2
; GFX1010-PAL-NEXT: s_add_u32 s0, 0x104, s0
; GFX1010-PAL-NEXT: s_add_u32 s1, 0x104, s1
; GFX1010-PAL-NEXT: s_addk_i32 s0, 0x104
; GFX1010-PAL-NEXT: s_addk_i32 s1, 0x104
; GFX1010-PAL-NEXT: scratch_store_dword off, v0, s0
; GFX1010-PAL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1010-PAL-NEXT: scratch_load_dword v0, off, s1 glc dlc
@ -1089,8 +1089,8 @@ define amdgpu_ps void @store_load_sindex_small_offset_foo(i32 inreg %idx) {
; GFX1030-PAL-NEXT: v_mov_b32_e32 v0, 15
; GFX1030-PAL-NEXT: s_lshl_b32 s0, s0, 2
; GFX1030-PAL-NEXT: s_lshl_b32 s1, s1, 2
; GFX1030-PAL-NEXT: s_add_u32 s0, 0x104, s0
; GFX1030-PAL-NEXT: s_add_u32 s1, 0x104, s1
; GFX1030-PAL-NEXT: s_addk_i32 s0, 0x104
; GFX1030-PAL-NEXT: s_addk_i32 s1, 0x104
; GFX1030-PAL-NEXT: scratch_store_dword off, v0, s0
; GFX1030-PAL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1030-PAL-NEXT: scratch_load_dword v0, off, s1 glc dlc
@ -1245,7 +1245,7 @@ define void @store_load_vindex_small_offset_foo(i32 %idx) {
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: scratch_load_dword v1, off, s32 glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_add_u32 vcc_hi, s32, 0x100
; GFX9-NEXT: s_add_i32 vcc_hi, s32, 0x100
; GFX9-NEXT: v_mov_b32_e32 v1, vcc_hi
; GFX9-NEXT: v_mov_b32_e32 v3, 15
; GFX9-NEXT: v_lshl_add_u32 v2, v0, 2, v1
@ -1262,7 +1262,7 @@ define void @store_load_vindex_small_offset_foo(i32 %idx) {
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mov_b32_e32 v1, 15
; GFX10-NEXT: s_add_u32 vcc_lo, s32, 0x100
; GFX10-NEXT: s_add_i32 vcc_lo, s32, 0x100
; GFX10-NEXT: v_mov_b32_e32 v2, vcc_lo
; GFX10-NEXT: v_and_b32_e32 v3, v0, v1
; GFX10-NEXT: v_lshl_add_u32 v0, v0, 2, v2
@ -1280,7 +1280,7 @@ define void @store_load_vindex_small_offset_foo(i32 %idx) {
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-PAL-NEXT: scratch_load_dword v1, off, s32 glc
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX9-PAL-NEXT: s_add_u32 vcc_hi, s32, 0x100
; GFX9-PAL-NEXT: s_add_i32 vcc_hi, s32, 0x100
; GFX9-PAL-NEXT: v_mov_b32_e32 v1, vcc_hi
; GFX9-PAL-NEXT: v_mov_b32_e32 v3, 15
; GFX9-PAL-NEXT: v_lshl_add_u32 v2, v0, 2, v1
@ -1297,7 +1297,7 @@ define void @store_load_vindex_small_offset_foo(i32 %idx) {
; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-PAL-NEXT: v_mov_b32_e32 v1, 15
; GFX10-PAL-NEXT: s_add_u32 vcc_lo, s32, 0x100
; GFX10-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x100
; GFX10-PAL-NEXT: v_mov_b32_e32 v2, vcc_lo
; GFX10-PAL-NEXT: v_and_b32_e32 v3, v0, v1
; GFX10-PAL-NEXT: v_lshl_add_u32 v0, v0, 2, v2
@ -1495,13 +1495,13 @@ define void @zero_init_large_offset_foo() {
; GFX9-NEXT: v_mov_b32_e32 v1, s1
; GFX9-NEXT: v_mov_b32_e32 v2, s2
; GFX9-NEXT: v_mov_b32_e32 v3, s3
; GFX9-NEXT: s_add_u32 vcc_hi, s32, 0x4000
; GFX9-NEXT: s_add_i32 vcc_hi, s32, 0x4000
; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi
; GFX9-NEXT: s_add_u32 vcc_hi, s32, 0x4000
; GFX9-NEXT: s_add_i32 vcc_hi, s32, 0x4000
; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:16
; GFX9-NEXT: s_add_u32 vcc_hi, s32, 0x4000
; GFX9-NEXT: s_add_i32 vcc_hi, s32, 0x4000
; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:32
; GFX9-NEXT: s_add_u32 vcc_hi, s32, 0x4000
; GFX9-NEXT: s_add_i32 vcc_hi, s32, 0x4000
; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:48
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
@ -1513,7 +1513,7 @@ define void @zero_init_large_offset_foo() {
; GFX10-NEXT: scratch_load_dword v0, off, s32 glc dlc
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_mov_b32 s0, 0
; GFX10-NEXT: s_add_u32 vcc_lo, s32, 0x4000
; GFX10-NEXT: s_add_i32 vcc_lo, s32, 0x4000
; GFX10-NEXT: s_mov_b32 s1, s0
; GFX10-NEXT: s_mov_b32 s2, s0
; GFX10-NEXT: s_mov_b32 s3, s0
@ -1522,11 +1522,11 @@ define void @zero_init_large_offset_foo() {
; GFX10-NEXT: v_mov_b32_e32 v2, s2
; GFX10-NEXT: v_mov_b32_e32 v3, s3
; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo
; GFX10-NEXT: s_add_u32 vcc_lo, s32, 0x4000
; GFX10-NEXT: s_add_i32 vcc_lo, s32, 0x4000
; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:16
; GFX10-NEXT: s_add_u32 vcc_lo, s32, 0x4000
; GFX10-NEXT: s_add_i32 vcc_lo, s32, 0x4000
; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:32
; GFX10-NEXT: s_add_u32 vcc_lo, s32, 0x4000
; GFX10-NEXT: s_add_i32 vcc_lo, s32, 0x4000
; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:48
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
@ -1544,13 +1544,13 @@ define void @zero_init_large_offset_foo() {
; GFX9-PAL-NEXT: v_mov_b32_e32 v1, s1
; GFX9-PAL-NEXT: v_mov_b32_e32 v2, s2
; GFX9-PAL-NEXT: v_mov_b32_e32 v3, s3
; GFX9-PAL-NEXT: s_add_u32 vcc_hi, s32, 0x4000
; GFX9-PAL-NEXT: s_add_i32 vcc_hi, s32, 0x4000
; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi
; GFX9-PAL-NEXT: s_add_u32 vcc_hi, s32, 0x4000
; GFX9-PAL-NEXT: s_add_i32 vcc_hi, s32, 0x4000
; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:16
; GFX9-PAL-NEXT: s_add_u32 vcc_hi, s32, 0x4000
; GFX9-PAL-NEXT: s_add_i32 vcc_hi, s32, 0x4000
; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:32
; GFX9-PAL-NEXT: s_add_u32 vcc_hi, s32, 0x4000
; GFX9-PAL-NEXT: s_add_i32 vcc_hi, s32, 0x4000
; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:48
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX9-PAL-NEXT: s_setpc_b64 s[30:31]
@ -1562,7 +1562,7 @@ define void @zero_init_large_offset_foo() {
; GFX1010-PAL-NEXT: scratch_load_dword v0, off, s32 glc dlc
; GFX1010-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX1010-PAL-NEXT: s_mov_b32 s0, 0
; GFX1010-PAL-NEXT: s_add_u32 vcc_lo, s32, 0x4000
; GFX1010-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4000
; GFX1010-PAL-NEXT: s_mov_b32 s1, s0
; GFX1010-PAL-NEXT: s_mov_b32 s2, s0
; GFX1010-PAL-NEXT: s_mov_b32 s3, s0
@ -1572,13 +1572,13 @@ define void @zero_init_large_offset_foo() {
; GFX1010-PAL-NEXT: v_mov_b32_e32 v3, s3
; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo
; GFX1010-PAL-NEXT: s_waitcnt_depctr 0xffe3
; GFX1010-PAL-NEXT: s_add_u32 vcc_lo, s32, 0x4000
; GFX1010-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4000
; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:16
; GFX1010-PAL-NEXT: s_waitcnt_depctr 0xffe3
; GFX1010-PAL-NEXT: s_add_u32 vcc_lo, s32, 0x4000
; GFX1010-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4000
; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:32
; GFX1010-PAL-NEXT: s_waitcnt_depctr 0xffe3
; GFX1010-PAL-NEXT: s_add_u32 vcc_lo, s32, 0x4000
; GFX1010-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4000
; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:48
; GFX1010-PAL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1010-PAL-NEXT: s_setpc_b64 s[30:31]
@ -1590,7 +1590,7 @@ define void @zero_init_large_offset_foo() {
; GFX1030-PAL-NEXT: scratch_load_dword v0, off, s32 glc dlc
; GFX1030-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX1030-PAL-NEXT: s_mov_b32 s0, 0
; GFX1030-PAL-NEXT: s_add_u32 vcc_lo, s32, 0x4000
; GFX1030-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4000
; GFX1030-PAL-NEXT: s_mov_b32 s1, s0
; GFX1030-PAL-NEXT: s_mov_b32 s2, s0
; GFX1030-PAL-NEXT: s_mov_b32 s3, s0
@ -1599,11 +1599,11 @@ define void @zero_init_large_offset_foo() {
; GFX1030-PAL-NEXT: v_mov_b32_e32 v2, s2
; GFX1030-PAL-NEXT: v_mov_b32_e32 v3, s3
; GFX1030-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo
; GFX1030-PAL-NEXT: s_add_u32 vcc_lo, s32, 0x4000
; GFX1030-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4000
; GFX1030-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:16
; GFX1030-PAL-NEXT: s_add_u32 vcc_lo, s32, 0x4000
; GFX1030-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4000
; GFX1030-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:32
; GFX1030-PAL-NEXT: s_add_u32 vcc_lo, s32, 0x4000
; GFX1030-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4000
; GFX1030-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:48
; GFX1030-PAL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1030-PAL-NEXT: s_setpc_b64 s[30:31]
@ -1629,10 +1629,10 @@ define amdgpu_kernel void @store_load_sindex_large_offset_kernel(i32 %idx) {
; GFX9-NEXT: s_and_b32 s0, s0, 15
; GFX9-NEXT: s_lshl_b32 s0, s0, 2
; GFX9-NEXT: v_mov_b32_e32 v0, 15
; GFX9-NEXT: s_add_u32 s1, 0x4004, s1
; GFX9-NEXT: s_addk_i32 s1, 0x4004
; GFX9-NEXT: scratch_store_dword off, v0, s1
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_add_u32 s0, 0x4004, s0
; GFX9-NEXT: s_addk_i32 s0, 0x4004
; GFX9-NEXT: scratch_load_dword v0, off, s0 glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_endpgm
@ -1651,8 +1651,8 @@ define amdgpu_kernel void @store_load_sindex_large_offset_kernel(i32 %idx) {
; GFX10-NEXT: s_and_b32 s1, s0, 15
; GFX10-NEXT: s_lshl_b32 s0, s0, 2
; GFX10-NEXT: s_lshl_b32 s1, s1, 2
; GFX10-NEXT: s_add_u32 s0, 0x4004, s0
; GFX10-NEXT: s_add_u32 s1, 0x4004, s1
; GFX10-NEXT: s_addk_i32 s0, 0x4004
; GFX10-NEXT: s_addk_i32 s1, 0x4004
; GFX10-NEXT: scratch_store_dword off, v0, s0
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: scratch_load_dword v0, off, s1 glc dlc
@ -1676,10 +1676,10 @@ define amdgpu_kernel void @store_load_sindex_large_offset_kernel(i32 %idx) {
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX9-PAL-NEXT: s_lshl_b32 s0, s0, 2
; GFX9-PAL-NEXT: v_mov_b32_e32 v0, 15
; GFX9-PAL-NEXT: s_add_u32 s1, 0x4004, s1
; GFX9-PAL-NEXT: s_addk_i32 s1, 0x4004
; GFX9-PAL-NEXT: scratch_store_dword off, v0, s1
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX9-PAL-NEXT: s_add_u32 s0, 0x4004, s0
; GFX9-PAL-NEXT: s_addk_i32 s0, 0x4004
; GFX9-PAL-NEXT: scratch_load_dword v0, off, s0 glc
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX9-PAL-NEXT: s_endpgm
@ -1704,8 +1704,8 @@ define amdgpu_kernel void @store_load_sindex_large_offset_kernel(i32 %idx) {
; GFX1010-PAL-NEXT: s_and_b32 s1, s0, 15
; GFX1010-PAL-NEXT: s_lshl_b32 s0, s0, 2
; GFX1010-PAL-NEXT: s_lshl_b32 s1, s1, 2
; GFX1010-PAL-NEXT: s_add_u32 s0, 0x4004, s0
; GFX1010-PAL-NEXT: s_add_u32 s1, 0x4004, s1
; GFX1010-PAL-NEXT: s_addk_i32 s0, 0x4004
; GFX1010-PAL-NEXT: s_addk_i32 s1, 0x4004
; GFX1010-PAL-NEXT: scratch_store_dword off, v0, s0
; GFX1010-PAL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1010-PAL-NEXT: scratch_load_dword v0, off, s1 glc dlc
@ -1731,8 +1731,8 @@ define amdgpu_kernel void @store_load_sindex_large_offset_kernel(i32 %idx) {
; GFX1030-PAL-NEXT: s_and_b32 s1, s0, 15
; GFX1030-PAL-NEXT: s_lshl_b32 s0, s0, 2
; GFX1030-PAL-NEXT: s_lshl_b32 s1, s1, 2
; GFX1030-PAL-NEXT: s_add_u32 s0, 0x4004, s0
; GFX1030-PAL-NEXT: s_add_u32 s1, 0x4004, s1
; GFX1030-PAL-NEXT: s_addk_i32 s0, 0x4004
; GFX1030-PAL-NEXT: s_addk_i32 s1, 0x4004
; GFX1030-PAL-NEXT: scratch_store_dword off, v0, s0
; GFX1030-PAL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1030-PAL-NEXT: scratch_load_dword v0, off, s1 glc dlc
@ -1763,13 +1763,13 @@ define amdgpu_ps void @store_load_sindex_large_offset_foo(i32 inreg %idx) {
; GFX9-NEXT: s_lshl_b32 s0, s2, 2
; GFX9-NEXT: scratch_load_dword v0, off, vcc_hi offset:4 glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_add_u32 s0, 0x4004, s0
; GFX9-NEXT: s_addk_i32 s0, 0x4004
; GFX9-NEXT: v_mov_b32_e32 v0, 15
; GFX9-NEXT: scratch_store_dword off, v0, s0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_and_b32 s0, s2, 15
; GFX9-NEXT: s_lshl_b32 s0, s0, 2
; GFX9-NEXT: s_add_u32 s0, 0x4004, s0
; GFX9-NEXT: s_addk_i32 s0, 0x4004
; GFX9-NEXT: scratch_load_dword v0, off, s0 glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_endpgm
@ -1786,8 +1786,8 @@ define amdgpu_ps void @store_load_sindex_large_offset_foo(i32 inreg %idx) {
; GFX10-NEXT: v_mov_b32_e32 v0, 15
; GFX10-NEXT: s_lshl_b32 s1, s2, 2
; GFX10-NEXT: s_lshl_b32 s0, s0, 2
; GFX10-NEXT: s_add_u32 s1, 0x4004, s1
; GFX10-NEXT: s_add_u32 s0, 0x4004, s0
; GFX10-NEXT: s_addk_i32 s1, 0x4004
; GFX10-NEXT: s_addk_i32 s0, 0x4004
; GFX10-NEXT: scratch_store_dword off, v0, s1
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: scratch_load_dword v0, off, s0 glc dlc
@ -1809,11 +1809,11 @@ define amdgpu_ps void @store_load_sindex_large_offset_foo(i32 inreg %idx) {
; GFX9-PAL-NEXT: scratch_load_dword v0, off, vcc_hi offset:4 glc
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX9-PAL-NEXT: s_lshl_b32 s0, s0, 2
; GFX9-PAL-NEXT: s_add_u32 s1, 0x4004, s1
; GFX9-PAL-NEXT: s_addk_i32 s1, 0x4004
; GFX9-PAL-NEXT: v_mov_b32_e32 v0, 15
; GFX9-PAL-NEXT: scratch_store_dword off, v0, s1
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX9-PAL-NEXT: s_add_u32 s0, 0x4004, s0
; GFX9-PAL-NEXT: s_addk_i32 s0, 0x4004
; GFX9-PAL-NEXT: scratch_load_dword v0, off, s0 glc
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX9-PAL-NEXT: s_endpgm
@ -1836,8 +1836,8 @@ define amdgpu_ps void @store_load_sindex_large_offset_foo(i32 inreg %idx) {
; GFX1010-PAL-NEXT: v_mov_b32_e32 v0, 15
; GFX1010-PAL-NEXT: s_lshl_b32 s0, s0, 2
; GFX1010-PAL-NEXT: s_lshl_b32 s1, s1, 2
; GFX1010-PAL-NEXT: s_add_u32 s0, 0x4004, s0
; GFX1010-PAL-NEXT: s_add_u32 s1, 0x4004, s1
; GFX1010-PAL-NEXT: s_addk_i32 s0, 0x4004
; GFX1010-PAL-NEXT: s_addk_i32 s1, 0x4004
; GFX1010-PAL-NEXT: scratch_store_dword off, v0, s0
; GFX1010-PAL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1010-PAL-NEXT: scratch_load_dword v0, off, s1 glc dlc
@ -1861,8 +1861,8 @@ define amdgpu_ps void @store_load_sindex_large_offset_foo(i32 inreg %idx) {
; GFX1030-PAL-NEXT: v_mov_b32_e32 v0, 15
; GFX1030-PAL-NEXT: s_lshl_b32 s0, s0, 2
; GFX1030-PAL-NEXT: s_lshl_b32 s1, s1, 2
; GFX1030-PAL-NEXT: s_add_u32 s0, 0x4004, s0
; GFX1030-PAL-NEXT: s_add_u32 s1, 0x4004, s1
; GFX1030-PAL-NEXT: s_addk_i32 s0, 0x4004
; GFX1030-PAL-NEXT: s_addk_i32 s1, 0x4004
; GFX1030-PAL-NEXT: scratch_store_dword off, v0, s0
; GFX1030-PAL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1030-PAL-NEXT: scratch_load_dword v0, off, s1 glc dlc
@ -2017,7 +2017,7 @@ define void @store_load_vindex_large_offset_foo(i32 %idx) {
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: scratch_load_dword v1, off, s32 glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_add_u32 vcc_hi, s32, 0x4000
; GFX9-NEXT: s_add_i32 vcc_hi, s32, 0x4000
; GFX9-NEXT: v_mov_b32_e32 v1, vcc_hi
; GFX9-NEXT: v_mov_b32_e32 v3, 15
; GFX9-NEXT: v_lshl_add_u32 v2, v0, 2, v1
@ -2034,7 +2034,7 @@ define void @store_load_vindex_large_offset_foo(i32 %idx) {
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mov_b32_e32 v1, 15
; GFX10-NEXT: s_add_u32 vcc_lo, s32, 0x4000
; GFX10-NEXT: s_add_i32 vcc_lo, s32, 0x4000
; GFX10-NEXT: v_mov_b32_e32 v2, vcc_lo
; GFX10-NEXT: v_and_b32_e32 v3, v0, v1
; GFX10-NEXT: v_lshl_add_u32 v0, v0, 2, v2
@ -2052,7 +2052,7 @@ define void @store_load_vindex_large_offset_foo(i32 %idx) {
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-PAL-NEXT: scratch_load_dword v1, off, s32 glc
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX9-PAL-NEXT: s_add_u32 vcc_hi, s32, 0x4000
; GFX9-PAL-NEXT: s_add_i32 vcc_hi, s32, 0x4000
; GFX9-PAL-NEXT: v_mov_b32_e32 v1, vcc_hi
; GFX9-PAL-NEXT: v_mov_b32_e32 v3, 15
; GFX9-PAL-NEXT: v_lshl_add_u32 v2, v0, 2, v1
@ -2069,7 +2069,7 @@ define void @store_load_vindex_large_offset_foo(i32 %idx) {
; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-PAL-NEXT: v_mov_b32_e32 v1, 15
; GFX10-PAL-NEXT: s_add_u32 vcc_lo, s32, 0x4000
; GFX10-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4000
; GFX10-PAL-NEXT: v_mov_b32_e32 v2, vcc_lo
; GFX10-PAL-NEXT: v_and_b32_e32 v3, v0, v1
; GFX10-PAL-NEXT: v_lshl_add_u32 v0, v0, 2, v2
@ -2107,7 +2107,7 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
; GFX9-NEXT: s_mov_b32 vcc_hi, 0
; GFX9-NEXT: scratch_store_dword off, v0, vcc_hi offset:4
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_add_u32 s0, 4, s0
; GFX9-NEXT: s_add_i32 s0, s0, 4
; GFX9-NEXT: v_mov_b32_e32 v0, 15
; GFX9-NEXT: scratch_store_dword off, v0, s0 offset:3712
; GFX9-NEXT: s_waitcnt vmcnt(0)
@ -2124,7 +2124,7 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
; GFX10-NEXT: v_mov_b32_e32 v0, 13
; GFX10-NEXT: v_mov_b32_e32 v1, 15
; GFX10-NEXT: s_movk_i32 s0, 0x3800
; GFX10-NEXT: s_add_u32 s0, 4, s0
; GFX10-NEXT: s_add_i32 s0, s0, 4
; GFX10-NEXT: scratch_store_dword off, v0, off offset:4
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: scratch_store_dword off, v1, s0 offset:1664
@ -2147,7 +2147,7 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
; GFX9-PAL-NEXT: s_addc_u32 flat_scratch_hi, s3, 0
; GFX9-PAL-NEXT: scratch_store_dword off, v0, vcc_hi offset:4
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX9-PAL-NEXT: s_add_u32 s0, 4, s0
; GFX9-PAL-NEXT: s_add_i32 s0, s0, 4
; GFX9-PAL-NEXT: v_mov_b32_e32 v0, 15
; GFX9-PAL-NEXT: scratch_store_dword off, v0, s0 offset:3712
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
@ -2170,7 +2170,7 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
; GFX1010-PAL-NEXT: v_mov_b32_e32 v1, 15
; GFX1010-PAL-NEXT: s_movk_i32 s0, 0x3800
; GFX1010-PAL-NEXT: s_mov_b32 vcc_lo, 0
; GFX1010-PAL-NEXT: s_add_u32 s0, 4, s0
; GFX1010-PAL-NEXT: s_add_i32 s0, s0, 4
; GFX1010-PAL-NEXT: scratch_store_dword off, v0, vcc_lo offset:4
; GFX1010-PAL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1010-PAL-NEXT: scratch_store_dword off, v1, s0 offset:1664
@ -2193,7 +2193,7 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
; GFX1030-PAL-NEXT: v_mov_b32_e32 v0, 13
; GFX1030-PAL-NEXT: v_mov_b32_e32 v1, 15
; GFX1030-PAL-NEXT: s_movk_i32 s0, 0x3800
; GFX1030-PAL-NEXT: s_add_u32 s0, 4, s0
; GFX1030-PAL-NEXT: s_add_i32 s0, s0, 4
; GFX1030-PAL-NEXT: scratch_store_dword off, v0, off offset:4
; GFX1030-PAL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1030-PAL-NEXT: scratch_store_dword off, v1, s0 offset:1664
@ -2220,7 +2220,7 @@ define void @store_load_large_imm_offset_foo() {
; GFX9-NEXT: v_mov_b32_e32 v0, 13
; GFX9-NEXT: scratch_store_dword off, v0, s32
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_add_u32 s0, s32, s0
; GFX9-NEXT: s_add_i32 s0, s0, s32
; GFX9-NEXT: v_mov_b32_e32 v0, 15
; GFX9-NEXT: scratch_store_dword off, v0, s0 offset:3712
; GFX9-NEXT: s_waitcnt vmcnt(0)
@ -2235,7 +2235,7 @@ define void @store_load_large_imm_offset_foo() {
; GFX10-NEXT: v_mov_b32_e32 v0, 13
; GFX10-NEXT: v_mov_b32_e32 v1, 15
; GFX10-NEXT: s_movk_i32 s0, 0x3800
; GFX10-NEXT: s_add_u32 s0, s32, s0
; GFX10-NEXT: s_add_i32 s0, s0, s32
; GFX10-NEXT: scratch_store_dword off, v0, s32
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: scratch_store_dword off, v1, s0 offset:1664
@ -2251,7 +2251,7 @@ define void @store_load_large_imm_offset_foo() {
; GFX9-PAL-NEXT: v_mov_b32_e32 v0, 13
; GFX9-PAL-NEXT: scratch_store_dword off, v0, s32
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX9-PAL-NEXT: s_add_u32 s0, s32, s0
; GFX9-PAL-NEXT: s_add_i32 s0, s0, s32
; GFX9-PAL-NEXT: v_mov_b32_e32 v0, 15
; GFX9-PAL-NEXT: scratch_store_dword off, v0, s0 offset:3712
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
@ -2266,7 +2266,7 @@ define void @store_load_large_imm_offset_foo() {
; GFX10-PAL-NEXT: v_mov_b32_e32 v0, 13
; GFX10-PAL-NEXT: v_mov_b32_e32 v1, 15
; GFX10-PAL-NEXT: s_movk_i32 s0, 0x3800
; GFX10-PAL-NEXT: s_add_u32 s0, s32, s0
; GFX10-PAL-NEXT: s_add_i32 s0, s0, s32
; GFX10-PAL-NEXT: scratch_store_dword off, v0, s32
; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-PAL-NEXT: scratch_store_dword off, v1, s0 offset:1664
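
A note on the two replacement forms seen throughout these scratch checks: the carry-writing s_add_u32 becomes either the two-operand SOPK form s_addk_i32 or the three-operand SOP2 form s_add_i32. Below is a minimal sketch of a decision rule that is consistent with the lines above; the helper is invented for illustration, and the -16..64 inline-constant range is my assumption about why tiny offsets such as 4 and 16 keep the three-operand form.

// addk_choice.cpp, illustrative only; not the in-tree selection logic.
#include <cstdint>
#include <cstdio>

// "dst = src + imm" shrinks to s_addk_i32 when the destination doubles as the
// source operand and the immediate fits in signed 16 bits but is not a free
// inline constant; otherwise the s_add_i32 form is printed.
static void printScalarAdd(unsigned Dst, unsigned Src, int32_t Imm) {
  bool FitsSImm16 = Imm >= INT16_MIN && Imm <= INT16_MAX;
  bool IsInline = Imm >= -16 && Imm <= 64; // assumed SALU inline-constant range
  if (Dst == Src && FitsSImm16 && !IsInline)
    std::printf("s_addk_i32 s%u, 0x%x\n", Dst, (unsigned)(uint16_t)Imm);
  else
    std::printf("s_add_i32 s%u, s%u, %d\n", Dst, Src, Imm);
}

int main() {
  printScalarAdd(0, 0, 0x104);   // prints the SOPK form, matching the hunk above
  printScalarAdd(0, 0, 4);       // inline constant, stays three-operand
  printScalarAdd(33, 32, 0x100); // destination differs from source, stays three-operand
  return 0;
}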

View File

@ -37,7 +37,7 @@ define void @func_mov_fi_i32() #0 {
; GFX9-MUBUF-NEXT: v_lshrrev_b32_e64 v0, 6, s32
; GFX9-FLATSCR: v_mov_b32_e32 v0, s32
; GFX9-FLATSCR: s_add_u32 [[ADD:[^,]+]], s32, 4
; GFX9-FLATSCR: s_add_i32 [[ADD:[^,]+]], s32, 4
; GFX9-NEXT: ds_write_b32 v0, v0
; GFX9-MUBUF-NEXT: v_lshrrev_b32_e64 [[SCALED:v[0-9]+]], 6, s32
; GFX9-MUBUF-NEXT: v_add_u32_e32 v0, 4, [[SCALED]]
@ -196,7 +196,7 @@ ret:
; GFX9-MUBUF-DAG: v_lshrrev_b32_e64 [[SCALED:v[0-9]+]], 6, s32
; GFX9-MUBUF: v_add_u32_e32 [[VZ:v[0-9]+]], 0x200, [[SCALED]]
; GFX9-FLATSCR-DAG: s_add_u32 [[SZ:[^,]+]], s32, 0x200
; GFX9-FLATSCR-DAG: s_add_i32 [[SZ:[^,]+]], s32, 0x200
; GFX9-FLATSCR: v_mov_b32_e32 [[VZ:v[0-9]+]], [[SZ]]
; GCN: v_mul_lo_u32 [[VZ]], [[VZ]], 9
@ -222,7 +222,7 @@ define void @func_other_fi_user_non_inline_imm_offset_i32() #0 {
; GFX9-MUBUF-DAG: v_lshrrev_b32_e64 [[SCALED:v[0-9]+]], 6, s32
; GFX9-MUBUF: v_add_u32_e32 [[VZ:v[0-9]+]], 0x200, [[SCALED]]
; GFX9-FLATSCR-DAG: s_add_u32 [[SZ:[^,]+]], s32, 0x200
; GFX9-FLATSCR-DAG: s_add_i32 [[SZ:[^,]+]], s32, 0x200
; GFX9-FLATSCR: v_mov_b32_e32 [[VZ:v[0-9]+]], [[SZ]]
; GCN: v_mul_lo_u32 [[VZ]], [[VZ]], 9

View File

@ -16,7 +16,7 @@ define void @callee_with_stack_and_call() #0 {
; SPILL-TO-VGPR-NEXT: v_writelane_b32 v40, s33, 2
; SPILL-TO-VGPR-NEXT: v_writelane_b32 v40, s30, 0
; SPILL-TO-VGPR-NEXT: s_mov_b32 s33, s32
; SPILL-TO-VGPR-NEXT: s_add_u32 s32, s32, 0x400
; SPILL-TO-VGPR-NEXT: s_addk_i32 s32, 0x400
; SPILL-TO-VGPR-NEXT: v_mov_b32_e32 v0, 0
; SPILL-TO-VGPR-NEXT: s_getpc_b64 s[4:5]
; SPILL-TO-VGPR-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
@ -27,7 +27,7 @@ define void @callee_with_stack_and_call() #0 {
; SPILL-TO-VGPR-NEXT: s_swappc_b64 s[30:31], s[4:5]
; SPILL-TO-VGPR-NEXT: v_readlane_b32 s4, v40, 0
; SPILL-TO-VGPR-NEXT: v_readlane_b32 s5, v40, 1
; SPILL-TO-VGPR-NEXT: s_sub_u32 s32, s32, 0x400
; SPILL-TO-VGPR-NEXT: s_addk_i32 s32, 0xfc00
; SPILL-TO-VGPR-NEXT: v_readlane_b32 s33, v40, 2
; SPILL-TO-VGPR-NEXT: s_or_saveexec_b64 s[6:7], -1
; SPILL-TO-VGPR-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
@ -41,7 +41,7 @@ define void @callee_with_stack_and_call() #0 {
; NO-SPILL-TO-VGPR-NEXT: v_mov_b32_e32 v0, s33
; NO-SPILL-TO-VGPR-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
; NO-SPILL-TO-VGPR-NEXT: s_mov_b32 s33, s32
; NO-SPILL-TO-VGPR-NEXT: s_add_u32 s32, s32, 0x800
; NO-SPILL-TO-VGPR-NEXT: s_addk_i32 s32, 0x800
; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 s[6:7], exec
; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, 3
; NO-SPILL-TO-VGPR-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:16
@ -68,7 +68,7 @@ define void @callee_with_stack_and_call() #0 {
; NO-SPILL-TO-VGPR-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:16
; NO-SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0)
; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, s[6:7]
; NO-SPILL-TO-VGPR-NEXT: s_sub_u32 s32, s32, 0x800
; NO-SPILL-TO-VGPR-NEXT: s_addk_i32 s32, 0xf800
; NO-SPILL-TO-VGPR-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
; NO-SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0)
; NO-SPILL-TO-VGPR-NEXT: v_readfirstlane_b32 s33, v0
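
Also visible in this hunk: the epilogue s_sub_u32 of the frame size is now an s_addk_i32 of the negated amount, so 0x400 shows up as 0xfc00 and 0x800 as 0xf800, their 16-bit two's-complement encodings. A small self-contained check of that wraparound (purely illustrative; the helper name is invented):

// imm16_wrap.cpp, checks the negated frame sizes used in the epilogues above.
#include <cassert>
#include <cstdint>
#include <cstdio>

// Two's-complement truncation to 16 bits, i.e. the bit pattern s_addk_i32
// prints for a negative stack adjustment.
static uint16_t encodeSImm16(int32_t Amount) { return static_cast<uint16_t>(Amount); }

int main() {
  assert(encodeSImm16(-0x400) == 0xfc00); // s_sub_u32 ..., 0x400 -> s_addk_i32 ..., 0xfc00
  assert(encodeSImm16(-0x800) == 0xf800);
  assert(encodeSImm16(-0x200) == 0xfe00);
  std::printf("0x%x 0x%x 0x%x\n", (unsigned)encodeSImm16(-0x400),
              (unsigned)encodeSImm16(-0x800), (unsigned)encodeSImm16(-0x200));
  return 0;
}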

File diff suppressed because it is too large

View File

@ -16,7 +16,7 @@ define amdgpu_gfx void @test_call_external_void_func_void_clobber_s30_s31_call_e
; GFX9-NEXT: v_writelane_b32 v40, s35, 1
; GFX9-NEXT: v_writelane_b32 v40, s30, 2
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_add_u32 s32, s32, 0x400
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: s_getpc_b64 s[34:35]
; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12
@ -29,7 +29,7 @@ define amdgpu_gfx void @test_call_external_void_func_void_clobber_s30_s31_call_e
; GFX9-NEXT: v_readlane_b32 s5, v40, 3
; GFX9-NEXT: v_readlane_b32 s35, v40, 1
; GFX9-NEXT: v_readlane_b32 s34, v40, 0
; GFX9-NEXT: s_sub_u32 s32, s32, 0x400
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: v_readlane_b32 s33, v40, 4
; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
@ -47,7 +47,7 @@ define amdgpu_gfx void @test_call_external_void_func_void_clobber_s30_s31_call_e
; GFX10-NEXT: s_mov_b32 exec_lo, s4
; GFX10-NEXT: v_writelane_b32 v40, s33, 4
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_add_u32 s32, s32, 0x200
; GFX10-NEXT: s_addk_i32 s32, 0x200
; GFX10-NEXT: v_writelane_b32 v40, s34, 0
; GFX10-NEXT: v_writelane_b32 v40, s35, 1
; GFX10-NEXT: s_getpc_b64 s[34:35]
@ -63,7 +63,7 @@ define amdgpu_gfx void @test_call_external_void_func_void_clobber_s30_s31_call_e
; GFX10-NEXT: v_readlane_b32 s5, v40, 3
; GFX10-NEXT: v_readlane_b32 s35, v40, 1
; GFX10-NEXT: v_readlane_b32 s34, v40, 0
; GFX10-NEXT: s_sub_u32 s32, s32, 0x200
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: v_readlane_b32 s33, v40, 4
; GFX10-NEXT: s_or_saveexec_b32 s6, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
@ -111,7 +111,7 @@ define amdgpu_gfx void @test_call_void_func_void_mayclobber_s31(i32 addrspace(1)
; GFX9-NEXT: v_writelane_b32 v40, s34, 0
; GFX9-NEXT: v_writelane_b32 v40, s30, 1
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_add_u32 s32, s32, 0x400
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_writelane_b32 v40, s31, 2
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; def s31
@ -128,7 +128,7 @@ define amdgpu_gfx void @test_call_void_func_void_mayclobber_s31(i32 addrspace(1)
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: v_readlane_b32 s5, v40, 2
; GFX9-NEXT: v_readlane_b32 s34, v40, 0
; GFX9-NEXT: s_sub_u32 s32, s32, 0x400
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: v_readlane_b32 s33, v40, 3
; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
@ -146,7 +146,7 @@ define amdgpu_gfx void @test_call_void_func_void_mayclobber_s31(i32 addrspace(1)
; GFX10-NEXT: s_mov_b32 exec_lo, s4
; GFX10-NEXT: v_writelane_b32 v40, s33, 3
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_add_u32 s32, s32, 0x200
; GFX10-NEXT: s_addk_i32 s32, 0x200
; GFX10-NEXT: s_getpc_b64 s[4:5]
; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12
@ -165,7 +165,7 @@ define amdgpu_gfx void @test_call_void_func_void_mayclobber_s31(i32 addrspace(1)
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: v_readlane_b32 s5, v40, 2
; GFX10-NEXT: v_readlane_b32 s34, v40, 0
; GFX10-NEXT: s_sub_u32 s32, s32, 0x200
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: v_readlane_b32 s33, v40, 3
; GFX10-NEXT: s_or_saveexec_b32 s6, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
@ -189,7 +189,7 @@ define amdgpu_gfx void @test_call_void_func_void_mayclobber_v31(i32 addrspace(1)
; GFX9-NEXT: v_writelane_b32 v41, s33, 2
; GFX9-NEXT: v_writelane_b32 v41, s30, 0
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_add_u32 s32, s32, 0x400
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; def v31
@ -207,7 +207,7 @@ define amdgpu_gfx void @test_call_void_func_void_mayclobber_v31(i32 addrspace(1)
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX9-NEXT: v_readlane_b32 s4, v41, 0
; GFX9-NEXT: v_readlane_b32 s5, v41, 1
; GFX9-NEXT: s_sub_u32 s32, s32, 0x400
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: v_readlane_b32 s33, v41, 2
; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
@ -225,7 +225,7 @@ define amdgpu_gfx void @test_call_void_func_void_mayclobber_v31(i32 addrspace(1)
; GFX10-NEXT: s_mov_b32 exec_lo, s4
; GFX10-NEXT: v_writelane_b32 v41, s33, 2
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_add_u32 s32, s32, 0x200
; GFX10-NEXT: s_addk_i32 s32, 0x200
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; def v31
@ -244,7 +244,7 @@ define amdgpu_gfx void @test_call_void_func_void_mayclobber_v31(i32 addrspace(1)
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX10-NEXT: v_readlane_b32 s4, v41, 0
; GFX10-NEXT: v_readlane_b32 s5, v41, 1
; GFX10-NEXT: s_sub_u32 s32, s32, 0x200
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: v_readlane_b32 s33, v41, 2
; GFX10-NEXT: s_or_saveexec_b32 s6, -1
; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
@ -270,7 +270,7 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_s33(i32 addrspace(1)*
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: v_writelane_b32 v40, s33, 0
; GFX9-NEXT: v_writelane_b32 v40, s30, 1
; GFX9-NEXT: s_add_u32 s32, s32, 0x400
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12
@ -285,7 +285,7 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_s33(i32 addrspace(1)*
; GFX9-NEXT: v_readlane_b32 s4, v40, 1
; GFX9-NEXT: v_readlane_b32 s33, v40, 0
; GFX9-NEXT: v_readlane_b32 s5, v40, 2
; GFX9-NEXT: s_sub_u32 s32, s32, 0x400
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: v_readlane_b32 s33, v40, 3
; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
@ -303,7 +303,7 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_s33(i32 addrspace(1)*
; GFX10-NEXT: s_mov_b32 exec_lo, s4
; GFX10-NEXT: v_writelane_b32 v40, s33, 3
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_add_u32 s32, s32, 0x200
; GFX10-NEXT: s_addk_i32 s32, 0x200
; GFX10-NEXT: s_getpc_b64 s[4:5]
; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12
@ -320,7 +320,7 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_s33(i32 addrspace(1)*
; GFX10-NEXT: v_readlane_b32 s4, v40, 1
; GFX10-NEXT: v_readlane_b32 s33, v40, 0
; GFX10-NEXT: v_readlane_b32 s5, v40, 2
; GFX10-NEXT: s_sub_u32 s32, s32, 0x200
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: v_readlane_b32 s33, v40, 3
; GFX10-NEXT: s_or_saveexec_b32 s6, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
@ -345,7 +345,7 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_s34(i32 addrspace(1)*
; GFX9-NEXT: v_writelane_b32 v40, s34, 0
; GFX9-NEXT: v_writelane_b32 v40, s30, 1
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_add_u32 s32, s32, 0x400
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12
@ -360,7 +360,7 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_s34(i32 addrspace(1)*
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: v_readlane_b32 s5, v40, 2
; GFX9-NEXT: v_readlane_b32 s34, v40, 0
; GFX9-NEXT: s_sub_u32 s32, s32, 0x400
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: v_readlane_b32 s33, v40, 3
; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
@ -378,7 +378,7 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_s34(i32 addrspace(1)*
; GFX10-NEXT: s_mov_b32 exec_lo, s4
; GFX10-NEXT: v_writelane_b32 v40, s33, 3
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_add_u32 s32, s32, 0x200
; GFX10-NEXT: s_addk_i32 s32, 0x200
; GFX10-NEXT: s_getpc_b64 s[4:5]
; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12
@ -395,7 +395,7 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_s34(i32 addrspace(1)*
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: v_readlane_b32 s5, v40, 2
; GFX10-NEXT: v_readlane_b32 s34, v40, 0
; GFX10-NEXT: s_sub_u32 s32, s32, 0x200
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: v_readlane_b32 s33, v40, 3
; GFX10-NEXT: s_or_saveexec_b32 s6, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
@ -419,7 +419,7 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_v40(i32 addrspace(1)*
; GFX9-NEXT: v_writelane_b32 v41, s33, 2
; GFX9-NEXT: v_writelane_b32 v41, s30, 0
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_add_u32 s32, s32, 0x400
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
@ -435,7 +435,7 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_v40(i32 addrspace(1)*
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX9-NEXT: v_readlane_b32 s4, v41, 0
; GFX9-NEXT: v_readlane_b32 s5, v41, 1
; GFX9-NEXT: s_sub_u32 s32, s32, 0x400
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: v_readlane_b32 s33, v41, 2
; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
@ -453,7 +453,7 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_v40(i32 addrspace(1)*
; GFX10-NEXT: s_mov_b32 exec_lo, s4
; GFX10-NEXT: v_writelane_b32 v41, s33, 2
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_add_u32 s32, s32, 0x200
; GFX10-NEXT: s_addk_i32 s32, 0x200
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX10-NEXT: s_getpc_b64 s[4:5]
; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
@ -470,7 +470,7 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_v40(i32 addrspace(1)*
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX10-NEXT: v_readlane_b32 s4, v41, 0
; GFX10-NEXT: v_readlane_b32 s5, v41, 1
; GFX10-NEXT: s_sub_u32 s32, s32, 0x200
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: v_readlane_b32 s33, v41, 2
; GFX10-NEXT: s_or_saveexec_b32 s6, -1
; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
@ -578,7 +578,7 @@ define amdgpu_gfx void @test_call_void_func_void_clobber_s33() #0 {
; GFX9-NEXT: v_writelane_b32 v40, s33, 2
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_add_u32 s32, s32, 0x400
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, void_func_void_clobber_s33@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, void_func_void_clobber_s33@rel32@hi+12
@ -586,7 +586,7 @@ define amdgpu_gfx void @test_call_void_func_void_clobber_s33() #0 {
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GFX9-NEXT: v_readlane_b32 s4, v40, 0
; GFX9-NEXT: v_readlane_b32 s5, v40, 1
; GFX9-NEXT: s_sub_u32 s32, s32, 0x400
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: v_readlane_b32 s33, v40, 2
; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
@ -604,7 +604,7 @@ define amdgpu_gfx void @test_call_void_func_void_clobber_s33() #0 {
; GFX10-NEXT: s_mov_b32 exec_lo, s4
; GFX10-NEXT: v_writelane_b32 v40, s33, 2
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_add_u32 s32, s32, 0x200
; GFX10-NEXT: s_addk_i32 s32, 0x200
; GFX10-NEXT: s_getpc_b64 s[4:5]
; GFX10-NEXT: s_add_u32 s4, s4, void_func_void_clobber_s33@rel32@lo+4
; GFX10-NEXT: s_addc_u32 s5, s5, void_func_void_clobber_s33@rel32@hi+12
@ -613,7 +613,7 @@ define amdgpu_gfx void @test_call_void_func_void_clobber_s33() #0 {
; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GFX10-NEXT: v_readlane_b32 s4, v40, 0
; GFX10-NEXT: v_readlane_b32 s5, v40, 1
; GFX10-NEXT: s_sub_u32 s32, s32, 0x200
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: v_readlane_b32 s33, v40, 2
; GFX10-NEXT: s_or_saveexec_b32 s6, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
@ -635,7 +635,7 @@ define amdgpu_gfx void @test_call_void_func_void_clobber_s34() #0 {
; GFX9-NEXT: v_writelane_b32 v40, s33, 2
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_add_u32 s32, s32, 0x400
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, void_func_void_clobber_s34@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, void_func_void_clobber_s34@rel32@hi+12
@ -643,7 +643,7 @@ define amdgpu_gfx void @test_call_void_func_void_clobber_s34() #0 {
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GFX9-NEXT: v_readlane_b32 s4, v40, 0
; GFX9-NEXT: v_readlane_b32 s5, v40, 1
; GFX9-NEXT: s_sub_u32 s32, s32, 0x400
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: v_readlane_b32 s33, v40, 2
; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
@ -661,7 +661,7 @@ define amdgpu_gfx void @test_call_void_func_void_clobber_s34() #0 {
; GFX10-NEXT: s_mov_b32 exec_lo, s4
; GFX10-NEXT: v_writelane_b32 v40, s33, 2
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_add_u32 s32, s32, 0x200
; GFX10-NEXT: s_addk_i32 s32, 0x200
; GFX10-NEXT: s_getpc_b64 s[4:5]
; GFX10-NEXT: s_add_u32 s4, s4, void_func_void_clobber_s34@rel32@lo+4
; GFX10-NEXT: s_addc_u32 s5, s5, void_func_void_clobber_s34@rel32@hi+12
@ -670,7 +670,7 @@ define amdgpu_gfx void @test_call_void_func_void_clobber_s34() #0 {
; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GFX10-NEXT: v_readlane_b32 s4, v40, 0
; GFX10-NEXT: v_readlane_b32 s5, v40, 1
; GFX10-NEXT: s_sub_u32 s32, s32, 0x200
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: v_readlane_b32 s33, v40, 2
; GFX10-NEXT: s_or_saveexec_b32 s6, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
@ -693,7 +693,7 @@ define amdgpu_gfx void @callee_saved_sgpr_kernel() #1 {
; GFX9-NEXT: v_writelane_b32 v40, s40, 0
; GFX9-NEXT: v_writelane_b32 v40, s30, 1
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_add_u32 s32, s32, 0x400
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12
@ -708,7 +708,7 @@ define amdgpu_gfx void @callee_saved_sgpr_kernel() #1 {
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: v_readlane_b32 s5, v40, 2
; GFX9-NEXT: v_readlane_b32 s40, v40, 0
; GFX9-NEXT: s_sub_u32 s32, s32, 0x400
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: v_readlane_b32 s33, v40, 3
; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
@ -726,7 +726,7 @@ define amdgpu_gfx void @callee_saved_sgpr_kernel() #1 {
; GFX10-NEXT: s_mov_b32 exec_lo, s4
; GFX10-NEXT: v_writelane_b32 v40, s33, 3
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_add_u32 s32, s32, 0x200
; GFX10-NEXT: s_addk_i32 s32, 0x200
; GFX10-NEXT: s_getpc_b64 s[4:5]
; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12
@ -743,7 +743,7 @@ define amdgpu_gfx void @callee_saved_sgpr_kernel() #1 {
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: v_readlane_b32 s5, v40, 2
; GFX10-NEXT: v_readlane_b32 s40, v40, 0
; GFX10-NEXT: s_sub_u32 s32, s32, 0x200
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: v_readlane_b32 s33, v40, 3
; GFX10-NEXT: s_or_saveexec_b32 s6, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
@ -768,7 +768,7 @@ define amdgpu_gfx void @callee_saved_sgpr_vgpr_kernel() #1 {
; GFX9-NEXT: v_writelane_b32 v41, s40, 0
; GFX9-NEXT: v_writelane_b32 v41, s30, 1
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_add_u32 s32, s32, 0x400
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; def s40
@ -792,7 +792,7 @@ define amdgpu_gfx void @callee_saved_sgpr_vgpr_kernel() #1 {
; GFX9-NEXT: v_readlane_b32 s4, v41, 1
; GFX9-NEXT: v_readlane_b32 s5, v41, 2
; GFX9-NEXT: v_readlane_b32 s40, v41, 0
; GFX9-NEXT: s_sub_u32 s32, s32, 0x400
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: v_readlane_b32 s33, v41, 3
; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
@ -810,7 +810,7 @@ define amdgpu_gfx void @callee_saved_sgpr_vgpr_kernel() #1 {
; GFX10-NEXT: s_mov_b32 exec_lo, s4
; GFX10-NEXT: v_writelane_b32 v41, s33, 3
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_add_u32 s32, s32, 0x200
; GFX10-NEXT: s_addk_i32 s32, 0x200
; GFX10-NEXT: s_getpc_b64 s[4:5]
; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12
@ -836,7 +836,7 @@ define amdgpu_gfx void @callee_saved_sgpr_vgpr_kernel() #1 {
; GFX10-NEXT: v_readlane_b32 s4, v41, 1
; GFX10-NEXT: v_readlane_b32 s5, v41, 2
; GFX10-NEXT: v_readlane_b32 s40, v41, 0
; GFX10-NEXT: s_sub_u32 s32, s32, 0x200
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: v_readlane_b32 s33, v41, 3
; GFX10-NEXT: s_or_saveexec_b32 s6, -1
; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload

View File

@ -1242,9 +1242,9 @@ define amdgpu_gfx void @call_512xi32() #0 {
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_mov_b32 s8, s33
; GFX9-NEXT: s_add_u32 s33, s32, 0x1ffc0
; GFX9-NEXT: s_add_i32 s33, s32, 0x1ffc0
; GFX9-NEXT: s_and_b32 s33, s33, 0xfffe0000
; GFX9-NEXT: s_add_u32 s32, s32, 0x60000
; GFX9-NEXT: s_add_i32 s32, s32, 0x60000
; GFX9-NEXT: s_getpc_b64 s[6:7]
; GFX9-NEXT: s_add_u32 s6, s6, return_512xi32@gotpcrel32@lo+4
; GFX9-NEXT: s_addc_u32 s7, s7, return_512xi32@gotpcrel32@hi+12
@ -1253,7 +1253,7 @@ define amdgpu_gfx void @call_512xi32() #0 {
; GFX9-NEXT: s_mov_b64 s[4:5], s[30:31]
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7]
; GFX9-NEXT: s_sub_u32 s32, s32, 0x60000
; GFX9-NEXT: s_add_i32 s32, s32, 0xfffa0000
; GFX9-NEXT: s_mov_b32 s33, s8
; GFX9-NEXT: s_setpc_b64 s[4:5]
;
@ -1262,10 +1262,10 @@ define amdgpu_gfx void @call_512xi32() #0 {
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s8, s33
; GFX10-NEXT: s_add_u32 s33, s32, 0xffe0
; GFX10-NEXT: s_add_i32 s33, s32, 0xffe0
; GFX10-NEXT: s_mov_b64 s[4:5], s[30:31]
; GFX10-NEXT: s_and_b32 s33, s33, 0xffff0000
; GFX10-NEXT: s_add_u32 s32, s32, 0x30000
; GFX10-NEXT: s_add_i32 s32, s32, 0x30000
; GFX10-NEXT: s_getpc_b64 s[6:7]
; GFX10-NEXT: s_add_u32 s6, s6, return_512xi32@gotpcrel32@lo+4
; GFX10-NEXT: s_addc_u32 s7, s7, return_512xi32@gotpcrel32@hi+12
@ -1273,7 +1273,7 @@ define amdgpu_gfx void @call_512xi32() #0 {
; GFX10-NEXT: s_load_dwordx2 s[6:7], s[6:7], 0x0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7]
; GFX10-NEXT: s_sub_u32 s32, s32, 0x30000
; GFX10-NEXT: s_add_i32 s32, s32, 0xfffd0000
; GFX10-NEXT: s_mov_b32 s33, s8
; GFX10-NEXT: s_setpc_b64 s[4:5]
entry:
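
The call_512xi32 prologue above pairs the converted add with a mask, which is the usual round-up-to-alignment idiom: add (align - 1), then clear the low bits. The immediates are consistent with an 0x800-byte frame alignment and a stack pointer kept in wave-scaled units (a factor of 64 on the GFX9 lines, 32 on the GFX10 lines); that reading is inferred from the constants, not stated by the test. A minimal sketch under those assumptions:

// align_frame.cpp, illustrates the add/and pair in the prologue above.
#include <cassert>
#include <cstdint>
#include <cstdio>

// Round a wave-scaled stack pointer up to AlignBytes. Sp is assumed to be a
// multiple of Scale, so adding (AlignBytes - 1) * Scale and masking with
// ~(AlignBytes * Scale - 1) lands on the next aligned frame base.
static uint32_t alignScaledSP(uint32_t Sp, uint32_t AlignBytes, uint32_t Scale) {
  assert(Sp % Scale == 0 && "scaled SP stays a multiple of the wave size");
  uint32_t Added = Sp + (AlignBytes - 1) * Scale; // s_add_i32 s33, s32, imm
  return Added & ~(AlignBytes * Scale - 1);       // s_and_b32 s33, s33, mask
}

int main() {
  // GFX9:  s_add_i32 s33, s32, 0x1ffc0 ; s_and_b32 s33, s33, 0xfffe0000
  assert((0x800u - 1) * 64 == 0x1ffc0u);
  assert(~(0x800u * 64 - 1) == 0xfffe0000u);
  // GFX10: s_add_i32 s33, s32, 0xffe0  ; s_and_b32 s33, s33, 0xffff0000
  assert((0x800u - 1) * 32 == 0xffe0u);
  assert(~(0x800u * 32 - 1) == 0xffff0000u);
  std::printf("0x%x\n", alignScaledSP(0x1240, 0x800, 64)); // prints 0x20000
  return 0;
}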

View File

@ -77,7 +77,7 @@ define amdgpu_kernel void @test_indirect_call_sgpr_ptr() {
; GCN-NEXT: ; %bb.0:
; GCN-NEXT: s_mov_b32 s32, 0
; GCN-NEXT: s_mov_b32 flat_scratch_lo, s13
; GCN-NEXT: s_add_u32 s12, s12, s17
; GCN-NEXT: s_add_i32 s12, s12, s17
; GCN-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
; GCN-NEXT: s_add_u32 s0, s0, s17
; GCN-NEXT: s_addc_u32 s1, s1, 0
@ -173,7 +173,7 @@ define amdgpu_kernel void @test_indirect_call_sgpr_ptr_arg() {
; GCN-NEXT: ; %bb.0:
; GCN-NEXT: s_mov_b32 s32, 0
; GCN-NEXT: s_mov_b32 flat_scratch_lo, s13
; GCN-NEXT: s_add_u32 s12, s12, s17
; GCN-NEXT: s_add_i32 s12, s12, s17
; GCN-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
; GCN-NEXT: s_add_u32 s0, s0, s17
; GCN-NEXT: s_addc_u32 s1, s1, 0
@ -206,7 +206,7 @@ define void @test_indirect_call_vgpr_ptr(void()* %fptr) {
; GCN-NEXT: s_mov_b64 exec, s[16:17]
; GCN-NEXT: v_writelane_b32 v43, s33, 17
; GCN-NEXT: s_mov_b32 s33, s32
; GCN-NEXT: s_add_u32 s32, s32, 0x800
; GCN-NEXT: s_addk_i32 s32, 0x800
; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill
@ -276,7 +276,7 @@ define void @test_indirect_call_vgpr_ptr(void()* %fptr) {
; GCN-NEXT: buffer_load_dword v42, off, s[0:3], s33 ; 4-byte Folded Reload
; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
; GCN-NEXT: s_sub_u32 s32, s32, 0x800
; GCN-NEXT: s_addk_i32 s32, 0xf800
; GCN-NEXT: v_readlane_b32 s33, v43, 17
; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
; GCN-NEXT: buffer_load_dword v43, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
@ -296,7 +296,7 @@ define void @test_indirect_call_vgpr_ptr_arg(void(i32)* %fptr) {
; GCN-NEXT: s_mov_b64 exec, s[16:17]
; GCN-NEXT: v_writelane_b32 v43, s33, 17
; GCN-NEXT: s_mov_b32 s33, s32
; GCN-NEXT: s_add_u32 s32, s32, 0x800
; GCN-NEXT: s_addk_i32 s32, 0x800
; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill
@ -367,7 +367,7 @@ define void @test_indirect_call_vgpr_ptr_arg(void(i32)* %fptr) {
; GCN-NEXT: buffer_load_dword v42, off, s[0:3], s33 ; 4-byte Folded Reload
; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
; GCN-NEXT: s_sub_u32 s32, s32, 0x800
; GCN-NEXT: s_addk_i32 s32, 0xf800
; GCN-NEXT: v_readlane_b32 s33, v43, 17
; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
; GCN-NEXT: buffer_load_dword v43, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
@ -387,7 +387,7 @@ define i32 @test_indirect_call_vgpr_ptr_ret(i32()* %fptr) {
; GCN-NEXT: s_mov_b64 exec, s[16:17]
; GCN-NEXT: v_writelane_b32 v43, s33, 17
; GCN-NEXT: s_mov_b32 s33, s32
; GCN-NEXT: s_add_u32 s32, s32, 0x800
; GCN-NEXT: s_addk_i32 s32, 0x800
; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill
@ -458,7 +458,7 @@ define i32 @test_indirect_call_vgpr_ptr_ret(i32()* %fptr) {
; GCN-NEXT: buffer_load_dword v42, off, s[0:3], s33 ; 4-byte Folded Reload
; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
; GCN-NEXT: s_sub_u32 s32, s32, 0x800
; GCN-NEXT: s_addk_i32 s32, 0xf800
; GCN-NEXT: v_readlane_b32 s33, v43, 17
; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
; GCN-NEXT: buffer_load_dword v43, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
@ -479,7 +479,7 @@ define void @test_indirect_call_vgpr_ptr_in_branch(void()* %fptr, i1 %cond) {
; GCN-NEXT: s_mov_b64 exec, s[16:17]
; GCN-NEXT: v_writelane_b32 v43, s33, 19
; GCN-NEXT: s_mov_b32 s33, s32
; GCN-NEXT: s_add_u32 s32, s32, 0x800
; GCN-NEXT: s_addk_i32 s32, 0x800
; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill
@ -560,7 +560,7 @@ define void @test_indirect_call_vgpr_ptr_in_branch(void()* %fptr, i1 %cond) {
; GCN-NEXT: buffer_load_dword v42, off, s[0:3], s33 ; 4-byte Folded Reload
; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
; GCN-NEXT: s_sub_u32 s32, s32, 0x800
; GCN-NEXT: s_addk_i32 s32, 0xf800
; GCN-NEXT: v_readlane_b32 s33, v43, 19
; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
; GCN-NEXT: buffer_load_dword v43, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
@ -587,7 +587,7 @@ define void @test_indirect_call_vgpr_ptr_inreg_arg(void(i32)* %fptr) {
; GCN-NEXT: s_mov_b64 exec, s[4:5]
; GCN-NEXT: v_writelane_b32 v42, s33, 6
; GCN-NEXT: s_mov_b32 s33, s32
; GCN-NEXT: s_add_u32 s32, s32, 0x400
; GCN-NEXT: s_addk_i32 s32, 0x400
; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill
; GCN-NEXT: v_writelane_b32 v42, s34, 0
@ -618,7 +618,7 @@ define void @test_indirect_call_vgpr_ptr_inreg_arg(void(i32)* %fptr) {
; GCN-NEXT: v_readlane_b32 s34, v42, 0
; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload
; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; GCN-NEXT: s_sub_u32 s32, s32, 0x400
; GCN-NEXT: s_addk_i32 s32, 0xfc00
; GCN-NEXT: v_readlane_b32 s33, v42, 6
; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
; GCN-NEXT: buffer_load_dword v42, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload

View File

@ -70,7 +70,7 @@ define amdgpu_kernel void @local_stack_offset_uses_sp(i64 addrspace(1)* %out) {
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: BB0_1: ; %loadstoreloop
; FLATSCR-NEXT: ; =>This Inner Loop Header: Depth=1
; FLATSCR-NEXT: s_add_u32 s3, 0x3000, s2
; FLATSCR-NEXT: s_add_i32 s3, s2, 0x3000
; FLATSCR-NEXT: s_add_i32 s2, s2, 1
; FLATSCR-NEXT: s_cmpk_lt_u32 s2, 0x2120
; FLATSCR-NEXT: scratch_store_byte off, v0, s3
@ -78,7 +78,7 @@ define amdgpu_kernel void @local_stack_offset_uses_sp(i64 addrspace(1)* %out) {
; FLATSCR-NEXT: s_cbranch_scc1 BB0_1
; FLATSCR-NEXT: ; %bb.2: ; %split
; FLATSCR-NEXT: s_movk_i32 s2, 0x2000
; FLATSCR-NEXT: s_add_u32 s2, 0x3000, s2
; FLATSCR-NEXT: s_addk_i32 s2, 0x3000
; FLATSCR-NEXT: scratch_load_dwordx2 v[0:1], off, s2 offset:208 glc
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: s_movk_i32 s2, 0x3000
@ -111,14 +111,14 @@ define void @func_local_stack_offset_uses_sp(i64 addrspace(1)* %out) {
; MUBUF: ; %bb.0: ; %entry
; MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; MUBUF-NEXT: s_mov_b32 s5, s33
; MUBUF-NEXT: s_add_u32 s33, s32, 0x7ffc0
; MUBUF-NEXT: s_add_i32 s33, s32, 0x7ffc0
; MUBUF-NEXT: s_and_b32 s33, s33, 0xfff80000
; MUBUF-NEXT: v_lshrrev_b32_e64 v3, 6, s33
; MUBUF-NEXT: v_add_u32_e32 v3, 0x1000, v3
; MUBUF-NEXT: v_mov_b32_e32 v4, 0
; MUBUF-NEXT: v_add_u32_e32 v2, 64, v3
; MUBUF-NEXT: s_mov_b32 s4, 0
; MUBUF-NEXT: s_add_u32 s32, s32, 0x180000
; MUBUF-NEXT: s_add_i32 s32, s32, 0x180000
; MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], s33
; MUBUF-NEXT: s_waitcnt vmcnt(0)
; MUBUF-NEXT: BB1_1: ; %loadstoreloop
@ -141,7 +141,7 @@ define void @func_local_stack_offset_uses_sp(i64 addrspace(1)* %out) {
; MUBUF-NEXT: s_waitcnt vmcnt(0)
; MUBUF-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen offset:4 glc
; MUBUF-NEXT: s_waitcnt vmcnt(0)
; MUBUF-NEXT: s_sub_u32 s32, s32, 0x180000
; MUBUF-NEXT: s_add_i32 s32, s32, 0xffe80000
; MUBUF-NEXT: s_mov_b32 s33, s5
; MUBUF-NEXT: v_add_co_u32_e32 v2, vcc, v4, v6
; MUBUF-NEXT: v_addc_co_u32_e32 v3, vcc, v5, v7, vcc
@ -153,17 +153,17 @@ define void @func_local_stack_offset_uses_sp(i64 addrspace(1)* %out) {
; FLATSCR: ; %bb.0: ; %entry
; FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; FLATSCR-NEXT: s_mov_b32 s2, s33
; FLATSCR-NEXT: s_add_u32 s33, s32, 0x1fff
; FLATSCR-NEXT: s_add_i32 s33, s32, 0x1fff
; FLATSCR-NEXT: s_and_b32 s33, s33, 0xffffe000
; FLATSCR-NEXT: v_mov_b32_e32 v2, 0
; FLATSCR-NEXT: s_mov_b32 s0, 0
; FLATSCR-NEXT: s_add_u32 s32, s32, 0x6000
; FLATSCR-NEXT: s_addk_i32 s32, 0x6000
; FLATSCR-NEXT: scratch_store_dword off, v2, s33
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: BB1_1: ; %loadstoreloop
; FLATSCR-NEXT: ; =>This Inner Loop Header: Depth=1
; FLATSCR-NEXT: s_add_u32 vcc_hi, s33, 0x1000
; FLATSCR-NEXT: s_add_u32 s1, vcc_hi, s0
; FLATSCR-NEXT: s_add_i32 vcc_hi, s33, 0x1000
; FLATSCR-NEXT: s_add_i32 s1, s0, vcc_hi
; FLATSCR-NEXT: s_add_i32 s0, s0, 1
; FLATSCR-NEXT: s_cmpk_lt_u32 s0, 0x2120
; FLATSCR-NEXT: scratch_store_byte off, v2, s1
@ -171,14 +171,14 @@ define void @func_local_stack_offset_uses_sp(i64 addrspace(1)* %out) {
; FLATSCR-NEXT: s_cbranch_scc1 BB1_1
; FLATSCR-NEXT: ; %bb.2: ; %split
; FLATSCR-NEXT: s_movk_i32 s0, 0x2000
; FLATSCR-NEXT: s_add_u32 s1, s33, 0x1000
; FLATSCR-NEXT: s_add_u32 s0, s1, s0
; FLATSCR-NEXT: s_add_i32 s1, s33, 0x1000
; FLATSCR-NEXT: s_add_i32 s0, s0, s1
; FLATSCR-NEXT: scratch_load_dwordx2 v[2:3], off, s0 offset:208 glc
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: s_add_u32 s0, s33, 0x1000
; FLATSCR-NEXT: s_add_i32 s0, s33, 0x1000
; FLATSCR-NEXT: scratch_load_dwordx2 v[4:5], off, s0 offset:64 glc
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: s_sub_u32 s32, s32, 0x6000
; FLATSCR-NEXT: s_addk_i32 s32, 0xa000
; FLATSCR-NEXT: s_mov_b32 s33, s2
; FLATSCR-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4
; FLATSCR-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v5, vcc
@ -286,7 +286,7 @@ define amdgpu_kernel void @local_stack_offset_uses_sp_flat(<3 x i64> addrspace(1
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: BB2_1: ; %loadstoreloop
; FLATSCR-NEXT: ; =>This Inner Loop Header: Depth=1
; FLATSCR-NEXT: s_add_u32 s3, 0x2000, s2
; FLATSCR-NEXT: s_add_i32 s3, s2, 0x2000
; FLATSCR-NEXT: s_add_i32 s2, s2, 1
; FLATSCR-NEXT: s_cmpk_lt_u32 s2, 0x2120
; FLATSCR-NEXT: scratch_store_byte off, v0, s3
@ -294,7 +294,7 @@ define amdgpu_kernel void @local_stack_offset_uses_sp_flat(<3 x i64> addrspace(1
; FLATSCR-NEXT: s_cbranch_scc1 BB2_1
; FLATSCR-NEXT: ; %bb.2: ; %split
; FLATSCR-NEXT: s_movk_i32 s2, 0x1000
; FLATSCR-NEXT: s_add_u32 s2, 0x2000, s2
; FLATSCR-NEXT: s_addk_i32 s2, 0x2000
; FLATSCR-NEXT: scratch_load_dwordx2 v[8:9], off, s2 offset:720 glc
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 offset:704 glc
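
One more data point from this file: the same frame setup is shown for both lowering paths, and the stack-pointer bump differs by exactly a factor of 64 (0x180000 on the MUBUF lines versus 0x6000 on the flat-scratch lines). My reading, an inference rather than something the test spells out, is that the MUBUF stack pointer counts bytes times the wave size while flat scratch counts plain bytes. A short check of those constants, including the negated epilogue immediates:

// frame_units.cpp, relates the MUBUF and flat-scratch immediates above.
#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  const uint32_t WaveSize = 64;       // assumed wave64 scaling on the MUBUF path
  const uint32_t FrameBytes = 0x6000; // FLATSCR: s_addk_i32 s32, 0x6000
  assert(FrameBytes * WaveSize == 0x180000u); // MUBUF: s_add_i32 s32, s32, 0x180000
  // The epilogues add the negated amount back instead of subtracting:
  assert(static_cast<uint16_t>(-0x6000) == 0xa000);        // s_addk_i32 s32, 0xa000
  assert(static_cast<uint32_t>(-0x180000) == 0xffe80000u); // s_add_i32 s32, s32, 0xffe80000
  std::printf("frame: %u bytes, %u scaled units\n", FrameBytes, FrameBytes * WaveSize);
  return 0;
}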

View File

@ -193,7 +193,7 @@ define void @slsr1_1(i32 %b.arg, i32 %s.arg) #0 {
; GFX9-NEXT: s_mov_b64 exec, s[4:5]
; GFX9-NEXT: v_writelane_b32 v43, s33, 4
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_add_u32 s32, s32, 0x800
; GFX9-NEXT: s_addk_i32 s32, 0x800
; GFX9-NEXT: v_writelane_b32 v43, s34, 0
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, foo@gotpcrel32@lo+4
@ -223,7 +223,7 @@ define void @slsr1_1(i32 %b.arg, i32 %s.arg) #0 {
; GFX9-NEXT: v_readlane_b32 s5, v43, 3
; GFX9-NEXT: v_readlane_b32 s35, v43, 1
; GFX9-NEXT: v_readlane_b32 s34, v43, 0
; GFX9-NEXT: s_sub_u32 s32, s32, 0x800
; GFX9-NEXT: s_addk_i32 s32, 0xf800
; GFX9-NEXT: v_readlane_b32 s33, v43, 4
; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
; GFX9-NEXT: buffer_load_dword v43, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload

View File

@ -8,11 +8,11 @@ define hidden fastcc void @callee_has_fp() #1 {
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_mov_b32 s4, s33
; CHECK-NEXT: s_mov_b32 s33, s32
; CHECK-NEXT: s_add_u32 s32, s32, 0x200
; CHECK-NEXT: s_addk_i32 s32, 0x200
; CHECK-NEXT: v_mov_b32_e32 v0, 1
; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: s_sub_u32 s32, s32, 0x200
; CHECK-NEXT: s_addk_i32 s32, 0xfe00
; CHECK-NEXT: s_mov_b32 s33, s4
; CHECK-NEXT: s_setpc_b64 s[30:31]
%alloca = alloca i32, addrspace(5)
@ -29,7 +29,7 @@ define internal fastcc void @csr_vgpr_spill_fp_callee() #0 {
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_mov_b32 s8, s33
; CHECK-NEXT: s_mov_b32 s33, s32
; CHECK-NEXT: s_add_u32 s32, s32, 0x400
; CHECK-NEXT: s_addk_i32 s32, 0x400
; CHECK-NEXT: s_getpc_b64 s[4:5]
; CHECK-NEXT: s_add_u32 s4, s4, callee_has_fp@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s5, s5, callee_has_fp@rel32@hi+12
@ -40,7 +40,7 @@ define internal fastcc void @csr_vgpr_spill_fp_callee() #0 {
; CHECK-NEXT: ; clobber csr v40
; CHECK-NEXT: ;;#ASMEND
; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; CHECK-NEXT: s_sub_u32 s32, s32, 0x400
; CHECK-NEXT: s_addk_i32 s32, 0xfc00
; CHECK-NEXT: s_mov_b32 s33, s8
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[6:7]

View File

@ -16,7 +16,7 @@ declare void @external_void_func_i32(i32) #0
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]]
; GCN-DAG: v_writelane_b32 v40, s33, 2
; GCN-DAG: s_mov_b32 s33, s32
; GCN-DAG: s_add_u32 s32, s32, 0x400
; GCN-DAG: s_addk_i32 s32, 0x400
; GCN-DAG: v_writelane_b32 v40, s30, 0
; GCN-DAG: v_writelane_b32 v40, s31, 1
@ -25,7 +25,7 @@ declare void @external_void_func_i32(i32) #0
; GCN: v_readlane_b32 s4, v40, 0
; GCN: v_readlane_b32 s5, v40, 1
; GCN-NEXT: s_sub_u32 s32, s32, 0x400
; GCN-NEXT: s_addk_i32 s32, 0xfc00
; GCN-NEXT: v_readlane_b32 s33, v40, 2
; GCN: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
@ -40,10 +40,10 @@ define void @test_func_call_external_void_func_i32_imm() #0 {
; GCN-LABEL: {{^}}test_func_call_external_void_func_i32_imm_stack_use:
; GCN: s_waitcnt
; GCN: s_mov_b32 s33, s32
; GCN-DAG: s_add_u32 s32, s32, 0x1400{{$}}
; GCN-DAG: s_addk_i32 s32, 0x1400{{$}}
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s33 offset:
; GCN: s_swappc_b64
; GCN: s_sub_u32 s32, s32, 0x1400{{$}}
; GCN: s_addk_i32 s32, 0xec00{{$}}
; GCN: s_setpc_b64
define void @test_func_call_external_void_func_i32_imm_stack_use() #0 {
%alloca = alloca [16 x i32], align 4, addrspace(5)

View File

@ -65,16 +65,14 @@ define amdgpu_kernel void @kernel_non_entry_block_static_alloca_uniformly_reache
; FLATSCR-NEXT: s_cmp_lg_u32 s5, 0
; FLATSCR-NEXT: s_cbranch_scc1 BB0_3
; FLATSCR-NEXT: ; %bb.2: ; %bb.1
; FLATSCR-NEXT: s_mov_b32 s2, s32
; FLATSCR-NEXT: s_add_i32 s3, s2, 0x1000
; FLATSCR-NEXT: s_add_i32 s2, s32, 0x1000
; FLATSCR-NEXT: v_mov_b32_e32 v1, 0
; FLATSCR-NEXT: s_add_u32 s2, s2, 0x1000
; FLATSCR-NEXT: v_mov_b32_e32 v2, 1
; FLATSCR-NEXT: s_lshl_b32 s3, s6, 2
; FLATSCR-NEXT: s_mov_b32 s32, s2
; FLATSCR-NEXT: scratch_store_dwordx2 off, v[1:2], s2
; FLATSCR-NEXT: s_lshl_b32 s2, s6, 2
; FLATSCR-NEXT: s_mov_b32 s32, s3
; FLATSCR-NEXT: s_add_i32 s3, s3, s2
; FLATSCR-NEXT: scratch_load_dword v2, off, s3
; FLATSCR-NEXT: s_add_i32 s2, s2, s3
; FLATSCR-NEXT: scratch_load_dword v2, off, s2
; FLATSCR-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: v_add_u32_e32 v0, v2, v0
@ -221,7 +219,7 @@ define void @func_non_entry_block_static_alloca_align4(i32 addrspace(1)* %out, i
; MUBUF-NEXT: s_mov_b32 s7, s33
; MUBUF-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
; MUBUF-NEXT: s_mov_b32 s33, s32
; MUBUF-NEXT: s_add_u32 s32, s32, 0x400
; MUBUF-NEXT: s_addk_i32 s32, 0x400
; MUBUF-NEXT: s_and_saveexec_b64 s[4:5], vcc
; MUBUF-NEXT: s_cbranch_execz BB2_3
; MUBUF-NEXT: ; %bb.1: ; %bb.0
@ -247,17 +245,17 @@ define void @func_non_entry_block_static_alloca_align4(i32 addrspace(1)* %out, i
; MUBUF-NEXT: v_mov_b32_e32 v0, 0
; MUBUF-NEXT: global_store_dword v[0:1], v0, off
; MUBUF-NEXT: s_waitcnt vmcnt(0)
; MUBUF-NEXT: s_sub_u32 s32, s32, 0x400
; MUBUF-NEXT: s_addk_i32 s32, 0xfc00
; MUBUF-NEXT: s_mov_b32 s33, s7
; MUBUF-NEXT: s_setpc_b64 s[30:31]
;
; FLATSCR-LABEL: func_non_entry_block_static_alloca_align4:
; FLATSCR: ; %bb.0: ; %entry
; FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; FLATSCR-NEXT: s_mov_b32 s4, s33
; FLATSCR-NEXT: s_mov_b32 s3, s33
; FLATSCR-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
; FLATSCR-NEXT: s_mov_b32 s33, s32
; FLATSCR-NEXT: s_add_u32 s32, s32, 16
; FLATSCR-NEXT: s_add_i32 s32, s32, 16
; FLATSCR-NEXT: s_and_saveexec_b64 s[0:1], vcc
; FLATSCR-NEXT: s_cbranch_execz BB2_3
; FLATSCR-NEXT: ; %bb.1: ; %bb.0
@ -265,16 +263,14 @@ define void @func_non_entry_block_static_alloca_align4(i32 addrspace(1)* %out, i
; FLATSCR-NEXT: s_and_b64 exec, exec, vcc
; FLATSCR-NEXT: s_cbranch_execz BB2_3
; FLATSCR-NEXT: ; %bb.2: ; %bb.1
; FLATSCR-NEXT: s_mov_b32 s2, s32
; FLATSCR-NEXT: s_add_i32 s3, s2, 0x1000
; FLATSCR-NEXT: s_add_u32 s2, s2, 0x1000
; FLATSCR-NEXT: s_add_i32 s2, s32, 0x1000
; FLATSCR-NEXT: v_mov_b32_e32 v2, 0
; FLATSCR-NEXT: v_mov_b32_e32 v3, 1
; FLATSCR-NEXT: scratch_store_dwordx2 off, v[2:3], s2
; FLATSCR-NEXT: v_lshl_add_u32 v2, v4, 2, s3
; FLATSCR-NEXT: v_lshl_add_u32 v2, v4, 2, s2
; FLATSCR-NEXT: scratch_load_dword v2, v2, off
; FLATSCR-NEXT: v_and_b32_e32 v3, 0x3ff, v5
; FLATSCR-NEXT: s_mov_b32 s32, s3
; FLATSCR-NEXT: s_mov_b32 s32, s2
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: v_add_u32_e32 v2, v2, v3
; FLATSCR-NEXT: global_store_dword v[0:1], v2, off
@ -283,8 +279,8 @@ define void @func_non_entry_block_static_alloca_align4(i32 addrspace(1)* %out, i
; FLATSCR-NEXT: v_mov_b32_e32 v0, 0
; FLATSCR-NEXT: global_store_dword v[0:1], v0, off
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: s_sub_u32 s32, s32, 16
; FLATSCR-NEXT: s_mov_b32 s33, s4
; FLATSCR-NEXT: s_add_i32 s32, s32, -16
; FLATSCR-NEXT: s_mov_b32 s33, s3
; FLATSCR-NEXT: s_setpc_b64 s[30:31]
entry:
@ -319,10 +315,10 @@ define void @func_non_entry_block_static_alloca_align64(i32 addrspace(1)* %out,
; MUBUF: ; %bb.0: ; %entry
; MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; MUBUF-NEXT: s_mov_b32 s7, s33
; MUBUF-NEXT: s_add_u32 s33, s32, 0xfc0
; MUBUF-NEXT: s_add_i32 s33, s32, 0xfc0
; MUBUF-NEXT: s_and_b32 s33, s33, 0xfffff000
; MUBUF-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
; MUBUF-NEXT: s_add_u32 s32, s32, 0x2000
; MUBUF-NEXT: s_addk_i32 s32, 0x2000
; MUBUF-NEXT: s_and_saveexec_b64 s[4:5], vcc
; MUBUF-NEXT: s_cbranch_execz BB3_2
; MUBUF-NEXT: ; %bb.1: ; %bb.0
@ -345,7 +341,7 @@ define void @func_non_entry_block_static_alloca_align64(i32 addrspace(1)* %out,
; MUBUF-NEXT: v_mov_b32_e32 v0, 0
; MUBUF-NEXT: global_store_dword v[0:1], v0, off
; MUBUF-NEXT: s_waitcnt vmcnt(0)
; MUBUF-NEXT: s_sub_u32 s32, s32, 0x2000
; MUBUF-NEXT: s_addk_i32 s32, 0xe000
; MUBUF-NEXT: s_mov_b32 s33, s7
; MUBUF-NEXT: s_setpc_b64 s[30:31]
;
@ -353,10 +349,10 @@ define void @func_non_entry_block_static_alloca_align64(i32 addrspace(1)* %out,
; FLATSCR: ; %bb.0: ; %entry
; FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; FLATSCR-NEXT: s_mov_b32 s3, s33
; FLATSCR-NEXT: s_add_u32 s33, s32, 63
; FLATSCR-NEXT: s_add_i32 s33, s32, 63
; FLATSCR-NEXT: s_andn2_b32 s33, s33, 63
; FLATSCR-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
; FLATSCR-NEXT: s_add_u32 s32, s32, 0x80
; FLATSCR-NEXT: s_addk_i32 s32, 0x80
; FLATSCR-NEXT: s_and_saveexec_b64 s[0:1], vcc
; FLATSCR-NEXT: s_cbranch_execz BB3_2
; FLATSCR-NEXT: ; %bb.1: ; %bb.0
@ -377,7 +373,7 @@ define void @func_non_entry_block_static_alloca_align64(i32 addrspace(1)* %out,
; FLATSCR-NEXT: v_mov_b32_e32 v0, 0
; FLATSCR-NEXT: global_store_dword v[0:1], v0, off
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: s_sub_u32 s32, s32, 0x80
; FLATSCR-NEXT: s_addk_i32 s32, 0xff80
; FLATSCR-NEXT: s_mov_b32 s33, s3
; FLATSCR-NEXT: s_setpc_b64 s[30:31]
entry:


@ -29,25 +29,25 @@ body: |
; CHECK-LABEL: name: scavenge_sgpr_pei_no_sgprs
; CHECK: liveins: $vgpr1, $vgpr2
; CHECK: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $sgpr6 = S_ADD_U32 $sgpr32, 524544, implicit-def $scc
; CHECK: $sgpr6 = S_ADD_I32 $sgpr32, 524544, implicit-def $scc
; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.3, addrspace 5)
; CHECK: $exec = S_MOV_B64 killed $sgpr4_sgpr5
; CHECK: $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2
; CHECK: $sgpr33 = frame-setup S_ADD_U32 $sgpr32, 524224, implicit-def $scc
; CHECK: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
; CHECK: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def $scc
; CHECK: $sgpr32 = frame-setup S_ADD_U32 $sgpr32, 1572864, implicit-def $scc
; CHECK: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 1572864, implicit-def $scc
; CHECK: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
; CHECK: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
; CHECK: $sgpr33 = S_LSHR_B32 $sgpr33, 6, implicit-def $scc
; CHECK: $sgpr33 = S_ADD_U32 killed $sgpr33, 8192, implicit-def $scc
; CHECK: $sgpr33 = S_ADD_I32 killed $sgpr33, 8192, implicit-def $scc
; CHECK: $vgpr3 = COPY killed $sgpr33
; CHECK: $sgpr33 = S_SUB_U32 killed $sgpr33, 8192, implicit-def $scc
; CHECK: $sgpr33 = S_ADD_I32 killed $sgpr33, -8192, implicit-def $scc
; CHECK: $sgpr33 = S_LSHL_B32 $sgpr33, 6, implicit-def $scc
; CHECK: $vgpr0 = V_OR_B32_e32 killed $vgpr3, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31
; CHECK: $sgpr32 = frame-destroy S_SUB_U32 $sgpr32, 1572864, implicit-def $scc
; CHECK: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -1572864, implicit-def $scc
; CHECK: $sgpr33 = V_READLANE_B32 $vgpr2, 0
; CHECK: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $sgpr6 = S_ADD_U32 $sgpr32, 524544, implicit-def $scc
; CHECK: $sgpr6 = S_ADD_I32 $sgpr32, 524544, implicit-def $scc
; CHECK: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.3, addrspace 5)
; CHECK: $exec = S_MOV_B64 killed $sgpr4_sgpr5
; CHECK: S_ENDPGM 0, implicit $vcc
@ -81,18 +81,18 @@ body: |
; CHECK-LABEL: name: scavenge_sgpr_pei_one_sgpr
; CHECK: liveins: $sgpr29, $vgpr1
; CHECK: $sgpr29 = frame-setup COPY $sgpr33
; CHECK: $sgpr33 = frame-setup S_ADD_U32 $sgpr32, 524224, implicit-def $scc
; CHECK: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
; CHECK: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def $scc
; CHECK: $sgpr32 = frame-setup S_ADD_U32 $sgpr32, 1572864, implicit-def $scc
; CHECK: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 1572864, implicit-def $scc
; CHECK: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
; CHECK: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
; CHECK: $sgpr33 = S_LSHR_B32 $sgpr33, 6, implicit-def $scc
; CHECK: $sgpr33 = S_ADD_U32 killed $sgpr33, 8192, implicit-def $scc
; CHECK: $sgpr33 = S_ADD_I32 killed $sgpr33, 8192, implicit-def $scc
; CHECK: $vgpr2 = COPY killed $sgpr33
; CHECK: $sgpr33 = S_SUB_U32 killed $sgpr33, 8192, implicit-def $scc
; CHECK: $sgpr33 = S_ADD_I32 killed $sgpr33, -8192, implicit-def $scc
; CHECK: $sgpr33 = S_LSHL_B32 $sgpr33, 6, implicit-def $scc
; CHECK: $vgpr0 = V_OR_B32_e32 killed $vgpr2, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr31
; CHECK: $sgpr32 = frame-destroy S_SUB_U32 $sgpr32, 1572864, implicit-def $scc
; CHECK: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -1572864, implicit-def $scc
; CHECK: $sgpr33 = frame-destroy COPY $sgpr29
; CHECK: S_ENDPGM 0, implicit $vcc
S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
@ -125,16 +125,16 @@ body: |
; CHECK-LABEL: name: scavenge_sgpr_pei_one_sgpr_64
; CHECK: liveins: $sgpr28, $vgpr1
; CHECK: $sgpr28 = frame-setup COPY $sgpr33
; CHECK: $sgpr33 = frame-setup S_ADD_U32 $sgpr32, 524224, implicit-def $scc
; CHECK: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
; CHECK: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def $scc
; CHECK: $sgpr32 = frame-setup S_ADD_U32 $sgpr32, 1572864, implicit-def $scc
; CHECK: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 1572864, implicit-def $scc
; CHECK: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
; CHECK: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
; CHECK: $sgpr29 = S_LSHR_B32 $sgpr33, 6, implicit-def $scc
; CHECK: $sgpr29 = S_ADD_U32 killed $sgpr29, 8192, implicit-def $scc
; CHECK: $sgpr29 = S_ADD_I32 killed $sgpr29, 8192, implicit-def $scc
; CHECK: $vgpr2 = COPY killed $sgpr29
; CHECK: $vgpr0 = V_OR_B32_e32 killed $vgpr2, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr31
; CHECK: $sgpr32 = frame-destroy S_SUB_U32 $sgpr32, 1572864, implicit-def $scc
; CHECK: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -1572864, implicit-def $scc
; CHECK: $sgpr33 = frame-destroy COPY $sgpr28
; CHECK: S_ENDPGM 0, implicit $vcc
S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
@ -166,16 +166,16 @@ body: |
; CHECK-LABEL: name: scavenge_sgpr_pei_prefer_vcc
; CHECK: liveins: $sgpr28, $vgpr1
; CHECK: $sgpr28 = frame-setup COPY $sgpr33
; CHECK: $sgpr33 = frame-setup S_ADD_U32 $sgpr32, 524224, implicit-def $scc
; CHECK: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
; CHECK: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def $scc
; CHECK: $sgpr32 = frame-setup S_ADD_U32 $sgpr32, 1572864, implicit-def $scc
; CHECK: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 1572864, implicit-def $scc
; CHECK: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr30, implicit-def $sgpr31
; CHECK: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
; CHECK: $vgpr2 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
; CHECK: $vcc_lo = S_MOV_B32 8192
; CHECK: $vgpr2, dead $vcc = V_ADD_CO_U32_e64 killed $vcc_lo, killed $vgpr2, 0, implicit $exec
; CHECK: $vgpr0 = V_OR_B32_e32 killed $vgpr2, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr31
; CHECK: $sgpr32 = frame-destroy S_SUB_U32 $sgpr32, 1572864, implicit-def $scc
; CHECK: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -1572864, implicit-def $scc
; CHECK: $sgpr33 = frame-destroy COPY $sgpr28
; CHECK: S_ENDPGM 0
S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr30, implicit-def $sgpr31


@ -25,44 +25,44 @@ body: |
; MUBUF-LABEL: name: scavenge_sgpr_pei_no_sgprs
; MUBUF: liveins: $vgpr1, $vgpr2
; MUBUF: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec
; MUBUF: $sgpr6 = S_ADD_U32 $sgpr32, 524544, implicit-def $scc
; MUBUF: $sgpr6 = S_ADD_I32 $sgpr32, 524544, implicit-def $scc
; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.3, addrspace 5)
; MUBUF: $exec = S_MOV_B64 killed $sgpr4_sgpr5
; MUBUF: $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2
; MUBUF: $sgpr33 = frame-setup S_ADD_U32 $sgpr32, 524224, implicit-def $scc
; MUBUF: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
; MUBUF: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def $scc
; MUBUF: $sgpr32 = frame-setup S_ADD_U32 $sgpr32, 1572864, implicit-def $scc
; MUBUF: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 1572864, implicit-def $scc
; MUBUF: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
; MUBUF: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
; MUBUF: $vgpr3 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
; MUBUF: $vgpr3 = V_ADD_U32_e32 8192, killed $vgpr3, implicit $exec
; MUBUF: $vgpr0 = V_OR_B32_e32 killed $vgpr3, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31
; MUBUF: $sgpr32 = frame-destroy S_SUB_U32 $sgpr32, 1572864, implicit-def $scc
; MUBUF: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -1572864, implicit-def $scc
; MUBUF: $sgpr33 = V_READLANE_B32 $vgpr2, 0
; MUBUF: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec
; MUBUF: $sgpr6 = S_ADD_U32 $sgpr32, 524544, implicit-def $scc
; MUBUF: $sgpr6 = S_ADD_I32 $sgpr32, 524544, implicit-def $scc
; MUBUF: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.3, addrspace 5)
; MUBUF: $exec = S_MOV_B64 killed $sgpr4_sgpr5
; MUBUF: S_ENDPGM 0, implicit $vcc
; FLATSCR-LABEL: name: scavenge_sgpr_pei_no_sgprs
; FLATSCR: liveins: $vgpr1, $vgpr2
; FLATSCR: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec
; FLATSCR: $sgpr6 = S_ADD_U32 $sgpr32, 8196, implicit-def $scc
; FLATSCR: $sgpr6 = S_ADD_I32 $sgpr32, 8196, implicit-def $scc
; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, killed $sgpr6, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.3, addrspace 5)
; FLATSCR: $exec = S_MOV_B64 killed $sgpr4_sgpr5
; FLATSCR: $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2
; FLATSCR: $sgpr33 = frame-setup S_ADD_U32 $sgpr32, 8191, implicit-def $scc
; FLATSCR: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc
; FLATSCR: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def $scc
; FLATSCR: $sgpr32 = frame-setup S_ADD_U32 $sgpr32, 24576, implicit-def $scc
; FLATSCR: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 24576, implicit-def $scc
; FLATSCR: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
; FLATSCR: $vgpr0 = V_MOV_B32_e32 $sgpr33, implicit $exec
; FLATSCR: $sgpr33 = S_ADD_U32 $sgpr33, 8192, implicit-def $scc
; FLATSCR: $sgpr33 = S_ADD_I32 $sgpr33, 8192, implicit-def $scc
; FLATSCR: $vgpr0 = V_OR_B32_e32 $sgpr33, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31
; FLATSCR: $sgpr33 = S_SUB_U32 $sgpr33, 8192, implicit-def $scc
; FLATSCR: $sgpr32 = frame-destroy S_SUB_U32 $sgpr32, 24576, implicit-def $scc
; FLATSCR: $sgpr33 = S_ADD_I32 $sgpr33, -8192, implicit-def $scc
; FLATSCR: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -24576, implicit-def $scc
; FLATSCR: $sgpr33 = V_READLANE_B32 $vgpr2, 0
; FLATSCR: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec
; FLATSCR: $sgpr6 = S_ADD_U32 $sgpr32, 8196, implicit-def $scc
; FLATSCR: $sgpr6 = S_ADD_I32 $sgpr32, 8196, implicit-def $scc
; FLATSCR: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR killed $sgpr6, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.3, addrspace 5)
; FLATSCR: $exec = S_MOV_B64 killed $sgpr4_sgpr5
; FLATSCR: S_ENDPGM 0, implicit $vcc


@ -27,13 +27,13 @@ body: |
; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec :: (store 4 into %stack.2, addrspace 5)
; CHECK: $exec = S_MOV_B64 killed $sgpr4_sgpr5
; CHECK: $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2
; CHECK: $sgpr33 = frame-setup S_ADD_U32 $sgpr32, 262080, implicit-def $scc
; CHECK: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 262080, implicit-def $scc
; CHECK: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294705152, implicit-def $scc
; CHECK: $sgpr32 = frame-setup S_ADD_U32 $sgpr32, 524288, implicit-def $scc
; CHECK: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 524288, implicit-def $scc
; CHECK: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
; CHECK: $vgpr3 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
; CHECK: $vgpr0 = V_OR_B32_e32 killed $vgpr3, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31
; CHECK: $sgpr32 = frame-destroy S_SUB_U32 $sgpr32, 524288, implicit-def $scc
; CHECK: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -524288, implicit-def $scc
; CHECK: $sgpr33 = V_READLANE_B32 $vgpr2, 0
; CHECK: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec :: (load 4 from %stack.2, addrspace 5)


@ -26,71 +26,71 @@ body: |
; GFX8-LABEL: name: pei_scavenge_vgpr_spill
; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr2
; GFX8: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX8: $sgpr6 = S_ADD_U32 $sgpr32, 524544, implicit-def $scc
; GFX8: $sgpr6 = S_ADD_I32 $sgpr32, 524544, implicit-def $scc
; GFX8: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.3, addrspace 5)
; GFX8: $exec = S_MOV_B64 killed $sgpr4_sgpr5
; GFX8: $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2
; GFX8: $sgpr33 = frame-setup S_ADD_U32 $sgpr32, 524224, implicit-def $scc
; GFX8: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
; GFX8: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def $scc
; GFX8: $sgpr32 = frame-setup S_ADD_U32 $sgpr32, 1572864, implicit-def $scc
; GFX8: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 1572864, implicit-def $scc
; GFX8: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
; GFX8: $sgpr7 = S_ADD_U32 $sgpr33, 524800, implicit-def $scc
; GFX8: $sgpr7 = S_ADD_I32 $sgpr33, 524800, implicit-def $scc
; GFX8: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr7, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.4, addrspace 5)
; GFX8: $vgpr3 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
; GFX8: $vcc_lo = S_MOV_B32 8192
; GFX8: $vgpr3, dead $vcc = V_ADD_CO_U32_e64 killed $vcc_lo, killed $vgpr3, 0, implicit $exec
; GFX8: $vgpr0 = V_OR_B32_e32 killed $vgpr3, $vgpr1, implicit $exec
; GFX8: $sgpr32 = frame-destroy S_SUB_U32 $sgpr32, 1572864, implicit-def $scc
; GFX8: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -1572864, implicit-def $scc
; GFX8: $sgpr33 = V_READLANE_B32 $vgpr2, 0
; GFX8: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX8: $sgpr6 = S_ADD_U32 $sgpr32, 524544, implicit-def $scc
; GFX8: $sgpr6 = S_ADD_I32 $sgpr32, 524544, implicit-def $scc
; GFX8: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.3, addrspace 5)
; GFX8: $exec = S_MOV_B64 killed $sgpr4_sgpr5
; GFX8: $sgpr4 = S_ADD_U32 $sgpr33, 524800, implicit-def $scc
; GFX8: $sgpr4 = S_ADD_I32 $sgpr33, 524800, implicit-def $scc
; GFX8: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr4, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.4, addrspace 5)
; GFX8: S_ENDPGM 0, csr_amdgpu_allvgprs
; GFX9-LABEL: name: pei_scavenge_vgpr_spill
; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr2
; GFX9: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX9: $sgpr6 = S_ADD_U32 $sgpr32, 524544, implicit-def $scc
; GFX9: $sgpr6 = S_ADD_I32 $sgpr32, 524544, implicit-def $scc
; GFX9: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.3, addrspace 5)
; GFX9: $exec = S_MOV_B64 killed $sgpr4_sgpr5
; GFX9: $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2
; GFX9: $sgpr33 = frame-setup S_ADD_U32 $sgpr32, 524224, implicit-def $scc
; GFX9: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
; GFX9: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def $scc
; GFX9: $sgpr32 = frame-setup S_ADD_U32 $sgpr32, 1572864, implicit-def $scc
; GFX9: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 1572864, implicit-def $scc
; GFX9: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
; GFX9: $sgpr7 = S_ADD_U32 $sgpr33, 524800, implicit-def $scc
; GFX9: $sgpr7 = S_ADD_I32 $sgpr33, 524800, implicit-def $scc
; GFX9: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr7, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.4, addrspace 5)
; GFX9: $vgpr3 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
; GFX9: $vgpr3 = V_ADD_U32_e32 8192, killed $vgpr3, implicit $exec
; GFX9: $vgpr0 = V_OR_B32_e32 killed $vgpr3, $vgpr1, implicit $exec
; GFX9: $sgpr32 = frame-destroy S_SUB_U32 $sgpr32, 1572864, implicit-def $scc
; GFX9: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -1572864, implicit-def $scc
; GFX9: $sgpr33 = V_READLANE_B32 $vgpr2, 0
; GFX9: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX9: $sgpr6 = S_ADD_U32 $sgpr32, 524544, implicit-def $scc
; GFX9: $sgpr6 = S_ADD_I32 $sgpr32, 524544, implicit-def $scc
; GFX9: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.3, addrspace 5)
; GFX9: $exec = S_MOV_B64 killed $sgpr4_sgpr5
; GFX9: $sgpr4 = S_ADD_U32 $sgpr33, 524800, implicit-def $scc
; GFX9: $sgpr4 = S_ADD_I32 $sgpr33, 524800, implicit-def $scc
; GFX9: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr4, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.4, addrspace 5)
; GFX9: S_ENDPGM 0, csr_amdgpu_allvgprs
; GFX9-FLATSCR-LABEL: name: pei_scavenge_vgpr_spill
; GFX9-FLATSCR: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr2
; GFX9-FLATSCR: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX9-FLATSCR: $sgpr6 = S_ADD_U32 $sgpr32, 8196, implicit-def $scc
; GFX9-FLATSCR: $sgpr6 = S_ADD_I32 $sgpr32, 8196, implicit-def $scc
; GFX9-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, killed $sgpr6, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.3, addrspace 5)
; GFX9-FLATSCR: $exec = S_MOV_B64 killed $sgpr4_sgpr5
; GFX9-FLATSCR: $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2
; GFX9-FLATSCR: $sgpr33 = frame-setup S_ADD_U32 $sgpr32, 8191, implicit-def $scc
; GFX9-FLATSCR: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc
; GFX9-FLATSCR: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def $scc
; GFX9-FLATSCR: $sgpr32 = frame-setup S_ADD_U32 $sgpr32, 24576, implicit-def $scc
; GFX9-FLATSCR: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 24576, implicit-def $scc
; GFX9-FLATSCR: $vgpr0 = V_MOV_B32_e32 $sgpr33, implicit $exec
; GFX9-FLATSCR: $vcc_hi = S_ADD_U32 $sgpr33, 8192, implicit-def $scc
; GFX9-FLATSCR: $vcc_hi = S_ADD_I32 $sgpr33, 8192, implicit-def $scc
; GFX9-FLATSCR: $vgpr0 = V_OR_B32_e32 killed $vcc_hi, $vgpr1, implicit $exec
; GFX9-FLATSCR: $sgpr32 = frame-destroy S_SUB_U32 $sgpr32, 24576, implicit-def $scc
; GFX9-FLATSCR: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -24576, implicit-def $scc
; GFX9-FLATSCR: $sgpr33 = V_READLANE_B32 $vgpr2, 0
; GFX9-FLATSCR: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX9-FLATSCR: $sgpr6 = S_ADD_U32 $sgpr32, 8196, implicit-def $scc
; GFX9-FLATSCR: $sgpr6 = S_ADD_I32 $sgpr32, 8196, implicit-def $scc
; GFX9-FLATSCR: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR killed $sgpr6, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.3, addrspace 5)
; GFX9-FLATSCR: $exec = S_MOV_B64 killed $sgpr4_sgpr5
; GFX9-FLATSCR: S_ENDPGM 0, csr_amdgpu_allvgprs


@ -602,7 +602,7 @@ body: |
; GCN64-MUBUF: $exec = S_MOV_B64 1, implicit-def $vgpr0
; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5)
; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0
; GCN64-MUBUF: $sgpr2 = S_ADD_U32 $sgpr33, 262144, implicit-def $scc
; GCN64-MUBUF: $sgpr2 = S_ADD_I32 $sgpr33, 262144, implicit-def $scc
; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, killed $sgpr2, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, align 4096, addrspace 5)
; GCN64-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5)
; GCN64-MUBUF: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
@ -764,7 +764,7 @@ body: |
; GCN32-MUBUF: $exec_lo = S_MOV_B32 1, implicit-def $vgpr0
; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5)
; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0
; GCN32-MUBUF: $sgpr1 = S_ADD_U32 $sgpr33, 131072, implicit-def $scc
; GCN32-MUBUF: $sgpr1 = S_ADD_I32 $sgpr33, 131072, implicit-def $scc
; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, killed $sgpr1, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, align 4096, addrspace 5)
; GCN32-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5)
; GCN32-MUBUF: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
@ -922,7 +922,7 @@ body: |
; GCN64-FLATSCR: $exec = S_MOV_B64 1, implicit-def $vgpr0
; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %fixed-stack.0, align 16, addrspace 5)
; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0
; GCN64-FLATSCR: $sgpr9 = S_ADD_U32 $sgpr33, 4096, implicit-def $scc
; GCN64-FLATSCR: $sgpr9 = S_ADD_I32 $sgpr33, 4096, implicit-def $scc
; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, killed $sgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, align 4096, addrspace 5)
; GCN64-FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %fixed-stack.0, align 16, addrspace 5)
; GCN64-FLATSCR: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr0
@ -1129,7 +1129,7 @@ body: |
; GCN64-MUBUF: $sgpr0_sgpr1 = S_MOV_B64 $exec
; GCN64-MUBUF: $exec = S_MOV_B64 1, implicit-def $vgpr0
; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5)
; GCN64-MUBUF: $sgpr2 = S_ADD_U32 $sgpr33, 262144, implicit-def $scc
; GCN64-MUBUF: $sgpr2 = S_ADD_I32 $sgpr33, 262144, implicit-def $scc
; GCN64-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, killed $sgpr2, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.8, align 4096, addrspace 5)
; GCN64-MUBUF: $sgpr12 = V_READLANE_B32 killed $vgpr0, 0
; GCN64-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5)
@ -1265,7 +1265,7 @@ body: |
; GCN32-MUBUF: $sgpr0 = S_MOV_B32 $exec_lo
; GCN32-MUBUF: $exec_lo = S_MOV_B32 1, implicit-def $vgpr0
; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5)
; GCN32-MUBUF: $sgpr1 = S_ADD_U32 $sgpr33, 131072, implicit-def $scc
; GCN32-MUBUF: $sgpr1 = S_ADD_I32 $sgpr33, 131072, implicit-def $scc
; GCN32-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, killed $sgpr1, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.8, align 4096, addrspace 5)
; GCN32-MUBUF: $sgpr12 = V_READLANE_B32 killed $vgpr0, 0
; GCN32-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5)
@ -1397,7 +1397,7 @@ body: |
; GCN64-FLATSCR: $sgpr2_sgpr3 = S_MOV_B64 $exec
; GCN64-FLATSCR: $exec = S_MOV_B64 1, implicit-def $vgpr0
; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %fixed-stack.0, align 16, addrspace 5)
; GCN64-FLATSCR: $sgpr9 = S_ADD_U32 $sgpr33, 4096, implicit-def $scc
; GCN64-FLATSCR: $sgpr9 = S_ADD_I32 $sgpr33, 4096, implicit-def $scc
; GCN64-FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR killed $sgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.8, align 4096, addrspace 5)
; GCN64-FLATSCR: $sgpr12 = V_READLANE_B32 killed $vgpr0, 0
; GCN64-FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %fixed-stack.0, align 16, addrspace 5)


@ -200,7 +200,7 @@ entry:
; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec
; GCN: s_mov_b32 s33, s32
; GCN-DAG: s_add_u32 s32, s32, 0x400
; GCN-DAG: s_addk_i32 s32, 0x400
; GCN-DAG: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; GCN-DAG: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill
@ -224,7 +224,7 @@ entry:
; GCN-DAG: v_readlane_b32 s34, v42, 0
; GCN-DAG: v_readlane_b32 s35, v42, 1
; GCN: s_sub_u32 s32, s32, 0x400
; GCN: s_addk_i32 s32, 0xfc00
; GCN-NEXT: v_readlane_b32 s33,
; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
; GCN-NEXT: buffer_load_dword v42, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload


@ -78,10 +78,10 @@ entry:
; 0x40000 / 64 = 4096 (for wave64)
%a = load volatile i32, i32 addrspace(5)* %aptr
; MUBUF: s_add_u32 s32, s32, 0x40000
; MUBUF: s_add_i32 s32, s32, 0x40000
; MUBUF: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s32 ; 4-byte Folded Spill
; MUBUF: s_sub_u32 s32, s32, 0x40000
; FLATSCR: s_add_u32 [[SOFF:s[0-9]+]], s32, 0x1000
; MUBUF: s_add_i32 s32, s32, 0xfffc0000
; FLATSCR: s_add_i32 [[SOFF:s[0-9]+]], s32, 0x1000
; FLATSCR: scratch_store_dword off, v{{[0-9]+}}, [[SOFF]] ; 4-byte Folded Spill
call void asm sideeffect "", "s,s,s,s,s,s,s,s,v"(i32 %asm0.0, i32 %asm1.0, i32 %asm2.0, i32 %asm3.0, i32 %asm4.0, i32 %asm5.0, i32 %asm6.0, i32 %asm7.0, i32 %a)
@ -97,10 +97,10 @@ entry:
call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}"() #0
; MUBUF: s_add_u32 s32, s32, 0x40000
; MUBUF: s_add_i32 s32, s32, 0x40000
; MUBUF: buffer_load_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s32 ; 4-byte Folded Reload
; MUBUF: s_sub_u32 s32, s32, 0x40000
; FLATSCR: s_add_u32 [[SOFF:s[0-9]+]], s32, 0x1000
; MUBUF: s_add_i32 s32, s32, 0xfffc0000
; FLATSCR: s_add_i32 [[SOFF:s[0-9]+]], s32, 0x1000
; FLATSCR: scratch_load_dword v{{[0-9]+}}, off, [[SOFF]] ; 4-byte Folded Reload
; Force %a to spill with no free SGPRs
@ -202,9 +202,9 @@ entry:
%aptr = getelementptr i32, i32 addrspace(5)* %buf, i32 1
; 0x40000 / 64 = 4096 (for wave64)
; MUBUF: s_add_u32 s4, s32, 0x40000
; MUBUF: s_add_i32 s4, s32, 0x40000
; MUBUF: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s4 ; 4-byte Folded Spill
; FLATSCR: s_add_u32 s0, s32, 0x1000
; FLATSCR: s_add_i32 s0, s32, 0x1000
; FLATSCR: scratch_store_dword off, v{{[0-9]+}}, s0 ; 4-byte Folded Spill
%a = load volatile i32, i32 addrspace(5)* %aptr
@ -257,7 +257,7 @@ entry:
%bufv2 = bitcast i8 addrspace(5)* %alloca to <2 x i32> addrspace(5)*
; 0x3ff00 / 64 = 4092 (for wave64)
; MUBUF: s_add_u32 s4, s32, 0x3ff00
; MUBUF: s_add_i32 s4, s32, 0x3ff00
; MUBUF: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s4 ; 4-byte Folded Spill
; MUBUF: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s4 offset:4 ; 4-byte Folded Spill
; FLATSCR: scratch_store_dwordx2 off, v[{{[0-9:]+}}], s32 offset:4092 ; 8-byte Folded Spill


@ -45,11 +45,12 @@ entry:
}
; CHECK-LABEL: test_limited_sgpr
; GFX6: s_add_u32 s32, s32, 0x[[OFFSET:[0-9a-f]+]]
; GFX6: s_add_u32 s32, s32, 0x[[OFFSET:[0-9a-f]+]]
; GFX6: s_add_i32 s32, s32, 0x[[OFFSET:[0-9a-f]+]]
; GFX6: s_add_i32 s32, s32, 0x[[OFFSET:[0-9a-f]+]]
; GFX6: s_add_i32 s32, s32, 0x[[OFFSET:[0-9a-f]+]]
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: buffer_load_dword v{{[0-9]+}}, off, s[{{[0-9:]+}}], s32
; GFX6-NEXT: s_sub_u32 s32, s32, 0x[[OFFSET:[0-9a-f]+]]
; GFX6-NEXT: s_add_i32 s32, s32, 0x[[OFFSET:[0-9a-f]+]]
; GFX6: NumSgprs: 48
; GFX6: ScratchSize: 8608


@ -6,7 +6,7 @@
define amdgpu_kernel void @max_alignment_128() #0 {
; VI-LABEL: max_alignment_128:
; VI: ; %bb.0:
; VI-NEXT: s_add_u32 s4, s4, s7
; VI-NEXT: s_add_i32 s4, s4, s7
; VI-NEXT: s_lshr_b32 flat_scratch_hi, s4, 8
; VI-NEXT: s_add_u32 s0, s0, s7
; VI-NEXT: s_addc_u32 s1, s1, 0
@ -110,7 +110,7 @@ define amdgpu_kernel void @max_alignment_128() #0 {
define amdgpu_kernel void @stackrealign_attr() #1 {
; VI-LABEL: stackrealign_attr:
; VI: ; %bb.0:
; VI-NEXT: s_add_u32 s4, s4, s7
; VI-NEXT: s_add_i32 s4, s4, s7
; VI-NEXT: s_lshr_b32 flat_scratch_hi, s4, 8
; VI-NEXT: s_add_u32 s0, s0, s7
; VI-NEXT: s_addc_u32 s1, s1, 0
@ -214,7 +214,7 @@ define amdgpu_kernel void @stackrealign_attr() #1 {
define amdgpu_kernel void @alignstack_attr() #2 {
; VI-LABEL: alignstack_attr:
; VI: ; %bb.0:
; VI-NEXT: s_add_u32 s4, s4, s7
; VI-NEXT: s_add_i32 s4, s4, s7
; VI-NEXT: s_lshr_b32 flat_scratch_hi, s4, 8
; VI-NEXT: s_add_u32 s0, s0, s7
; VI-NEXT: s_addc_u32 s1, s1, 0


@ -32,17 +32,17 @@ define void @needs_align16_default_stack_align(i32 %idx) #0 {
}
; GCN-LABEL: {{^}}needs_align16_stack_align4:
; GCN: s_add_u32 [[SCRATCH_REG:s[0-9]+]], s32, 0x3c0{{$}}
; GCN: s_add_i32 [[SCRATCH_REG:s[0-9]+]], s32, 0x3c0{{$}}
; GCN: s_and_b32 s33, [[SCRATCH_REG]], 0xfffffc00
; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen
; GCN: v_or_b32_e32 v{{[0-9]+}}, 12
; GCN: s_add_u32 s32, s32, 0x2800{{$}}
; GCN: s_addk_i32 s32, 0x2800{{$}}
; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen
; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen
; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen
; GCN: s_sub_u32 s32, s32, 0x2800
; GCN: s_addk_i32 s32, 0xd800
; GCN: ; ScratchSize: 160
define void @needs_align16_stack_align4(i32 %idx) #2 {
@ -53,17 +53,17 @@ define void @needs_align16_stack_align4(i32 %idx) #2 {
}
; GCN-LABEL: {{^}}needs_align32:
; GCN: s_add_u32 [[SCRATCH_REG:s[0-9]+]], s32, 0x7c0{{$}}
; GCN: s_add_i32 [[SCRATCH_REG:s[0-9]+]], s32, 0x7c0{{$}}
; GCN: s_and_b32 s33, [[SCRATCH_REG]], 0xfffff800
; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen
; GCN: v_or_b32_e32 v{{[0-9]+}}, 12
; GCN: s_add_u32 s32, s32, 0x3000{{$}}
; GCN: s_addk_i32 s32, 0x3000{{$}}
; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen
; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen
; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen
; GCN: s_sub_u32 s32, s32, 0x3000
; GCN: s_addk_i32 s32, 0xd000
; GCN: ; ScratchSize: 192
define void @needs_align32(i32 %idx) #0 {
@ -74,12 +74,12 @@ define void @needs_align32(i32 %idx) #0 {
}
; GCN-LABEL: {{^}}force_realign4:
; GCN: s_add_u32 [[SCRATCH_REG:s[0-9]+]], s32, 0xc0{{$}}
; GCN: s_add_i32 [[SCRATCH_REG:s[0-9]+]], s32, 0xc0{{$}}
; GCN: s_and_b32 s33, [[SCRATCH_REG]], 0xffffff00
; GCN: s_add_u32 s32, s32, 0xd00{{$}}
; GCN: s_addk_i32 s32, 0xd00{{$}}
; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen
; GCN: s_sub_u32 s32, s32, 0xd00
; GCN: s_addk_i32 s32, 0xf300
; GCN: ; ScratchSize: 52
define void @force_realign4(i32 %idx) #1 {
@ -125,12 +125,12 @@ define amdgpu_kernel void @kernel_call_align4_from_5() {
; GCN-LABEL: {{^}}default_realign_align128:
; GCN: s_mov_b32 [[FP_COPY:s[0-9]+]], s33
; GCN-NEXT: s_add_u32 s33, s32, 0x1fc0
; GCN-NEXT: s_add_i32 s33, s32, 0x1fc0
; GCN-NEXT: s_and_b32 s33, s33, 0xffffe000
; GCN-NEXT: s_add_u32 s32, s32, 0x4000
; GCN-NEXT: s_addk_i32 s32, 0x4000
; GCN-NOT: s33
; GCN: buffer_store_dword v0, off, s[0:3], s33{{$}}
; GCN: s_sub_u32 s32, s32, 0x4000
; GCN: s_addk_i32 s32, 0xc000
; GCN: s_mov_b32 s33, [[FP_COPY]]
define void @default_realign_align128(i32 %idx) #0 {
%alloca.align = alloca i32, align 128, addrspace(5)
@ -159,7 +159,7 @@ define void @func_call_align1024_bp_gets_vgpr_spill(<32 x i32> %a, i32 %b) #0 {
; GCN: buffer_store_dword [[VGPR_REG:v[0-9]+]], off, s[0:3], s32 offset:1028 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, s[4:5]
; GCN-NEXT: v_writelane_b32 [[VGPR_REG]], s33, 2
; GCN-DAG: s_add_u32 [[SCRATCH_REG:s[0-9]+]], s32, 0xffc0
; GCN-DAG: s_add_i32 [[SCRATCH_REG:s[0-9]+]], s32, 0xffc0
; GCN-DAG: v_writelane_b32 [[VGPR_REG]], s34, 3
; GCN: s_and_b32 s33, [[SCRATCH_REG]], 0xffff0000
; GCN: s_mov_b32 s34, s32
@ -167,11 +167,11 @@ define void @func_call_align1024_bp_gets_vgpr_spill(<32 x i32> %a, i32 %b) #0 {
; GCN: buffer_store_dword v32, off, s[0:3], s33 offset:1024
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: buffer_load_dword v{{[0-9]+}}, off, s[0:3], s34
; GCN-DAG: s_add_u32 s32, s32, 0x30000
; GCN-DAG: s_add_i32 s32, s32, 0x30000
; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s32
; GCN: s_swappc_b64 s[30:31], s[4:5]
; GCN: s_sub_u32 s32, s32, 0x30000
; GCN: s_add_i32 s32, s32, 0xfffd0000
; GCN-NEXT: v_readlane_b32 s33, [[VGPR_REG]], 2
; GCN-NEXT: v_readlane_b32 s34, [[VGPR_REG]], 3
; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
@ -193,17 +193,17 @@ define i32 @needs_align1024_stack_args_used_inside_loop(%struct.Data addrspace(5
; GCN-LABEL: needs_align1024_stack_args_used_inside_loop:
; GCN: s_mov_b32 [[FP_COPY:s[0-9]+]], s33
; GCN-NEXT: s_add_u32 s33, s32, 0xffc0
; GCN-NEXT: s_add_i32 s33, s32, 0xffc0
; GCN-NEXT: s_mov_b32 [[BP_COPY:s[0-9]+]], s34
; GCN-NEXT: s_mov_b32 s34, s32
; GCN-NEXT: s_and_b32 s33, s33, 0xffff0000
; GCN-NEXT: v_mov_b32_e32 v{{[0-9]+}}, 0
; GCN-NEXT: v_lshrrev_b32_e64 [[VGPR_REG:v[0-9]+]], 6, s34
; GCN: s_add_u32 s32, s32, 0x30000
; GCN: s_add_i32 s32, s32, 0x30000
; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s33 offset:1024
; GCN: buffer_load_dword v{{[0-9]+}}, [[VGPR_REG]], s[0:3], 0 offen
; GCN: v_add_u32_e32 [[VGPR_REG]], vcc, 4, [[VGPR_REG]]
; GCN: s_sub_u32 s32, s32, 0x30000
; GCN: s_add_i32 s32, s32, 0xfffd0000
; GCN-NEXT: s_mov_b32 s33, [[FP_COPY]]
; GCN-NEXT: s_mov_b32 s34, [[BP_COPY]]
; GCN-NEXT: s_setpc_b64 s[30:31]
@ -290,16 +290,16 @@ define void @spill_bp_to_memory_scratch_reg_needed_mubuf_offset(<32 x i32> %a, i
; GCN-LABEL: spill_bp_to_memory_scratch_reg_needed_mubuf_offset
; GCN: s_or_saveexec_b64 s[4:5], -1
; GCN-NEXT: s_add_u32 s6, s32, 0x42100
; GCN-NEXT: s_add_i32 s6, s32, 0x42100
; GCN-NEXT: buffer_store_dword v39, off, s[0:3], s6 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, s[4:5]
; GCN-NEXT: v_mov_b32_e32 v0, s33
; GCN-NOT: v_mov_b32_e32 v0, 0x1088
; GCN-NEXT: s_add_u32 s6, s32, 0x42200
; GCN-NEXT: s_add_i32 s6, s32, 0x42200
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill
; GCN-NEXT: v_mov_b32_e32 v0, s34
; GCN-NOT: v_mov_b32_e32 v0, 0x108c
; GCN-NEXT: s_add_u32 s6, s32, 0x42300
; GCN-NEXT: s_add_i32 s6, s32, 0x42300
; GCN-NEXT: s_mov_b32 s34, s32
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill
%local_val = alloca i32, align 128, addrspace(5)


@ -11,7 +11,7 @@ define hidden void @widget() {
; GCN-NEXT: s_mov_b64 exec, s[4:5]
; GCN-NEXT: v_writelane_b32 v40, s33, 2
; GCN-NEXT: s_mov_b32 s33, s32
; GCN-NEXT: s_add_u32 s32, s32, 0x400
; GCN-NEXT: s_addk_i32 s32, 0x400
; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: flat_load_dword v0, v[0:1]
@ -53,7 +53,7 @@ define hidden void @widget() {
; GCN-NEXT: BB0_7: ; %UnifiedReturnBlock
; GCN-NEXT: v_readlane_b32 s4, v40, 0
; GCN-NEXT: v_readlane_b32 s5, v40, 1
; GCN-NEXT: s_sub_u32 s32, s32, 0x400
; GCN-NEXT: s_addk_i32 s32, 0xfc00
; GCN-NEXT: v_readlane_b32 s33, v40, 2
; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
@ -191,7 +191,7 @@ define hidden void @blam() {
; GCN-NEXT: s_mov_b64 exec, s[4:5]
; GCN-NEXT: v_writelane_b32 v43, s33, 4
; GCN-NEXT: s_mov_b32 s33, s32
; GCN-NEXT: s_add_u32 s32, s32, 0x800
; GCN-NEXT: s_addk_i32 s32, 0x800
; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill


@ -1129,8 +1129,8 @@ declare void @external_void_func_void() #1
; GCN-NEXT: v_writelane_b32 v40, s33, 2
; GCN: s_mov_b32 s33, s32
; GFX1064: s_add_u32 s32, s32, 0x400
; GFX1032: s_add_u32 s32, s32, 0x200
; GFX1064: s_addk_i32 s32, 0x400
; GFX1032: s_addk_i32 s32, 0x200
; GCN-DAG: v_writelane_b32 v40, s30, 0
@ -1140,8 +1140,8 @@ declare void @external_void_func_void() #1
; GCN-DAG: v_readlane_b32 s5, v40, 1
; GFX1064: s_sub_u32 s32, s32, 0x400
; GFX1032: s_sub_u32 s32, s32, 0x200
; GFX1064: s_addk_i32 s32, 0xfc00
; GFX1032: s_addk_i32 s32, 0xfe00
; GCN: v_readlane_b32 s33, v40, 2
; GFX1064: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
; GFX1032: s_or_saveexec_b32 [[COPY_EXEC1:s[0-9]]], -1{{$}}


@ -354,7 +354,7 @@ define amdgpu_gfx void @strict_wwm_call(<4 x i32> inreg %tmp14, i32 inreg %arg)
; GFX9-O0-NEXT: s_mov_b64 exec, s[10:11]
; GFX9-O0-NEXT: v_writelane_b32 v3, s33, 7
; GFX9-O0-NEXT: s_mov_b32 s33, s32
; GFX9-O0-NEXT: s_add_u32 s32, s32, 0x400
; GFX9-O0-NEXT: s_addk_i32 s32, 0x400
; GFX9-O0-NEXT: v_writelane_b32 v3, s30, 0
; GFX9-O0-NEXT: v_writelane_b32 v3, s31, 1
; GFX9-O0-NEXT: v_writelane_b32 v3, s8, 2
@ -395,7 +395,7 @@ define amdgpu_gfx void @strict_wwm_call(<4 x i32> inreg %tmp14, i32 inreg %arg)
; GFX9-O0-NEXT: s_mov_b64 exec, s[10:11]
; GFX9-O0-NEXT: v_mov_b32_e32 v0, v1
; GFX9-O0-NEXT: buffer_store_dword v0, off, s[4:7], s8 offset:4
; GFX9-O0-NEXT: s_sub_u32 s32, s32, 0x400
; GFX9-O0-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-O0-NEXT: v_readlane_b32 s33, v3, 7
; GFX9-O0-NEXT: s_or_saveexec_b64 s[4:5], -1
; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
@ -414,7 +414,7 @@ define amdgpu_gfx void @strict_wwm_call(<4 x i32> inreg %tmp14, i32 inreg %arg)
; GFX9-O3-NEXT: s_mov_b64 exec, s[10:11]
; GFX9-O3-NEXT: s_mov_b32 s14, s33
; GFX9-O3-NEXT: s_mov_b32 s33, s32
; GFX9-O3-NEXT: s_add_u32 s32, s32, 0x400
; GFX9-O3-NEXT: s_addk_i32 s32, 0x400
; GFX9-O3-NEXT: s_mov_b64 s[10:11], s[30:31]
; GFX9-O3-NEXT: v_mov_b32_e32 v2, s8
; GFX9-O3-NEXT: s_not_b64 exec, exec
@ -431,7 +431,7 @@ define amdgpu_gfx void @strict_wwm_call(<4 x i32> inreg %tmp14, i32 inreg %arg)
; GFX9-O3-NEXT: s_mov_b64 exec, s[8:9]
; GFX9-O3-NEXT: v_mov_b32_e32 v0, v1
; GFX9-O3-NEXT: buffer_store_dword v0, off, s[4:7], 0 offset:4
; GFX9-O3-NEXT: s_sub_u32 s32, s32, 0x400
; GFX9-O3-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-O3-NEXT: s_mov_b32 s33, s14
; GFX9-O3-NEXT: s_or_saveexec_b64 s[4:5], -1
; GFX9-O3-NEXT: buffer_load_dword v2, off, s[0:3], s32 ; 4-byte Folded Reload
@ -555,7 +555,7 @@ define amdgpu_gfx void @strict_wwm_call_i64(<4 x i32> inreg %tmp14, i64 inreg %a
; GFX9-O0-NEXT: s_mov_b64 exec, s[10:11]
; GFX9-O0-NEXT: v_writelane_b32 v11, s33, 9
; GFX9-O0-NEXT: s_mov_b32 s33, s32
; GFX9-O0-NEXT: s_add_u32 s32, s32, 0xc00
; GFX9-O0-NEXT: s_addk_i32 s32, 0xc00
; GFX9-O0-NEXT: v_writelane_b32 v11, s30, 0
; GFX9-O0-NEXT: v_writelane_b32 v11, s31, 1
; GFX9-O0-NEXT: v_writelane_b32 v11, s9, 2
@ -621,7 +621,7 @@ define amdgpu_gfx void @strict_wwm_call_i64(<4 x i32> inreg %tmp14, i64 inreg %a
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v3
; GFX9-O0-NEXT: s_mov_b32 s8, 0
; GFX9-O0-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], s8 offset:4
; GFX9-O0-NEXT: s_sub_u32 s32, s32, 0xc00
; GFX9-O0-NEXT: s_addk_i32 s32, 0xf400
; GFX9-O0-NEXT: v_readlane_b32 s33, v11, 9
; GFX9-O0-NEXT: s_or_saveexec_b64 s[4:5], -1
; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
@ -663,7 +663,7 @@ define amdgpu_gfx void @strict_wwm_call_i64(<4 x i32> inreg %tmp14, i64 inreg %a
; GFX9-O3-NEXT: s_mov_b64 exec, s[10:11]
; GFX9-O3-NEXT: s_mov_b32 s14, s33
; GFX9-O3-NEXT: s_mov_b32 s33, s32
; GFX9-O3-NEXT: s_add_u32 s32, s32, 0x800
; GFX9-O3-NEXT: s_addk_i32 s32, 0x800
; GFX9-O3-NEXT: s_mov_b64 s[10:11], s[30:31]
; GFX9-O3-NEXT: v_mov_b32_e32 v6, s8
; GFX9-O3-NEXT: v_mov_b32_e32 v7, s9
@ -688,7 +688,7 @@ define amdgpu_gfx void @strict_wwm_call_i64(<4 x i32> inreg %tmp14, i64 inreg %a
; GFX9-O3-NEXT: v_mov_b32_e32 v0, v2
; GFX9-O3-NEXT: v_mov_b32_e32 v1, v3
; GFX9-O3-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 offset:4
; GFX9-O3-NEXT: s_sub_u32 s32, s32, 0x800
; GFX9-O3-NEXT: s_addk_i32 s32, 0xf800
; GFX9-O3-NEXT: s_mov_b32 s33, s14
; GFX9-O3-NEXT: s_or_saveexec_b64 s[4:5], -1
; GFX9-O3-NEXT: buffer_load_dword v6, off, s[0:3], s32 ; 4-byte Folded Reload