1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 11:02:59 +02:00

AMDGPU/GlobalISel: Fix G_GEP with mixed SGPR/VGPR operands

The register bank for the destination of the sample argument copy in the
tests was wrong (VGPR, although the copy source is an SGPR). We shouldn't be
constraining each source operand to the result's register bank. Allow
constraining the original register to the right size.

llvm-svn: 364928
This commit is contained in:
Matt Arsenault 2019-07-02 14:40:22 +00:00
parent 9db0cb6291
commit 25f8690d8f
3 changed files with 19 additions and 17 deletions

View File

@ -278,12 +278,15 @@ bool AMDGPUInstructionSelector::selectG_ADD(MachineInstr &I) const {
.add(Lo1)
.add(Lo2)
.addImm(0);
BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADDC_U32_e64), DstHi)
MachineInstr *Addc = BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADDC_U32_e64), DstHi)
.addDef(MRI.createVirtualRegister(CarryRC), RegState::Dead)
.add(Hi1)
.add(Hi2)
.addReg(CarryReg, RegState::Kill)
.addImm(0);
if (!constrainSelectedInstRegOperands(*Addc, TII, TRI, RBI))
return false;
}
BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
@ -292,9 +295,8 @@ bool AMDGPUInstructionSelector::selectG_ADD(MachineInstr &I) const {
.addReg(DstHi)
.addImm(AMDGPU::sub1);
if (!RBI.constrainGenericRegister(DstReg, RC, MRI) ||
!RBI.constrainGenericRegister(I.getOperand(1).getReg(), RC, MRI) ||
!RBI.constrainGenericRegister(I.getOperand(2).getReg(), RC, MRI))
if (!RBI.constrainGenericRegister(DstReg, RC, MRI))
return false;
I.eraseFromParent();

View File

@ -160,7 +160,7 @@ body: |
bb.0:
liveins: $sgpr0_sgpr1, $vgpr0_vgpr1
; GFX6-LABEL: name: gep_p0_sgpr_vgpr
; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $sgpr0_sgpr1
; GFX6: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1
; GFX6: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0
@ -172,7 +172,7 @@ body: |
; GFX6: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
; GFX8-LABEL: name: gep_p0_sgpr_vgpr
; GFX8: $vcc_hi = IMPLICIT_DEF
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $sgpr0_sgpr1
; GFX8: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1
; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0
@ -184,7 +184,7 @@ body: |
; GFX8: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
; GFX9-LABEL: name: gep_p0_sgpr_vgpr
; GFX9: $vcc_hi = IMPLICIT_DEF
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $sgpr0_sgpr1
; GFX9: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1
; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0
@ -195,7 +195,7 @@ body: |
; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %8, %subreg.sub1
; GFX9: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
; GFX10-WAVE64-LABEL: name: gep_p0_sgpr_vgpr
; GFX10-WAVE64: [[COPY:%[0-9]+]]:vreg_64 = COPY $sgpr0_sgpr1
; GFX10-WAVE64: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1
; GFX10-WAVE64: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10-WAVE64: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
; GFX10-WAVE64: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0
@ -207,7 +207,7 @@ body: |
; GFX10-WAVE64: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
; GFX10-WAVE32-LABEL: name: gep_p0_sgpr_vgpr
; GFX10-WAVE32: $vcc_hi = IMPLICIT_DEF
; GFX10-WAVE32: [[COPY:%[0-9]+]]:vreg_64 = COPY $sgpr0_sgpr1
; GFX10-WAVE32: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1
; GFX10-WAVE32: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10-WAVE32: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
; GFX10-WAVE32: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0
@ -217,7 +217,7 @@ body: |
; GFX10-WAVE32: %8:vgpr_32, dead %10:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
; GFX10-WAVE32: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %8, %subreg.sub1
; GFX10-WAVE32: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
%0:vgpr(p0) = COPY $sgpr0_sgpr1
%0:sgpr(p0) = COPY $sgpr0_sgpr1
%1:vgpr(s64) = COPY $vgpr0_vgpr1
%2:vgpr(p0) = G_GEP %0, %1
S_ENDPGM 0, implicit %2
@ -319,34 +319,34 @@ body: |
bb.0:
liveins: $sgpr0, $vgpr0
; GFX6-LABEL: name: gep_p3_sgpr_vgpr
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $sgpr0
; GFX6: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: %2:vgpr_32, dead %3:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[COPY1]], 0, implicit $exec
; GFX6: S_ENDPGM 0, implicit %2
; GFX8-LABEL: name: gep_p3_sgpr_vgpr
; GFX8: $vcc_hi = IMPLICIT_DEF
; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $sgpr0
; GFX8: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8: %2:vgpr_32, dead %3:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY]], [[COPY1]], 0, implicit $exec
; GFX8: S_ENDPGM 0, implicit %2
; GFX9-LABEL: name: gep_p3_sgpr_vgpr
; GFX9: $vcc_hi = IMPLICIT_DEF
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $sgpr0
; GFX9: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec
; GFX9: S_ENDPGM 0, implicit [[V_ADD_U32_e64_]]
; GFX10-WAVE64-LABEL: name: gep_p3_sgpr_vgpr
; GFX10-WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $sgpr0
; GFX10-WAVE64: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
; GFX10-WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX10-WAVE64: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec
; GFX10-WAVE64: S_ENDPGM 0, implicit [[V_ADD_U32_e64_]]
; GFX10-WAVE32-LABEL: name: gep_p3_sgpr_vgpr
; GFX10-WAVE32: $vcc_hi = IMPLICIT_DEF
; GFX10-WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $sgpr0
; GFX10-WAVE32: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
; GFX10-WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX10-WAVE32: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec
; GFX10-WAVE32: S_ENDPGM 0, implicit [[V_ADD_U32_e64_]]
%0:vgpr(p3) = COPY $sgpr0
%0:sgpr(p3) = COPY $sgpr0
%1:vgpr(s32) = COPY $vgpr0
%2:vgpr(p3) = G_GEP %0, %1
S_ENDPGM 0, implicit %2

View File

@ -12,7 +12,7 @@ legalized: true
regBankSelected: true
# GCN: body:
# GCN: [[PTR:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1
# GCN: [[PTR:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
# Immediate offset:
# SICI: S_LOAD_DWORD_IMM [[PTR]], 1, 0, 0