Mirror of https://github.com/RPCS3/llvm-mirror.git, synced 2025-01-31 12:41:49 +01:00

[AMDGPU] More codegen patterns for v2i16/v2f16 build_vector

It's simpler to do this at codegen time than to do ad-hoc constant
folding of machine instructions in SIFoldOperands.

Differential Revision: https://reviews.llvm.org/D88028
This commit was authored by Jay Foad on 2020-09-16 16:44:31 +01:00.
parent 54df34cd65
commit 7c2e74eb51
4 changed files with 12 additions and 226 deletions

View File

@@ -1048,25 +1048,6 @@ static bool tryConstantFoldOp(MachineRegisterInfo &MRI,
if (!Src0->isImm() && !Src1->isImm())
return false;
if (MI->getOpcode() == AMDGPU::V_LSHL_OR_B32 ||
MI->getOpcode() == AMDGPU::V_LSHL_ADD_U32 ||
MI->getOpcode() == AMDGPU::V_AND_OR_B32) {
if (Src0->isImm() && Src0->getImm() == 0) {
// v_lshl_or_b32 0, X, Y -> copy Y
// v_lshl_or_b32 0, X, K -> v_mov_b32 K
// v_lshl_add_b32 0, X, Y -> copy Y
// v_lshl_add_b32 0, X, K -> v_mov_b32 K
// v_and_or_b32 0, X, Y -> copy Y
// v_and_or_b32 0, X, K -> v_mov_b32 K
bool UseCopy = TII->getNamedOperand(*MI, AMDGPU::OpName::src2)->isReg();
MI->RemoveOperand(Src1Idx);
MI->RemoveOperand(Src0Idx);
MI->setDesc(TII->get(UseCopy ? AMDGPU::COPY : AMDGPU::V_MOV_B32_e32));
return true;
}
}
// and k0, k1 -> v_mov_b32 (k0 & k1)
// or k0, k1 -> v_mov_b32 (k0 | k1)
// xor k0, k1 -> v_mov_b32 (k0 ^ k1)

View File

@@ -2046,6 +2046,16 @@ def : GCNPat <
(S_LSHL_B32 SReg_32:$src1, (i16 16))
>;
// Build a v2i16 whose high half is zero: masking the 32-bit source to its
// low 16 bits leaves zeros in bits 16-31, so a single S_AND_B32 with 0xffff
// materializes the whole vector (per the commit message, done at ISel so
// SIFoldOperands no longer needs ad-hoc machine-instruction folding).
def : GCNPat <
(v2i16 (build_vector (i16 SReg_32:$src1), (i16 0))),
(S_AND_B32 (S_MOV_B32 (i32 0xffff)), SReg_32:$src1)
>;
// Same as the v2i16 case but for v2f16 with a floating-point zero in the
// high half — presumably valid because FP_ZERO matches +0.0, whose bit
// pattern is all zeros, so the AND mask produces it exactly (NOTE(review):
// this would not hold for -0.0; confirm FP_ZERO only matches positive zero).
def : GCNPat <
(v2f16 (build_vector (f16 SReg_32:$src1), (f16 FP_ZERO))),
(S_AND_B32 (S_MOV_B32 (i32 0xffff)), SReg_32:$src1)
>;
def : GCNPat <
(v2i16 (build_vector (i16 SReg_32:$src0), (i16 undef))),
(COPY_TO_REGCLASS SReg_32:$src0, SReg_32)

View File

@@ -422,64 +422,6 @@ body: |
S_ENDPGM 0, implicit $vcc
...
---
# SIFoldOperands test: v_lshl_or_b32 with src0 = 0 and a *register* src2
# folds to a plain COPY of src2 ("v_lshl_or_b32 0, X, Y -> copy Y").
# GCN-LABEL: name: constant_fold_lshl_or_reg0_immreg_reg{{$}}
# GCN: %2:vgpr_32 = COPY $vgpr0, implicit $exec
# GCN-NEXT: S_ENDPGM 0, implicit %2
name: constant_fold_lshl_or_reg0_immreg_reg
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0
%0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
%1:vgpr_32 = V_MOV_B32_e32 16, implicit $exec
%2:vgpr_32 = V_LSHL_OR_B32 %0,%1, $vgpr0, implicit $exec
S_ENDPGM 0, implicit %2
...
---
# SIFoldOperands test: v_lshl_or_b32 with src0 = 0 and an *immediate* src2
# folds to v_mov_b32 of that immediate ("v_lshl_or_b32 0, X, K -> v_mov_b32 K").
# GCN-LABEL: name: constant_fold_lshl_or_reg0_immreg_imm{{$}}
# GCN: %2:vgpr_32 = V_MOV_B32_e32 10, implicit $exec
# GCN-NEXT: S_ENDPGM 0, implicit %2
name: constant_fold_lshl_or_reg0_immreg_imm
tracksRegLiveness: true
body: |
bb.0:
%0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
%1:vgpr_32 = V_MOV_B32_e32 16, implicit $exec
%2:vgpr_32 = V_LSHL_OR_B32 %0, %1, 10, implicit $exec
S_ENDPGM 0, implicit %2
...
---
# SIFoldOperands test: like the previous case, but src2 is an immediate that
# was first materialized into a register (%2 = mov 12345); the fold still
# produces v_mov_b32 12345.
# GCN-LABEL: name: constant_fold_lshl_or_reg0_immreg_immreg{{$}}
# GCN: %3:vgpr_32 = V_MOV_B32_e32 12345, implicit $exec
# GCN-NEXT: S_ENDPGM 0, implicit %3
name: constant_fold_lshl_or_reg0_immreg_immreg
tracksRegLiveness: true
body: |
bb.0:
%0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
%1:vgpr_32 = V_MOV_B32_e32 16, implicit $exec
%2:vgpr_32 = V_MOV_B32_e32 12345, implicit $exec
%3:vgpr_32 = V_LSHL_OR_B32 %0, %1, %2, implicit $exec
S_ENDPGM 0, implicit %3
...
---
# GCN-LABEL: name: s_fold_andn2_imm_regimm_32{{$}}
# GCN: [[VAL:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1233024, implicit $exec
@@ -820,152 +762,6 @@ body: |
...
---
# SIFoldOperands test: v_lshl_add_u32 with src0 = 0 and a register src2
# folds to a COPY of src2 (the (0 << shift) + x term vanishes).
# GCN-LABEL: name: constant_fold_lshl_add_reg0_immreg_reg{{$}}
# GCN: %2:vgpr_32 = COPY $vgpr0, implicit $exec
# GCN-NEXT: S_ENDPGM
name: constant_fold_lshl_add_reg0_immreg_reg
alignment: 0
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0
%0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
%1:vgpr_32 = V_MOV_B32_e32 16, implicit $exec
%2:vgpr_32 = V_LSHL_ADD_U32 %0, %1, $vgpr0, implicit $exec
S_ENDPGM 0, implicit %2
...
---
# SIFoldOperands test: v_lshl_add_u32 with src0 = 0 and an immediate src2
# folds to v_mov_b32 of that immediate.
# GCN-LABEL: name: constant_fold_lshl_add_reg0_immreg_imm{{$}}
# GCN: %2:vgpr_32 = V_MOV_B32_e32 10, implicit $exec
# GCN-NEXT: S_ENDPGM
name: constant_fold_lshl_add_reg0_immreg_imm
alignment: 0
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
body: |
bb.0:
%0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
%1:vgpr_32 = V_MOV_B32_e32 16, implicit $exec
%2:vgpr_32 = V_LSHL_ADD_U32 %0, %1, 10, implicit $exec
S_ENDPGM 0, implicit %2
...
---
# SIFoldOperands test: v_lshl_add_u32 with src0 = 0 and src2 holding a
# register-materialized immediate (12345) folds to v_mov_b32 12345.
# GCN-LABEL: name: constant_fold_lshl_add_reg0_immreg_immreg{{$}}
# GCN: %3:vgpr_32 = V_MOV_B32_e32 12345, implicit $exec
# GCN-NEXT: S_ENDPGM
name: constant_fold_lshl_add_reg0_immreg_immreg
alignment: 0
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
body: |
bb.0:
%0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
%1:vgpr_32 = V_MOV_B32_e32 16, implicit $exec
%2:vgpr_32 = V_MOV_B32_e32 12345, implicit $exec
%3:vgpr_32 = V_LSHL_ADD_U32 %0, %1, %2, implicit $exec
S_ENDPGM 0, implicit %3
...
---
# SIFoldOperands test: v_and_or_b32 with src0 = 0 and a register src2 folds
# to a COPY of src2 ("v_and_or_b32 0, X, Y -> copy Y": (0 & x) | y == y).
# GCN-LABEL: name: constant_fold_and_or_reg0_immreg_reg{{$}}
# GCN: %2:vgpr_32 = COPY $vgpr0, implicit $exec
# GCN-NEXT: S_ENDPGM
name: constant_fold_and_or_reg0_immreg_reg
alignment: 0
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0
%0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
%1:vgpr_32 = V_MOV_B32_e32 16, implicit $exec
%2:vgpr_32 = V_AND_OR_B32 %0, %1, $vgpr0, implicit $exec
S_ENDPGM 0, implicit %2
...
---
# SIFoldOperands test: v_and_or_b32 with src0 = 0 and an immediate src2
# folds to v_mov_b32 of that immediate ("v_and_or_b32 0, X, K -> v_mov_b32 K").
# GCN-LABEL: name: constant_fold_and_or_reg0_immreg_imm{{$}}
# GCN: %2:vgpr_32 = V_MOV_B32_e32 10, implicit $exec
# GCN-NEXT: S_ENDPGM
name: constant_fold_and_or_reg0_immreg_imm
alignment: 0
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
body: |
bb.0:
%0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
%1:vgpr_32 = V_MOV_B32_e32 16, implicit $exec
%2:vgpr_32 = V_AND_OR_B32 %0, %1, 10, implicit $exec
S_ENDPGM 0, implicit %2
...
---
# SIFoldOperands test: v_and_or_b32 with src0 = 0 and src2 holding a
# register-materialized immediate (12345) folds to v_mov_b32 12345.
# GCN-LABEL: name: constant_fold_and_or_reg0_immreg_immreg{{$}}
# GCN: %3:vgpr_32 = V_MOV_B32_e32 12345, implicit $exec
# GCN-NEXT: S_ENDPGM
name: constant_fold_and_or_reg0_immreg_immreg
alignment: 0
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
body: |
bb.0:
%0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
%1:vgpr_32 = V_MOV_B32_e32 16, implicit $exec
%2:vgpr_32 = V_MOV_B32_e32 12345, implicit $exec
%3:vgpr_32 = V_AND_OR_B32 %0, %1, %2, implicit $exec
S_ENDPGM 0, implicit %3
...
# This used to incorrectly interpret V_MOV_B32_sdwa as being a move
# immediate, and interpreting the src0_modifiers field as a
# materialized immediate.

View File

@@ -718,11 +718,10 @@ define <4 x half> @v_test_canonicalize_reg_reg_undef_undef_v4f16(half %val0, hal
; GCN-LABEL: {{^}}v_test_canonicalize_reg_undef_reg_reg_v4f16:
; GFX9: s_waitcnt
; GFX9-NEXT: v_mov_b32_e32 [[MASK:v[0-9]+]], 0xffff
; GFX9-NEXT: v_and_b32_e32 v1, [[MASK]], v1
; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX9-NEXT: v_max_f16_e32 v0, v0, v0
; GFX9-NEXT: v_lshl_or_b32 v1, v2, 16, v1
; GFX9-NEXT: v_and_b32_e32 v0, [[MASK]], v0
; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX9-NEXT: v_pk_max_f16 v1, v1, v1
; GFX9-NEXT: s_setpc_b64