mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 12:41:49 +01:00
[AMDGPU] More codegen patterns for v2i16/v2f16 build_vector
It's simpler to do this at codegen time than to do ad-hoc constant folding of machine instructions in SIFoldOperands. Differential Revision: https://reviews.llvm.org/D88028
This commit is contained in:
parent
54df34cd65
commit
7c2e74eb51
@ -1048,25 +1048,6 @@ static bool tryConstantFoldOp(MachineRegisterInfo &MRI,
|
||||
if (!Src0->isImm() && !Src1->isImm())
|
||||
return false;
|
||||
|
||||
if (MI->getOpcode() == AMDGPU::V_LSHL_OR_B32 ||
|
||||
MI->getOpcode() == AMDGPU::V_LSHL_ADD_U32 ||
|
||||
MI->getOpcode() == AMDGPU::V_AND_OR_B32) {
|
||||
if (Src0->isImm() && Src0->getImm() == 0) {
|
||||
// v_lshl_or_b32 0, X, Y -> copy Y
|
||||
// v_lshl_or_b32 0, X, K -> v_mov_b32 K
|
||||
// v_lshl_add_u32 0, X, Y -> copy Y
|
||||
// v_lshl_add_u32 0, X, K -> v_mov_b32 K
|
||||
// v_and_or_b32 0, X, Y -> copy Y
|
||||
// v_and_or_b32 0, X, K -> v_mov_b32 K
|
||||
bool UseCopy = TII->getNamedOperand(*MI, AMDGPU::OpName::src2)->isReg();
|
||||
MI->RemoveOperand(Src1Idx);
|
||||
MI->RemoveOperand(Src0Idx);
|
||||
|
||||
MI->setDesc(TII->get(UseCopy ? AMDGPU::COPY : AMDGPU::V_MOV_B32_e32));
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
// and k0, k1 -> v_mov_b32 (k0 & k1)
|
||||
// or k0, k1 -> v_mov_b32 (k0 | k1)
|
||||
// xor k0, k1 -> v_mov_b32 (k0 ^ k1)
|
||||
|
@ -2046,6 +2046,16 @@ def : GCNPat <
|
||||
(S_LSHL_B32 SReg_32:$src1, (i16 16))
|
||||
>;
|
||||
|
||||
def : GCNPat <
|
||||
(v2i16 (build_vector (i16 SReg_32:$src1), (i16 0))),
|
||||
(S_AND_B32 (S_MOV_B32 (i32 0xffff)), SReg_32:$src1)
|
||||
>;
|
||||
|
||||
def : GCNPat <
|
||||
(v2f16 (build_vector (f16 SReg_32:$src1), (f16 FP_ZERO))),
|
||||
(S_AND_B32 (S_MOV_B32 (i32 0xffff)), SReg_32:$src1)
|
||||
>;
|
||||
|
||||
def : GCNPat <
|
||||
(v2i16 (build_vector (i16 SReg_32:$src0), (i16 undef))),
|
||||
(COPY_TO_REGCLASS SReg_32:$src0, SReg_32)
|
||||
|
@ -422,64 +422,6 @@ body: |
|
||||
S_ENDPGM 0, implicit $vcc
|
||||
|
||||
...
|
||||
---
|
||||
# GCN-LABEL: name: constant_fold_lshl_or_reg0_immreg_reg{{$}}
|
||||
# GCN: %2:vgpr_32 = COPY $vgpr0, implicit $exec
|
||||
# GCN-NEXT: S_ENDPGM 0, implicit %2
|
||||
|
||||
name: constant_fold_lshl_or_reg0_immreg_reg
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0
|
||||
|
||||
%0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
%1:vgpr_32 = V_MOV_B32_e32 16, implicit $exec
|
||||
%2:vgpr_32 = V_LSHL_OR_B32 %0,%1, $vgpr0, implicit $exec
|
||||
S_ENDPGM 0, implicit %2
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
# GCN-LABEL: name: constant_fold_lshl_or_reg0_immreg_imm{{$}}
|
||||
# GCN: %2:vgpr_32 = V_MOV_B32_e32 10, implicit $exec
|
||||
# GCN-NEXT: S_ENDPGM 0, implicit %2
|
||||
|
||||
name: constant_fold_lshl_or_reg0_immreg_imm
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
|
||||
%0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
%1:vgpr_32 = V_MOV_B32_e32 16, implicit $exec
|
||||
%2:vgpr_32 = V_LSHL_OR_B32 %0, %1, 10, implicit $exec
|
||||
S_ENDPGM 0, implicit %2
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
# GCN-LABEL: name: constant_fold_lshl_or_reg0_immreg_immreg{{$}}
|
||||
# GCN: %3:vgpr_32 = V_MOV_B32_e32 12345, implicit $exec
|
||||
# GCN-NEXT: S_ENDPGM 0, implicit %3
|
||||
|
||||
name: constant_fold_lshl_or_reg0_immreg_immreg
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
|
||||
%0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
%1:vgpr_32 = V_MOV_B32_e32 16, implicit $exec
|
||||
%2:vgpr_32 = V_MOV_B32_e32 12345, implicit $exec
|
||||
%3:vgpr_32 = V_LSHL_OR_B32 %0, %1, %2, implicit $exec
|
||||
S_ENDPGM 0, implicit %3
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
# GCN-LABEL: name: s_fold_andn2_imm_regimm_32{{$}}
|
||||
# GCN: [[VAL:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1233024, implicit $exec
|
||||
@ -820,152 +762,6 @@ body: |
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
# GCN-LABEL: name: constant_fold_lshl_add_reg0_immreg_reg{{$}}
|
||||
# GCN: %2:vgpr_32 = COPY $vgpr0, implicit $exec
|
||||
# GCN-NEXT: S_ENDPGM
|
||||
|
||||
name: constant_fold_lshl_add_reg0_immreg_reg
|
||||
alignment: 0
|
||||
exposesReturnsTwice: false
|
||||
legalized: false
|
||||
regBankSelected: false
|
||||
selected: false
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0
|
||||
|
||||
%0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
%1:vgpr_32 = V_MOV_B32_e32 16, implicit $exec
|
||||
%2:vgpr_32 = V_LSHL_ADD_U32 %0, %1, $vgpr0, implicit $exec
|
||||
S_ENDPGM 0, implicit %2
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
# GCN-LABEL: name: constant_fold_lshl_add_reg0_immreg_imm{{$}}
|
||||
# GCN: %2:vgpr_32 = V_MOV_B32_e32 10, implicit $exec
|
||||
# GCN-NEXT: S_ENDPGM
|
||||
|
||||
name: constant_fold_lshl_add_reg0_immreg_imm
|
||||
alignment: 0
|
||||
exposesReturnsTwice: false
|
||||
legalized: false
|
||||
regBankSelected: false
|
||||
selected: false
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
|
||||
%0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
%1:vgpr_32 = V_MOV_B32_e32 16, implicit $exec
|
||||
%2:vgpr_32 = V_LSHL_ADD_U32 %0, %1, 10, implicit $exec
|
||||
S_ENDPGM 0, implicit %2
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
# GCN-LABEL: name: constant_fold_lshl_add_reg0_immreg_immreg{{$}}
|
||||
# GCN: %3:vgpr_32 = V_MOV_B32_e32 12345, implicit $exec
|
||||
# GCN-NEXT: S_ENDPGM
|
||||
|
||||
name: constant_fold_lshl_add_reg0_immreg_immreg
|
||||
alignment: 0
|
||||
exposesReturnsTwice: false
|
||||
legalized: false
|
||||
regBankSelected: false
|
||||
selected: false
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
|
||||
%0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
%1:vgpr_32 = V_MOV_B32_e32 16, implicit $exec
|
||||
%2:vgpr_32 = V_MOV_B32_e32 12345, implicit $exec
|
||||
%3:vgpr_32 = V_LSHL_ADD_U32 %0, %1, %2, implicit $exec
|
||||
S_ENDPGM 0, implicit %3
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
# GCN-LABEL: name: constant_fold_and_or_reg0_immreg_reg{{$}}
|
||||
# GCN: %2:vgpr_32 = COPY $vgpr0, implicit $exec
|
||||
# GCN-NEXT: S_ENDPGM
|
||||
|
||||
name: constant_fold_and_or_reg0_immreg_reg
|
||||
alignment: 0
|
||||
exposesReturnsTwice: false
|
||||
legalized: false
|
||||
regBankSelected: false
|
||||
selected: false
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0
|
||||
|
||||
%0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
%1:vgpr_32 = V_MOV_B32_e32 16, implicit $exec
|
||||
%2:vgpr_32 = V_AND_OR_B32 %0, %1, $vgpr0, implicit $exec
|
||||
S_ENDPGM 0, implicit %2
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
# GCN-LABEL: name: constant_fold_and_or_reg0_immreg_imm{{$}}
|
||||
# GCN: %2:vgpr_32 = V_MOV_B32_e32 10, implicit $exec
|
||||
# GCN-NEXT: S_ENDPGM
|
||||
|
||||
name: constant_fold_and_or_reg0_immreg_imm
|
||||
alignment: 0
|
||||
exposesReturnsTwice: false
|
||||
legalized: false
|
||||
regBankSelected: false
|
||||
selected: false
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
|
||||
%0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
%1:vgpr_32 = V_MOV_B32_e32 16, implicit $exec
|
||||
%2:vgpr_32 = V_AND_OR_B32 %0, %1, 10, implicit $exec
|
||||
S_ENDPGM 0, implicit %2
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
# GCN-LABEL: name: constant_fold_and_or_reg0_immreg_immreg{{$}}
|
||||
# GCN: %3:vgpr_32 = V_MOV_B32_e32 12345, implicit $exec
|
||||
# GCN-NEXT: S_ENDPGM
|
||||
|
||||
name: constant_fold_and_or_reg0_immreg_immreg
|
||||
alignment: 0
|
||||
exposesReturnsTwice: false
|
||||
legalized: false
|
||||
regBankSelected: false
|
||||
selected: false
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
|
||||
%0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
%1:vgpr_32 = V_MOV_B32_e32 16, implicit $exec
|
||||
%2:vgpr_32 = V_MOV_B32_e32 12345, implicit $exec
|
||||
%3:vgpr_32 = V_AND_OR_B32 %0, %1, %2, implicit $exec
|
||||
S_ENDPGM 0, implicit %3
|
||||
|
||||
...
|
||||
|
||||
# This used to incorrectly interpret V_MOV_B32_sdwa as being a move
|
||||
# immediate, and to interpret the src0_modifiers field as a
|
||||
# materialized immediate.
|
||||
|
@ -718,11 +718,10 @@ define <4 x half> @v_test_canonicalize_reg_reg_undef_undef_v4f16(half %val0, hal
|
||||
|
||||
; GCN-LABEL: {{^}}v_test_canonicalize_reg_undef_reg_reg_v4f16:
|
||||
; GFX9: s_waitcnt
|
||||
; GFX9-NEXT: v_mov_b32_e32 [[MASK:v[0-9]+]], 0xffff
|
||||
; GFX9-NEXT: v_and_b32_e32 v1, [[MASK]], v1
|
||||
; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v1
|
||||
; GFX9-NEXT: v_max_f16_e32 v0, v0, v0
|
||||
; GFX9-NEXT: v_lshl_or_b32 v1, v2, 16, v1
|
||||
; GFX9-NEXT: v_and_b32_e32 v0, [[MASK]], v0
|
||||
; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
|
||||
; GFX9-NEXT: v_pk_max_f16 v1, v1, v1
|
||||
; GFX9-NEXT: s_setpc_b64
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user