Mirror of https://github.com/RPCS3/llvm-mirror.git, synced 2025-01-31 12:41:49 +01:00

[AMDGPU] More codegen patterns for v2i16/v2f16 build_vector

It's simpler to do this at codegen time than to do ad-hoc constant
folding of machine instructions in SIFoldOperands.

Differential Revision: https://reviews.llvm.org/D88028
This commit was authored by Jay Foad on 2020-09-16 16:44:31 +01:00.
parent 54df34cd65
commit 7c2e74eb51
4 changed files with 12 additions and 226 deletions

View File

@@ -1048,25 +1048,6 @@ static bool tryConstantFoldOp(MachineRegisterInfo &MRI,
if (!Src0->isImm() && !Src1->isImm())
return false;
if (MI->getOpcode() == AMDGPU::V_LSHL_OR_B32 ||
MI->getOpcode() == AMDGPU::V_LSHL_ADD_U32 ||
MI->getOpcode() == AMDGPU::V_AND_OR_B32) {
if (Src0->isImm() && Src0->getImm() == 0) {
// v_lshl_or_b32 0, X, Y -> copy Y
// v_lshl_or_b32 0, X, K -> v_mov_b32 K
// v_lshl_add_b32 0, X, Y -> copy Y
// v_lshl_add_b32 0, X, K -> v_mov_b32 K
// v_and_or_b32 0, X, Y -> copy Y
// v_and_or_b32 0, X, K -> v_mov_b32 K
bool UseCopy = TII->getNamedOperand(*MI, AMDGPU::OpName::src2)->isReg();
MI->RemoveOperand(Src1Idx);
MI->RemoveOperand(Src0Idx);
MI->setDesc(TII->get(UseCopy ? AMDGPU::COPY : AMDGPU::V_MOV_B32_e32));
return true;
}
}
// and k0, k1 -> v_mov_b32 (k0 & k1)
// or k0, k1 -> v_mov_b32 (k0 | k1)
// xor k0, k1 -> v_mov_b32 (k0 ^ k1)

View File

@@ -2046,6 +2046,16 @@ def : GCNPat <
(S_LSHL_B32 SReg_32:$src1, (i16 16))
>;
// Build a v2i16 whose high half is zero: masking the 32-bit source to its
// low 16 bits leaves zeros in bits 16-31, so a single S_AND_B32 with 0xffff
// materializes the whole vector (per the commit message, done at ISel so
// SIFoldOperands no longer needs ad-hoc machine-instruction folding).
def : GCNPat <
(v2i16 (build_vector (i16 SReg_32:$src1), (i16 0))),
(S_AND_B32 (S_MOV_B32 (i32 0xffff)), SReg_32:$src1)
>;
// Same as the v2i16 case but for v2f16 with a floating-point zero in the
// high half — presumably valid because FP_ZERO matches +0.0, whose bit
// pattern is all zeros, so the AND mask produces it exactly (NOTE(review):
// this would not hold for -0.0; confirm FP_ZERO only matches positive zero).
def : GCNPat <
(v2f16 (build_vector (f16 SReg_32:$src1), (f16 FP_ZERO))),
(S_AND_B32 (S_MOV_B32 (i32 0xffff)), SReg_32:$src1)
>;
def : GCNPat <
(v2i16 (build_vector (i16 SReg_32:$src0), (i16 undef))),
(COPY_TO_REGCLASS SReg_32:$src0, SReg_32)

View File

@@ -422,64 +422,6 @@ body: |
S_ENDPGM 0, implicit $vcc
...
---
# SIFoldOperands test: v_lshl_or_b32 with src0 = 0 and a *register* src2
# folds to a plain COPY of src2 ("v_lshl_or_b32 0, X, Y -> copy Y").
# GCN-LABEL: name: constant_fold_lshl_or_reg0_immreg_reg{{$}}
# GCN: %2:vgpr_32 = COPY $vgpr0, implicit $exec
# GCN-NEXT: S_ENDPGM 0, implicit %2
name: constant_fold_lshl_or_reg0_immreg_reg
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0
%0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
%1:vgpr_32 = V_MOV_B32_e32 16, implicit $exec
%2:vgpr_32 = V_LSHL_OR_B32 %0,%1, $vgpr0, implicit $exec
S_ENDPGM 0, implicit %2
...
---
# SIFoldOperands test: v_lshl_or_b32 with src0 = 0 and an *immediate* src2
# folds to v_mov_b32 of that immediate ("v_lshl_or_b32 0, X, K -> v_mov_b32 K").
# GCN-LABEL: name: constant_fold_lshl_or_reg0_immreg_imm{{$}}
# GCN: %2:vgpr_32 = V_MOV_B32_e32 10, implicit $exec
# GCN-NEXT: S_ENDPGM 0, implicit %2
name: constant_fold_lshl_or_reg0_immreg_imm
tracksRegLiveness: true
body: |
bb.0:
%0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
%1:vgpr_32 = V_MOV_B32_e32 16, implicit $exec
%2:vgpr_32 = V_LSHL_OR_B32 %0, %1, 10, implicit $exec
S_ENDPGM 0, implicit %2
...
---
# SIFoldOperands test: like the previous case, but src2 is an immediate that
# was first materialized into a register (%2 = mov 12345); the fold still
# produces v_mov_b32 12345.
# GCN-LABEL: name: constant_fold_lshl_or_reg0_immreg_immreg{{$}}
# GCN: %3:vgpr_32 = V_MOV_B32_e32 12345, implicit $exec
# GCN-NEXT: S_ENDPGM 0, implicit %3
name: constant_fold_lshl_or_reg0_immreg_immreg
tracksRegLiveness: true
body: |
bb.0:
%0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
%1:vgpr_32 = V_MOV_B32_e32 16, implicit $exec
%2:vgpr_32 = V_MOV_B32_e32 12345, implicit $exec
%3:vgpr_32 = V_LSHL_OR_B32 %0, %1, %2, implicit $exec
S_ENDPGM 0, implicit %3
...
---
# GCN-LABEL: name: s_fold_andn2_imm_regimm_32{{$}}
# GCN: [[VAL:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1233024, implicit $exec
@@ -820,152 +762,6 @@ body: |
...
---
# SIFoldOperands test: v_lshl_add_u32 with src0 = 0 and a register src2
# folds to a COPY of src2 (the (0 << shift) + x term vanishes).
# GCN-LABEL: name: constant_fold_lshl_add_reg0_immreg_reg{{$}}
# GCN: %2:vgpr_32 = COPY $vgpr0, implicit $exec
# GCN-NEXT: S_ENDPGM
name: constant_fold_lshl_add_reg0_immreg_reg
alignment: 0
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0
%0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
%1:vgpr_32 = V_MOV_B32_e32 16, implicit $exec
%2:vgpr_32 = V_LSHL_ADD_U32 %0, %1, $vgpr0, implicit $exec
S_ENDPGM 0, implicit %2
...
---
# SIFoldOperands test: v_lshl_add_u32 with src0 = 0 and an immediate src2
# folds to v_mov_b32 of that immediate.
# GCN-LABEL: name: constant_fold_lshl_add_reg0_immreg_imm{{$}}
# GCN: %2:vgpr_32 = V_MOV_B32_e32 10, implicit $exec
# GCN-NEXT: S_ENDPGM
name: constant_fold_lshl_add_reg0_immreg_imm
alignment: 0
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
body: |
bb.0:
%0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
%1:vgpr_32 = V_MOV_B32_e32 16, implicit $exec
%2:vgpr_32 = V_LSHL_ADD_U32 %0, %1, 10, implicit $exec
S_ENDPGM 0, implicit %2
...
---
# SIFoldOperands test: v_lshl_add_u32 with src0 = 0 and src2 holding a
# register-materialized immediate (12345) folds to v_mov_b32 12345.
# GCN-LABEL: name: constant_fold_lshl_add_reg0_immreg_immreg{{$}}
# GCN: %3:vgpr_32 = V_MOV_B32_e32 12345, implicit $exec
# GCN-NEXT: S_ENDPGM
name: constant_fold_lshl_add_reg0_immreg_immreg
alignment: 0
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
body: |
bb.0:
%0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
%1:vgpr_32 = V_MOV_B32_e32 16, implicit $exec
%2:vgpr_32 = V_MOV_B32_e32 12345, implicit $exec
%3:vgpr_32 = V_LSHL_ADD_U32 %0, %1, %2, implicit $exec
S_ENDPGM 0, implicit %3
...
---
# SIFoldOperands test: v_and_or_b32 with src0 = 0 and a register src2 folds
# to a COPY of src2 ("v_and_or_b32 0, X, Y -> copy Y": (0 & x) | y == y).
# GCN-LABEL: name: constant_fold_and_or_reg0_immreg_reg{{$}}
# GCN: %2:vgpr_32 = COPY $vgpr0, implicit $exec
# GCN-NEXT: S_ENDPGM
name: constant_fold_and_or_reg0_immreg_reg
alignment: 0
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0
%0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
%1:vgpr_32 = V_MOV_B32_e32 16, implicit $exec
%2:vgpr_32 = V_AND_OR_B32 %0, %1, $vgpr0, implicit $exec
S_ENDPGM 0, implicit %2
...
---
# SIFoldOperands test: v_and_or_b32 with src0 = 0 and an immediate src2
# folds to v_mov_b32 of that immediate ("v_and_or_b32 0, X, K -> v_mov_b32 K").
# GCN-LABEL: name: constant_fold_and_or_reg0_immreg_imm{{$}}
# GCN: %2:vgpr_32 = V_MOV_B32_e32 10, implicit $exec
# GCN-NEXT: S_ENDPGM
name: constant_fold_and_or_reg0_immreg_imm
alignment: 0
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
body: |
bb.0:
%0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
%1:vgpr_32 = V_MOV_B32_e32 16, implicit $exec
%2:vgpr_32 = V_AND_OR_B32 %0, %1, 10, implicit $exec
S_ENDPGM 0, implicit %2
...
---
# SIFoldOperands test: v_and_or_b32 with src0 = 0 and src2 holding a
# register-materialized immediate (12345) folds to v_mov_b32 12345.
# GCN-LABEL: name: constant_fold_and_or_reg0_immreg_immreg{{$}}
# GCN: %3:vgpr_32 = V_MOV_B32_e32 12345, implicit $exec
# GCN-NEXT: S_ENDPGM
name: constant_fold_and_or_reg0_immreg_immreg
alignment: 0
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
body: |
bb.0:
%0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
%1:vgpr_32 = V_MOV_B32_e32 16, implicit $exec
%2:vgpr_32 = V_MOV_B32_e32 12345, implicit $exec
%3:vgpr_32 = V_AND_OR_B32 %0, %1, %2, implicit $exec
S_ENDPGM 0, implicit %3
...
# This used to incorrectly interpret V_MOV_B32_sdwa as being a move
# immediate, and interpreting the src0_modifiers field as a
# materialized immediate.

View File

@@ -718,11 +718,10 @@ define <4 x half> @v_test_canonicalize_reg_reg_undef_undef_v4f16(half %val0, hal
; GCN-LABEL: {{^}}v_test_canonicalize_reg_undef_reg_reg_v4f16:
; GFX9: s_waitcnt
; GFX9-NEXT: v_mov_b32_e32 [[MASK:v[0-9]+]], 0xffff
; GFX9-NEXT: v_and_b32_e32 v1, [[MASK]], v1
; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX9-NEXT: v_max_f16_e32 v0, v0, v0
; GFX9-NEXT: v_lshl_or_b32 v1, v2, 16, v1
; GFX9-NEXT: v_and_b32_e32 v0, [[MASK]], v0
; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX9-NEXT: v_pk_max_f16 v1, v1, v1
; GFX9-NEXT: s_setpc_b64