1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-25 04:02:41 +01:00

AMDGPU: Try to eliminate clearing of high bits of 16-bit instructions

The high 16 bits of the destination used to be consistently zeroed
pre-gfx9, but gfx9 complicated the situation: some instructions still
zero them and some now preserve them. This also manages to pick up a
few cases that the existing patterns fail to optimize away.

We handle some cases with instruction patterns, but some get
through. In particular this improves the integer cases.
This commit is contained in:
Matt Arsenault 2021-06-15 18:51:06 -04:00
parent 67612de11d
commit 248de533d0
19 changed files with 369 additions and 409 deletions

View File

@ -353,6 +353,105 @@ unsigned GCNSubtarget::getConstantBusLimit(unsigned Opcode) const {
return 2;
}
/// Returns true if the instruction \p Opcode, which produces a 16-bit result
/// in a 32-bit register, zeroes the high 16 bits of its destination on this
/// subtarget instead of preserving them.
///
/// The answer depends on getGeneration(). Opcodes that are not listed are
/// conservatively reported as not zeroing (false). Most opcodes appear in both
/// their _e32 and _e64 forms.
///
/// This list was mostly derived from experimentation.
///
/// \param Opcode machine opcode to query.
/// \returns true if the high 16 bits of the destination are known to be zero.
bool GCNSubtarget::zeroesHigh16BitsOfDest(unsigned Opcode) const {
switch (Opcode) {
// Conversions producing f16.
case AMDGPU::V_CVT_F16_F32_e32:
case AMDGPU::V_CVT_F16_F32_e64:
case AMDGPU::V_CVT_F16_U16_e32:
case AMDGPU::V_CVT_F16_U16_e64:
case AMDGPU::V_CVT_F16_I16_e32:
case AMDGPU::V_CVT_F16_I16_e64:
// f16 math, rounding and frexp/ldexp operations.
case AMDGPU::V_RCP_F16_e64:
case AMDGPU::V_RCP_F16_e32:
case AMDGPU::V_RSQ_F16_e64:
case AMDGPU::V_RSQ_F16_e32:
case AMDGPU::V_SQRT_F16_e64:
case AMDGPU::V_SQRT_F16_e32:
case AMDGPU::V_LOG_F16_e64:
case AMDGPU::V_LOG_F16_e32:
case AMDGPU::V_EXP_F16_e64:
case AMDGPU::V_EXP_F16_e32:
case AMDGPU::V_SIN_F16_e64:
case AMDGPU::V_SIN_F16_e32:
case AMDGPU::V_COS_F16_e64:
case AMDGPU::V_COS_F16_e32:
case AMDGPU::V_FLOOR_F16_e64:
case AMDGPU::V_FLOOR_F16_e32:
case AMDGPU::V_CEIL_F16_e64:
case AMDGPU::V_CEIL_F16_e32:
case AMDGPU::V_TRUNC_F16_e64:
case AMDGPU::V_TRUNC_F16_e32:
case AMDGPU::V_RNDNE_F16_e64:
case AMDGPU::V_RNDNE_F16_e32:
case AMDGPU::V_FRACT_F16_e64:
case AMDGPU::V_FRACT_F16_e32:
case AMDGPU::V_FREXP_MANT_F16_e64:
case AMDGPU::V_FREXP_MANT_F16_e32:
case AMDGPU::V_FREXP_EXP_I16_F16_e64:
case AMDGPU::V_FREXP_EXP_I16_F16_e32:
case AMDGPU::V_LDEXP_F16_e64:
case AMDGPU::V_LDEXP_F16_e32:
// 16-bit shifts and integer arithmetic.
case AMDGPU::V_LSHLREV_B16_e64:
case AMDGPU::V_LSHLREV_B16_e32:
case AMDGPU::V_LSHRREV_B16_e64:
case AMDGPU::V_LSHRREV_B16_e32:
case AMDGPU::V_ASHRREV_I16_e64:
case AMDGPU::V_ASHRREV_I16_e32:
case AMDGPU::V_ADD_U16_e64:
case AMDGPU::V_ADD_U16_e32:
case AMDGPU::V_SUB_U16_e64:
case AMDGPU::V_SUB_U16_e32:
case AMDGPU::V_SUBREV_U16_e64:
case AMDGPU::V_SUBREV_U16_e32:
case AMDGPU::V_MUL_LO_U16_e64:
case AMDGPU::V_MUL_LO_U16_e32:
// f16 arithmetic and min/max.
case AMDGPU::V_ADD_F16_e64:
case AMDGPU::V_ADD_F16_e32:
case AMDGPU::V_SUB_F16_e64:
case AMDGPU::V_SUB_F16_e32:
case AMDGPU::V_SUBREV_F16_e64:
case AMDGPU::V_SUBREV_F16_e32:
case AMDGPU::V_MUL_F16_e64:
case AMDGPU::V_MUL_F16_e32:
case AMDGPU::V_MAX_F16_e64:
case AMDGPU::V_MAX_F16_e32:
case AMDGPU::V_MIN_F16_e64:
case AMDGPU::V_MIN_F16_e32:
// 16-bit integer min/max.
case AMDGPU::V_MAX_U16_e64:
case AMDGPU::V_MAX_U16_e32:
case AMDGPU::V_MIN_U16_e64:
case AMDGPU::V_MIN_U16_e32:
case AMDGPU::V_MAX_I16_e64:
case AMDGPU::V_MAX_I16_e32:
case AMDGPU::V_MIN_I16_e64:
case AMDGPU::V_MIN_I16_e32:
// Everything listed above is reported as zeroing on every generation up to
// and including GFX9.
// On gfx10, all 16-bit instructions preserve the high bits.
return getGeneration() <= AMDGPUSubtarget::GFX9;
// Multiply-add / fused multiply-add family and div_fixup.
case AMDGPU::V_MAD_F16_e64:
case AMDGPU::V_MADAK_F16:
case AMDGPU::V_MADMK_F16:
case AMDGPU::V_MAC_F16_e64:
case AMDGPU::V_MAC_F16_e32:
case AMDGPU::V_FMAMK_F16:
case AMDGPU::V_FMAAK_F16:
case AMDGPU::V_MAD_U16_e64:
case AMDGPU::V_MAD_I16_e64:
case AMDGPU::V_FMA_F16_e64:
case AMDGPU::V_FMAC_F16_e64:
case AMDGPU::V_FMAC_F16_e32:
case AMDGPU::V_DIV_FIXUP_F16_e64:
// In gfx9, the preferred handling of the unused high 16-bits changed. Most
// instructions maintain the legacy behavior of 0ing. Some instructions
// changed to preserving the high bits.
// Only VOLCANIC_ISLANDS is therefore reported as zeroing for this group.
return getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS;
// The mix opcodes are named explicitly but share the default: they are never
// reported as zeroing the high half.
// NOTE(review): presumably because they write only one 16-bit half of the
// destination -- confirm against the ISA documentation.
case AMDGPU::V_MAD_MIXLO_F16:
case AMDGPU::V_MAD_MIXHI_F16:
default:
return false;
}
}
unsigned AMDGPUSubtarget::getMaxLocalMemSizeWithWaveCount(unsigned NWaves,
const Function &F) const {
if (NWaves == 1)

View File

@ -286,6 +286,11 @@ public:
unsigned getConstantBusLimit(unsigned Opcode) const;
/// Returns true if the instruction \p Opcode, which produces a 16-bit result
/// in a 32-bit register, implicitly zeroes the high 16 bits of that register
/// rather than preserving their original value.
bool zeroesHigh16BitsOfDest(unsigned Opcode) const;
/// Returns the value of this subtarget's HasIntClamp flag.
bool hasIntClamp() const {
return HasIntClamp;
}

View File

@ -91,6 +91,7 @@ public:
SmallVectorImpl<MachineInstr *> &CopiesToReplace) const;
bool tryFoldCndMask(MachineInstr &MI) const;
/// If \p MI is a V_AND_B32 with a 0xffff mask whose other source is produced
/// by an instruction that already zeroes the high 16 bits of its result,
/// replaces all uses of \p MI's result with that source and erases \p MI.
/// Returns true if \p MI was erased.
bool tryFoldZeroHighBits(MachineInstr &MI) const;
void foldInstOperand(MachineInstr &MI, MachineOperand &OpToFold) const;
const MachineOperand *isClamp(const MachineInstr &MI) const;
@ -1188,6 +1189,27 @@ bool SIFoldOperands::tryFoldCndMask(MachineInstr &MI) const {
return true;
}
/// Try to remove a `V_AND_B32 0xffff, x` whose only effect is to clear the
/// high 16 bits of `x`, when the instruction defining `x` already zeroes those
/// bits on the current subtarget.
///
/// \param MI candidate instruction; anything other than V_AND_B32_e32 or
///        V_AND_B32_e64 is rejected.
/// \returns true if \p MI was erased and every use of its result now reads the
///          masked source register directly; false if \p MI was left untouched.
bool SIFoldOperands::tryFoldZeroHighBits(MachineInstr &MI) const {
  const unsigned Opc = MI.getOpcode();
  if (Opc != AMDGPU::V_AND_B32_e64 && Opc != AMDGPU::V_AND_B32_e32)
    return false;

  // The mask must be exactly 0xffff, either as a literal operand or as
  // whatever immediate getImmOrMaterializedImm resolves the operand to.
  MachineOperand *Src0 = getImmOrMaterializedImm(*MRI, MI.getOperand(1));
  if (!Src0->isImm() || Src0->getImm() != 0xffff)
    return false;

  // The second source is not guaranteed to be a register; calling getReg() on
  // a non-register operand is invalid, so bail out instead.
  const MachineOperand &Src1Op = MI.getOperand(2);
  if (!Src1Op.isReg())
    return false;

  const Register Src1 = Src1Op.getReg();
  const MachineInstr *SrcDef = MRI->getVRegDef(Src1);
  // getVRegDef yields null when the register has no defining instruction.
  if (!SrcDef || !ST->zeroesHigh16BitsOfDest(SrcDef->getOpcode()))
    return false;

  // The AND is a no-op: forward its source to all users of its result.
  const Register Dst = MI.getOperand(0).getReg();
  MRI->replaceRegWith(Dst, Src1);

  // Former users of Dst now extend Src1's live range. If the AND was not the
  // last use of Src1, some other use may carry a kill flag that is no longer
  // accurate, so drop the kill flags on Src1.
  if (!Src1Op.isKill())
    MRI->clearKillFlags(Src1);

  MI.eraseFromParent();
  return true;
}
void SIFoldOperands::foldInstOperand(MachineInstr &MI,
MachineOperand &OpToFold) const {
// We need to mutate the operands of new mov instructions to add implicit
@ -1721,6 +1743,9 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
for (auto &MI : make_early_inc_range(*MBB)) {
tryFoldCndMask(MI);
if (tryFoldZeroHighBits(MI))
continue;
if (MI.isRegSequence() && tryFoldRegSequence(MI))
continue;

View File

@ -113,7 +113,7 @@ define amdgpu_kernel void @test_fmax3_olt_1_f16(half addrspace(1)* %out, half ad
; VI-NEXT: v_max_f16_e32 v0, v2, v0
; VI-NEXT: v_max_f16_sdwa v1, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; VI-NEXT: v_max_f16_e32 v0, v0, v3
; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-NEXT: v_or_b32_e32 v0, v0, v1
; VI-NEXT: s_setpc_b64
; GFX9: s_waitcnt

View File

@ -97,7 +97,7 @@ define <2 x half> @test_fmax_legacy_ugt_v2f16(<2 x half> %a, <2 x half> %b) #0 {
; VI-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NNAN-NEXT: v_max_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-NNAN-NEXT: v_max_f16_e32 v0, v0, v1
; VI-NNAN-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-NNAN-NEXT: v_or_b32_e32 v0, v0, v2
; VI-NNAN-NEXT: s_setpc_b64 s[30:31]
;
; SI-SAFE-LABEL: test_fmax_legacy_ugt_v2f16:
@ -178,7 +178,7 @@ define <3 x half> @test_fmax_legacy_ugt_v3f16(<3 x half> %a, <3 x half> %b) #0 {
; VI-NNAN-NEXT: v_max_f16_sdwa v4, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-NNAN-NEXT: v_max_f16_e32 v0, v0, v2
; VI-NNAN-NEXT: v_max_f16_e32 v1, v1, v3
; VI-NNAN-NEXT: v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-NNAN-NEXT: v_or_b32_e32 v0, v0, v4
; VI-NNAN-NEXT: s_setpc_b64 s[30:31]
;
; SI-SAFE-LABEL: test_fmax_legacy_ugt_v3f16:
@ -283,8 +283,8 @@ define <4 x half> @test_fmax_legacy_ugt_v4f16(<4 x half> %a, <4 x half> %b) #0 {
; VI-NNAN-NEXT: v_max_f16_e32 v1, v1, v3
; VI-NNAN-NEXT: v_max_f16_sdwa v5, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-NNAN-NEXT: v_max_f16_e32 v0, v0, v2
; VI-NNAN-NEXT: v_or_b32_sdwa v0, v0, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-NNAN-NEXT: v_or_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-NNAN-NEXT: v_or_b32_e32 v0, v0, v5
; VI-NNAN-NEXT: v_or_b32_e32 v1, v1, v4
; VI-NNAN-NEXT: s_setpc_b64 s[30:31]
;
; SI-SAFE-LABEL: test_fmax_legacy_ugt_v4f16:
@ -437,10 +437,10 @@ define <8 x half> @test_fmax_legacy_ugt_v8f16(<8 x half> %a, <8 x half> %b) #0 {
; VI-NNAN-NEXT: v_max_f16_e32 v1, v1, v5
; VI-NNAN-NEXT: v_max_f16_sdwa v11, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-NNAN-NEXT: v_max_f16_e32 v0, v0, v4
; VI-NNAN-NEXT: v_or_b32_sdwa v0, v0, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-NNAN-NEXT: v_or_b32_sdwa v1, v1, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-NNAN-NEXT: v_or_b32_sdwa v2, v2, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-NNAN-NEXT: v_or_b32_sdwa v3, v3, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-NNAN-NEXT: v_or_b32_e32 v0, v0, v11
; VI-NNAN-NEXT: v_or_b32_e32 v1, v1, v10
; VI-NNAN-NEXT: v_or_b32_e32 v2, v2, v9
; VI-NNAN-NEXT: v_or_b32_e32 v3, v3, v8
; VI-NNAN-NEXT: s_setpc_b64 s[30:31]
;
; SI-SAFE-LABEL: test_fmax_legacy_ugt_v8f16:

View File

@ -102,7 +102,7 @@ define amdgpu_kernel void @test_fmin3_olt_1_f16(half addrspace(1)* %out, half ad
; VI-NEXT: v_min_f16_e32 v0, v2, v0
; VI-NEXT: v_min_f16_sdwa v1, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; VI-NEXT: v_min_f16_e32 v0, v0, v3
; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-NEXT: v_or_b32_e32 v0, v0, v1
; VI-NEXT: s_setpc_b64
; GFX9: s_waitcnt

View File

@ -98,7 +98,7 @@ define <2 x half> @test_fmin_legacy_ule_v2f16(<2 x half> %a, <2 x half> %b) #0 {
; VI-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NNAN-NEXT: v_min_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-NNAN-NEXT: v_min_f16_e32 v0, v0, v1
; VI-NNAN-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-NNAN-NEXT: v_or_b32_e32 v0, v0, v2
; VI-NNAN-NEXT: s_setpc_b64 s[30:31]
;
; SI-SAFE-LABEL: test_fmin_legacy_ule_v2f16:
@ -179,7 +179,7 @@ define <3 x half> @test_fmin_legacy_ule_v3f16(<3 x half> %a, <3 x half> %b) #0 {
; VI-NNAN-NEXT: v_min_f16_sdwa v4, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-NNAN-NEXT: v_min_f16_e32 v0, v0, v2
; VI-NNAN-NEXT: v_min_f16_e32 v1, v1, v3
; VI-NNAN-NEXT: v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-NNAN-NEXT: v_or_b32_e32 v0, v0, v4
; VI-NNAN-NEXT: s_setpc_b64 s[30:31]
;
; SI-SAFE-LABEL: test_fmin_legacy_ule_v3f16:
@ -284,8 +284,8 @@ define <4 x half> @test_fmin_legacy_ule_v4f16(<4 x half> %a, <4 x half> %b) #0 {
; VI-NNAN-NEXT: v_min_f16_e32 v1, v1, v3
; VI-NNAN-NEXT: v_min_f16_sdwa v5, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-NNAN-NEXT: v_min_f16_e32 v0, v0, v2
; VI-NNAN-NEXT: v_or_b32_sdwa v0, v0, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-NNAN-NEXT: v_or_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-NNAN-NEXT: v_or_b32_e32 v0, v0, v5
; VI-NNAN-NEXT: v_or_b32_e32 v1, v1, v4
; VI-NNAN-NEXT: s_setpc_b64 s[30:31]
;
; SI-SAFE-LABEL: test_fmin_legacy_ule_v4f16:
@ -438,10 +438,10 @@ define <8 x half> @test_fmin_legacy_ule_v8f16(<8 x half> %a, <8 x half> %b) #0 {
; VI-NNAN-NEXT: v_min_f16_e32 v1, v1, v5
; VI-NNAN-NEXT: v_min_f16_sdwa v11, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-NNAN-NEXT: v_min_f16_e32 v0, v0, v4
; VI-NNAN-NEXT: v_or_b32_sdwa v0, v0, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-NNAN-NEXT: v_or_b32_sdwa v1, v1, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-NNAN-NEXT: v_or_b32_sdwa v2, v2, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-NNAN-NEXT: v_or_b32_sdwa v3, v3, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-NNAN-NEXT: v_or_b32_e32 v0, v0, v11
; VI-NNAN-NEXT: v_or_b32_e32 v1, v1, v10
; VI-NNAN-NEXT: v_or_b32_e32 v2, v2, v9
; VI-NNAN-NEXT: v_or_b32_e32 v3, v3, v8
; VI-NNAN-NEXT: s_setpc_b64 s[30:31]
;
; SI-SAFE-LABEL: test_fmin_legacy_ule_v8f16:

View File

@ -73,8 +73,7 @@ entry:
; SIVI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_HI]]
; GFX9-DAG: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[A_F32_1]]
; GFX9: v_and_b32_e32 v[[R_F16_LO:[0-9]+]], 0xffff, v[[R_F16_0]]
; GFX9: v_lshl_or_b32 v[[R_V2_F16:[0-9]+]], v[[R_F16_1]], 16, v[[R_F16_LO]]
; GFX9: v_lshl_or_b32 v[[R_V2_F16:[0-9]+]], v[[R_F16_1]], 16, v[[R_F16_0]]
; GCN: buffer_store_dword v[[R_V2_F16]]
@ -141,7 +140,7 @@ entry:
; GCN: buffer_load_dword v[[A_F32:[0-9]+]]
; GCN: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[A_F32]]
; SIVI-NOT: v[[R_F16]]
; GFX9-NEXT: v_and_b32_e32 v[[R_F16]], 0xffff, v[[R_F16]]
; GFX9-NOT: v_and_b32
; GCN: buffer_store_dword v[[R_F16]]
define amdgpu_kernel void @fptrunc_f32_to_f16_zext_i32(
i32 addrspace(1)* %r,
@ -159,7 +158,7 @@ entry:
; GCN: buffer_load_dword v[[A_F32:[0-9]+]]
; GCN: v_cvt_f16_f32_e64 v[[R_F16:[0-9]+]], |v[[A_F32]]|
; SIVI-NOT: v[[R_F16]]
; GFX9-NEXT: v_and_b32_e32 v[[R_F16]], 0xffff, v[[R_F16]]
; GFX9-NOT: v_and_b32
; GCN: buffer_store_dword v[[R_F16]]
define amdgpu_kernel void @fptrunc_fabs_f32_to_f16_zext_i32(
i32 addrspace(1)* %r,

View File

@ -14,26 +14,20 @@ body: |
; GFX8: liveins: $vgpr0
; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8: %op:vgpr_32 = nofpexcept V_CVT_F16_F32_e64 0, killed [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX8: %and0:vgpr_32 = V_AND_B32_e32 65535, %op, implicit $exec
; GFX8: %smask:sreg_32 = S_MOV_B32 65535
; GFX8: %and1:vgpr_32 = V_AND_B32_e64 %smask, %op, implicit $exec
; GFX8: %vmask:vgpr_32 = V_MOV_B32_e32 65535, implicit $exec
; GFX8: %and2:vgpr_32 = V_AND_B32_e64 %vmask, %op, implicit $exec
; GFX8: $vgpr0 = COPY %and0
; GFX8: $vgpr1 = COPY %and1
; GFX8: $vgpr2 = COPY %and2
; GFX8: $vgpr0 = COPY %op
; GFX8: $vgpr1 = COPY %op
; GFX8: $vgpr2 = COPY %op
; GFX9-LABEL: name: v_cvt_f16_f32_altmask
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: %op:vgpr_32 = nofpexcept V_CVT_F16_F32_e64 0, killed [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX9: %and0:vgpr_32 = V_AND_B32_e32 65535, %op, implicit $exec
; GFX9: %smask:sreg_32 = S_MOV_B32 65535
; GFX9: %and1:vgpr_32 = V_AND_B32_e64 %smask, %op, implicit $exec
; GFX9: %vmask:vgpr_32 = V_MOV_B32_e32 65535, implicit $exec
; GFX9: %and2:vgpr_32 = V_AND_B32_e64 %vmask, %op, implicit $exec
; GFX9: $vgpr0 = COPY %and0
; GFX9: $vgpr1 = COPY %and1
; GFX9: $vgpr2 = COPY %and2
; GFX9: $vgpr0 = COPY %op
; GFX9: $vgpr1 = COPY %op
; GFX9: $vgpr2 = COPY %op
; GFX10-LABEL: name: v_cvt_f16_f32_altmask
; GFX10: liveins: $vgpr0
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@ -103,19 +97,15 @@ body: |
; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8: %op_vop3:vgpr_32 = nofpexcept V_CVT_F16_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX8: %op_vop1:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 [[COPY]], implicit $mode, implicit $exec
; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
; GFX8: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
; GFX8: $vgpr0 = COPY %and_vop3
; GFX8: $vgpr1 = COPY %and_vop1
; GFX8: $vgpr0 = COPY %op_vop3
; GFX8: $vgpr1 = COPY %op_vop1
; GFX9-LABEL: name: v_cvt_f16_f32
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: %op_vop3:vgpr_32 = nofpexcept V_CVT_F16_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX9: %op_vop1:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 [[COPY]], implicit $mode, implicit $exec
; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
; GFX9: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
; GFX9: $vgpr0 = COPY %and_vop3
; GFX9: $vgpr1 = COPY %and_vop1
; GFX9: $vgpr0 = COPY %op_vop3
; GFX9: $vgpr1 = COPY %op_vop1
; GFX10-LABEL: name: v_cvt_f16_f32
; GFX10: liveins: $vgpr0
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@ -146,19 +136,15 @@ body: |
; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8: %op_vop3:vgpr_32 = nofpexcept V_CVT_F16_U16_e64 [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX8: %op_vop1:vgpr_32 = nofpexcept V_CVT_F16_U16_e32 [[COPY]], implicit $mode, implicit $exec
; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
; GFX8: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
; GFX8: $vgpr0 = COPY %and_vop3
; GFX8: $vgpr1 = COPY %and_vop1
; GFX8: $vgpr0 = COPY %op_vop3
; GFX8: $vgpr1 = COPY %op_vop1
; GFX9-LABEL: name: v_cvt_f16_u16
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: %op_vop3:vgpr_32 = nofpexcept V_CVT_F16_U16_e64 [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX9: %op_vop1:vgpr_32 = nofpexcept V_CVT_F16_U16_e32 [[COPY]], implicit $mode, implicit $exec
; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
; GFX9: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
; GFX9: $vgpr0 = COPY %and_vop3
; GFX9: $vgpr1 = COPY %and_vop1
; GFX9: $vgpr0 = COPY %op_vop3
; GFX9: $vgpr1 = COPY %op_vop1
; GFX10-LABEL: name: v_cvt_f16_u16
; GFX10: liveins: $vgpr0
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@ -189,19 +175,15 @@ body: |
; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8: %op_vop3:vgpr_32 = nofpexcept V_CVT_F16_I16_e64 [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX8: %op_vop1:vgpr_32 = nofpexcept V_CVT_F16_I16_e32 [[COPY]], implicit $mode, implicit $exec
; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
; GFX8: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
; GFX8: $vgpr0 = COPY %and_vop3
; GFX8: $vgpr1 = COPY %and_vop1
; GFX8: $vgpr0 = COPY %op_vop3
; GFX8: $vgpr1 = COPY %op_vop1
; GFX9-LABEL: name: v_cvt_f16_i16
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: %op_vop3:vgpr_32 = nofpexcept V_CVT_F16_I16_e64 [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX9: %op_vop1:vgpr_32 = nofpexcept V_CVT_F16_I16_e32 [[COPY]], implicit $mode, implicit $exec
; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
; GFX9: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
; GFX9: $vgpr0 = COPY %and_vop3
; GFX9: $vgpr1 = COPY %and_vop1
; GFX9: $vgpr0 = COPY %op_vop3
; GFX9: $vgpr1 = COPY %op_vop1
; GFX10-LABEL: name: v_cvt_f16_i16
; GFX10: liveins: $vgpr0
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@ -232,19 +214,15 @@ body: |
; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8: %op_vop3:vgpr_32 = nofpexcept V_RCP_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX8: %op_vop1:vgpr_32 = nofpexcept V_RCP_F16_e32 [[COPY]], implicit $mode, implicit $exec
; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
; GFX8: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
; GFX8: $vgpr0 = COPY %and_vop3
; GFX8: $vgpr1 = COPY %and_vop1
; GFX8: $vgpr0 = COPY %op_vop3
; GFX8: $vgpr1 = COPY %op_vop1
; GFX9-LABEL: name: v_rcp_f16
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: %op_vop3:vgpr_32 = nofpexcept V_RCP_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX9: %op_vop1:vgpr_32 = nofpexcept V_RCP_F16_e32 [[COPY]], implicit $mode, implicit $exec
; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
; GFX9: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
; GFX9: $vgpr0 = COPY %and_vop3
; GFX9: $vgpr1 = COPY %and_vop1
; GFX9: $vgpr0 = COPY %op_vop3
; GFX9: $vgpr1 = COPY %op_vop1
; GFX10-LABEL: name: v_rcp_f16
; GFX10: liveins: $vgpr0
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@ -274,19 +252,15 @@ body: |
; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8: %op_vop3:vgpr_32 = nofpexcept V_RSQ_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX8: %op_vop1:vgpr_32 = nofpexcept V_RSQ_F16_e32 [[COPY]], implicit $mode, implicit $exec
; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
; GFX8: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
; GFX8: $vgpr0 = COPY %and_vop3
; GFX8: $vgpr1 = COPY %and_vop1
; GFX8: $vgpr0 = COPY %op_vop3
; GFX8: $vgpr1 = COPY %op_vop1
; GFX9-LABEL: name: v_rsq_f16
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: %op_vop3:vgpr_32 = nofpexcept V_RSQ_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX9: %op_vop1:vgpr_32 = nofpexcept V_RSQ_F16_e32 [[COPY]], implicit $mode, implicit $exec
; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
; GFX9: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
; GFX9: $vgpr0 = COPY %and_vop3
; GFX9: $vgpr1 = COPY %and_vop1
; GFX9: $vgpr0 = COPY %op_vop3
; GFX9: $vgpr1 = COPY %op_vop1
; GFX10-LABEL: name: v_rsq_f16
; GFX10: liveins: $vgpr0
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@ -316,19 +290,15 @@ body: |
; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8: %op_vop3:vgpr_32 = nofpexcept V_SQRT_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX8: %op_vop1:vgpr_32 = nofpexcept V_SQRT_F16_e32 [[COPY]], implicit $mode, implicit $exec
; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
; GFX8: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
; GFX8: $vgpr0 = COPY %and_vop3
; GFX8: $vgpr1 = COPY %and_vop1
; GFX8: $vgpr0 = COPY %op_vop3
; GFX8: $vgpr1 = COPY %op_vop1
; GFX9-LABEL: name: v_sqrt_f16
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: %op_vop3:vgpr_32 = nofpexcept V_SQRT_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX9: %op_vop1:vgpr_32 = nofpexcept V_SQRT_F16_e32 [[COPY]], implicit $mode, implicit $exec
; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
; GFX9: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
; GFX9: $vgpr0 = COPY %and_vop3
; GFX9: $vgpr1 = COPY %and_vop1
; GFX9: $vgpr0 = COPY %op_vop3
; GFX9: $vgpr1 = COPY %op_vop1
; GFX10-LABEL: name: v_sqrt_f16
; GFX10: liveins: $vgpr0
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@ -359,19 +329,15 @@ body: |
; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8: %op_vop3:vgpr_32 = nofpexcept V_LOG_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX8: %op_vop1:vgpr_32 = nofpexcept V_LOG_F16_e32 [[COPY]], implicit $mode, implicit $exec
; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
; GFX8: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
; GFX8: $vgpr0 = COPY %and_vop3
; GFX8: $vgpr1 = COPY %and_vop1
; GFX8: $vgpr0 = COPY %op_vop3
; GFX8: $vgpr1 = COPY %op_vop1
; GFX9-LABEL: name: v_log_f16
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: %op_vop3:vgpr_32 = nofpexcept V_LOG_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX9: %op_vop1:vgpr_32 = nofpexcept V_LOG_F16_e32 [[COPY]], implicit $mode, implicit $exec
; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
; GFX9: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
; GFX9: $vgpr0 = COPY %and_vop3
; GFX9: $vgpr1 = COPY %and_vop1
; GFX9: $vgpr0 = COPY %op_vop3
; GFX9: $vgpr1 = COPY %op_vop1
; GFX10-LABEL: name: v_log_f16
; GFX10: liveins: $vgpr0
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@ -402,19 +368,15 @@ body: |
; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8: %op_vop3:vgpr_32 = nofpexcept V_EXP_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX8: %op_vop1:vgpr_32 = nofpexcept V_EXP_F16_e32 [[COPY]], implicit $mode, implicit $exec
; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
; GFX8: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
; GFX8: $vgpr0 = COPY %and_vop3
; GFX8: $vgpr1 = COPY %and_vop1
; GFX8: $vgpr0 = COPY %op_vop3
; GFX8: $vgpr1 = COPY %op_vop1
; GFX9-LABEL: name: v_exp_f16
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: %op_vop3:vgpr_32 = nofpexcept V_EXP_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX9: %op_vop1:vgpr_32 = nofpexcept V_EXP_F16_e32 [[COPY]], implicit $mode, implicit $exec
; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
; GFX9: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
; GFX9: $vgpr0 = COPY %and_vop3
; GFX9: $vgpr1 = COPY %and_vop1
; GFX9: $vgpr0 = COPY %op_vop3
; GFX9: $vgpr1 = COPY %op_vop1
; GFX10-LABEL: name: v_exp_f16
; GFX10: liveins: $vgpr0
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@ -445,19 +407,15 @@ body: |
; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8: %op_vop3:vgpr_32 = nofpexcept V_SIN_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX8: %op_vop1:vgpr_32 = nofpexcept V_SIN_F16_e32 [[COPY]], implicit $mode, implicit $exec
; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
; GFX8: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
; GFX8: $vgpr0 = COPY %and_vop3
; GFX8: $vgpr1 = COPY %and_vop1
; GFX8: $vgpr0 = COPY %op_vop3
; GFX8: $vgpr1 = COPY %op_vop1
; GFX9-LABEL: name: v_sin_f16
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: %op_vop3:vgpr_32 = nofpexcept V_SIN_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX9: %op_vop1:vgpr_32 = nofpexcept V_SIN_F16_e32 [[COPY]], implicit $mode, implicit $exec
; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
; GFX9: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
; GFX9: $vgpr0 = COPY %and_vop3
; GFX9: $vgpr1 = COPY %and_vop1
; GFX9: $vgpr0 = COPY %op_vop3
; GFX9: $vgpr1 = COPY %op_vop1
; GFX10-LABEL: name: v_sin_f16
; GFX10: liveins: $vgpr0
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@ -488,19 +446,15 @@ body: |
; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8: %op_vop3:vgpr_32 = nofpexcept V_COS_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX8: %op_vop1:vgpr_32 = nofpexcept V_COS_F16_e32 [[COPY]], implicit $mode, implicit $exec
; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
; GFX8: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
; GFX8: $vgpr0 = COPY %and_vop3
; GFX8: $vgpr1 = COPY %and_vop1
; GFX8: $vgpr0 = COPY %op_vop3
; GFX8: $vgpr1 = COPY %op_vop1
; GFX9-LABEL: name: v_cos_f16
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: %op_vop3:vgpr_32 = nofpexcept V_COS_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX9: %op_vop1:vgpr_32 = nofpexcept V_COS_F16_e32 [[COPY]], implicit $mode, implicit $exec
; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
; GFX9: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
; GFX9: $vgpr0 = COPY %and_vop3
; GFX9: $vgpr1 = COPY %and_vop1
; GFX9: $vgpr0 = COPY %op_vop3
; GFX9: $vgpr1 = COPY %op_vop1
; GFX10-LABEL: name: v_cos_f16
; GFX10: liveins: $vgpr0
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@ -531,19 +485,15 @@ body: |
; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8: %op_vop3:vgpr_32 = nofpexcept V_FLOOR_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX8: %op_vop1:vgpr_32 = nofpexcept V_FLOOR_F16_e32 [[COPY]], implicit $mode, implicit $exec
; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
; GFX8: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
; GFX8: $vgpr0 = COPY %and_vop3
; GFX8: $vgpr1 = COPY %and_vop1
; GFX8: $vgpr0 = COPY %op_vop3
; GFX8: $vgpr1 = COPY %op_vop1
; GFX9-LABEL: name: v_floor_f16
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: %op_vop3:vgpr_32 = nofpexcept V_FLOOR_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX9: %op_vop1:vgpr_32 = nofpexcept V_FLOOR_F16_e32 [[COPY]], implicit $mode, implicit $exec
; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
; GFX9: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
; GFX9: $vgpr0 = COPY %and_vop3
; GFX9: $vgpr1 = COPY %and_vop1
; GFX9: $vgpr0 = COPY %op_vop3
; GFX9: $vgpr1 = COPY %op_vop1
; GFX10-LABEL: name: v_floor_f16
; GFX10: liveins: $vgpr0
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@ -574,19 +524,15 @@ body: |
; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8: %op_vop3:vgpr_32 = nofpexcept V_CEIL_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX8: %op_vop1:vgpr_32 = nofpexcept V_CEIL_F16_e32 [[COPY]], implicit $mode, implicit $exec
; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
; GFX8: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
; GFX8: $vgpr0 = COPY %and_vop3
; GFX8: $vgpr1 = COPY %and_vop1
; GFX8: $vgpr0 = COPY %op_vop3
; GFX8: $vgpr1 = COPY %op_vop1
; GFX9-LABEL: name: v_ceil_f16
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: %op_vop3:vgpr_32 = nofpexcept V_CEIL_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX9: %op_vop1:vgpr_32 = nofpexcept V_CEIL_F16_e32 [[COPY]], implicit $mode, implicit $exec
; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
; GFX9: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
; GFX9: $vgpr0 = COPY %and_vop3
; GFX9: $vgpr1 = COPY %and_vop1
; GFX9: $vgpr0 = COPY %op_vop3
; GFX9: $vgpr1 = COPY %op_vop1
; GFX10-LABEL: name: v_ceil_f16
; GFX10: liveins: $vgpr0
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@ -617,19 +563,15 @@ body: |
; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8: %op_vop3:vgpr_32 = nofpexcept V_TRUNC_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX8: %op_vop1:vgpr_32 = nofpexcept V_TRUNC_F16_e32 [[COPY]], implicit $mode, implicit $exec
; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
; GFX8: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
; GFX8: $vgpr0 = COPY %and_vop3
; GFX8: $vgpr1 = COPY %and_vop1
; GFX8: $vgpr0 = COPY %op_vop3
; GFX8: $vgpr1 = COPY %op_vop1
; GFX9-LABEL: name: v_trunc_f16
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: %op_vop3:vgpr_32 = nofpexcept V_TRUNC_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX9: %op_vop1:vgpr_32 = nofpexcept V_TRUNC_F16_e32 [[COPY]], implicit $mode, implicit $exec
; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
; GFX9: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
; GFX9: $vgpr0 = COPY %and_vop3
; GFX9: $vgpr1 = COPY %and_vop1
; GFX9: $vgpr0 = COPY %op_vop3
; GFX9: $vgpr1 = COPY %op_vop1
; GFX10-LABEL: name: v_trunc_f16
; GFX10: liveins: $vgpr0
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@ -660,19 +602,15 @@ body: |
; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8: %op_vop3:vgpr_32 = nofpexcept V_RNDNE_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX8: %op_vop1:vgpr_32 = nofpexcept V_RNDNE_F16_e32 [[COPY]], implicit $mode, implicit $exec
; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
; GFX8: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
; GFX8: $vgpr0 = COPY %and_vop3
; GFX8: $vgpr1 = COPY %and_vop1
; GFX8: $vgpr0 = COPY %op_vop3
; GFX8: $vgpr1 = COPY %op_vop1
; GFX9-LABEL: name: v_rndne_f16
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: %op_vop3:vgpr_32 = nofpexcept V_RNDNE_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX9: %op_vop1:vgpr_32 = nofpexcept V_RNDNE_F16_e32 [[COPY]], implicit $mode, implicit $exec
; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
; GFX9: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
; GFX9: $vgpr0 = COPY %and_vop3
; GFX9: $vgpr1 = COPY %and_vop1
; GFX9: $vgpr0 = COPY %op_vop3
; GFX9: $vgpr1 = COPY %op_vop1
; GFX10-LABEL: name: v_rndne_f16
; GFX10: liveins: $vgpr0
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@ -703,19 +641,15 @@ body: |
; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8: %op_vop3:vgpr_32 = nofpexcept V_FRACT_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX8: %op_vop1:vgpr_32 = nofpexcept V_FRACT_F16_e32 [[COPY]], implicit $mode, implicit $exec
; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
; GFX8: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
; GFX8: $vgpr0 = COPY %and_vop3
; GFX8: $vgpr1 = COPY %and_vop1
; GFX8: $vgpr0 = COPY %op_vop3
; GFX8: $vgpr1 = COPY %op_vop1
; GFX9-LABEL: name: v_fract_f16
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: %op_vop3:vgpr_32 = nofpexcept V_FRACT_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX9: %op_vop1:vgpr_32 = nofpexcept V_FRACT_F16_e32 [[COPY]], implicit $mode, implicit $exec
; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
; GFX9: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
; GFX9: $vgpr0 = COPY %and_vop3
; GFX9: $vgpr1 = COPY %and_vop1
; GFX9: $vgpr0 = COPY %op_vop3
; GFX9: $vgpr1 = COPY %op_vop1
; GFX10-LABEL: name: v_fract_f16
; GFX10: liveins: $vgpr0
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@ -746,19 +680,15 @@ body: |
; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8: %op_vop3:vgpr_32 = nofpexcept V_FREXP_MANT_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX8: %op_vop1:vgpr_32 = nofpexcept V_FREXP_MANT_F16_e32 [[COPY]], implicit $mode, implicit $exec
; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
; GFX8: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
; GFX8: $vgpr0 = COPY %and_vop3
; GFX8: $vgpr1 = COPY %and_vop1
; GFX8: $vgpr0 = COPY %op_vop3
; GFX8: $vgpr1 = COPY %op_vop1
; GFX9-LABEL: name: v_frexp_mant_f16
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: %op_vop3:vgpr_32 = nofpexcept V_FREXP_MANT_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX9: %op_vop1:vgpr_32 = nofpexcept V_FREXP_MANT_F16_e32 [[COPY]], implicit $mode, implicit $exec
; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
; GFX9: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
; GFX9: $vgpr0 = COPY %and_vop3
; GFX9: $vgpr1 = COPY %and_vop1
; GFX9: $vgpr0 = COPY %op_vop3
; GFX9: $vgpr1 = COPY %op_vop1
; GFX10-LABEL: name: v_frexp_mant_f16
; GFX10: liveins: $vgpr0
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@ -789,19 +719,15 @@ body: |
; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8: %op_vop3:vgpr_32 = nofpexcept V_FREXP_EXP_I16_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX8: %op_vop1:vgpr_32 = nofpexcept V_FREXP_EXP_I16_F16_e32 [[COPY]], implicit $mode, implicit $exec
; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
; GFX8: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
; GFX8: $vgpr0 = COPY %and_vop3
; GFX8: $vgpr1 = COPY %and_vop1
; GFX8: $vgpr0 = COPY %op_vop3
; GFX8: $vgpr1 = COPY %op_vop1
; GFX9-LABEL: name: v_frexp_exp_f16
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: %op_vop3:vgpr_32 = nofpexcept V_FREXP_EXP_I16_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX9: %op_vop1:vgpr_32 = nofpexcept V_FREXP_EXP_I16_F16_e32 [[COPY]], implicit $mode, implicit $exec
; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
; GFX9: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
; GFX9: $vgpr0 = COPY %and_vop3
; GFX9: $vgpr1 = COPY %and_vop1
; GFX9: $vgpr0 = COPY %op_vop3
; GFX9: $vgpr1 = COPY %op_vop1
; GFX10-LABEL: name: v_frexp_exp_f16
; GFX10: liveins: $vgpr0
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@ -833,20 +759,16 @@ body: |
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX8: %op_vop3:vgpr_32 = nofpexcept V_LDEXP_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
; GFX8: %op_vop1:vgpr_32 = nofpexcept V_LDEXP_F16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
; GFX8: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
; GFX8: $vgpr0 = COPY %and_vop3
; GFX8: $vgpr1 = COPY %and_vop2
; GFX8: $vgpr0 = COPY %op_vop3
; GFX8: $vgpr1 = COPY %op_vop1
; GFX9-LABEL: name: v_ldexp_f16
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX9: %op_vop3:vgpr_32 = nofpexcept V_LDEXP_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
; GFX9: %op_vop1:vgpr_32 = nofpexcept V_LDEXP_F16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
; GFX9: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
; GFX9: $vgpr0 = COPY %and_vop3
; GFX9: $vgpr1 = COPY %and_vop2
; GFX9: $vgpr0 = COPY %op_vop3
; GFX9: $vgpr1 = COPY %op_vop1
; GFX10-LABEL: name: v_ldexp_f16
; GFX10: liveins: $vgpr0, $vgpr1
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@ -880,20 +802,16 @@ body: |
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX8: %op_vop3:vgpr_32 = nofpexcept V_LSHLREV_B16_e64 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
; GFX8: %op_vop2:vgpr_32 = nofpexcept V_LSHLREV_B16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
; GFX8: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop2, implicit $exec
; GFX8: $vgpr0 = COPY %and_vop3
; GFX8: $vgpr1 = COPY %and_vop2
; GFX8: $vgpr0 = COPY %op_vop3
; GFX8: $vgpr1 = COPY %op_vop2
; GFX9-LABEL: name: v_lshlrev_b16
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX9: %op_vop3:vgpr_32 = nofpexcept V_LSHLREV_B16_e64 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
; GFX9: %op_vop2:vgpr_32 = nofpexcept V_LSHLREV_B16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
; GFX9: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop2, implicit $exec
; GFX9: $vgpr0 = COPY %and_vop3
; GFX9: $vgpr1 = COPY %and_vop2
; GFX9: $vgpr0 = COPY %op_vop3
; GFX9: $vgpr1 = COPY %op_vop2
; GFX10-LABEL: name: v_lshlrev_b16
; GFX10: liveins: $vgpr0, $vgpr1
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@ -927,20 +845,16 @@ body: |
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX8: %op_vop3:vgpr_32 = nofpexcept V_LSHRREV_B16_e64 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
; GFX8: %op_vop2:vgpr_32 = nofpexcept V_LSHRREV_B16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
; GFX8: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop2, implicit $exec
; GFX8: $vgpr0 = COPY %and_vop3
; GFX8: $vgpr1 = COPY %and_vop2
; GFX8: $vgpr0 = COPY %op_vop3
; GFX8: $vgpr1 = COPY %op_vop2
; GFX9-LABEL: name: v_lshrrev_b16
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX9: %op_vop3:vgpr_32 = nofpexcept V_LSHRREV_B16_e64 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
; GFX9: %op_vop2:vgpr_32 = nofpexcept V_LSHRREV_B16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
; GFX9: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop2, implicit $exec
; GFX9: $vgpr0 = COPY %and_vop3
; GFX9: $vgpr1 = COPY %and_vop2
; GFX9: $vgpr0 = COPY %op_vop3
; GFX9: $vgpr1 = COPY %op_vop2
; GFX10-LABEL: name: v_lshrrev_b16
; GFX10: liveins: $vgpr0, $vgpr1
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@ -974,20 +888,16 @@ body: |
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX8: %op_vop3:vgpr_32 = nofpexcept V_ASHRREV_I16_e64 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
; GFX8: %op_vop2:vgpr_32 = nofpexcept V_ASHRREV_I16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
; GFX8: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop2, implicit $exec
; GFX8: $vgpr0 = COPY %and_vop3
; GFX8: $vgpr1 = COPY %and_vop2
; GFX8: $vgpr0 = COPY %op_vop3
; GFX8: $vgpr1 = COPY %op_vop2
; GFX9-LABEL: name: v_ashrrev_i16
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX9: %op_vop3:vgpr_32 = nofpexcept V_ASHRREV_I16_e64 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
; GFX9: %op_vop2:vgpr_32 = nofpexcept V_ASHRREV_I16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
; GFX9: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop2, implicit $exec
; GFX9: $vgpr0 = COPY %and_vop3
; GFX9: $vgpr1 = COPY %and_vop2
; GFX9: $vgpr0 = COPY %op_vop3
; GFX9: $vgpr1 = COPY %op_vop2
; GFX10-LABEL: name: v_ashrrev_i16
; GFX10: liveins: $vgpr0, $vgpr1
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@ -1021,20 +931,16 @@ body: |
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX8: %op_vop3:vgpr_32 = nofpexcept V_ADD_U16_e64 [[COPY]], [[COPY1]], 0, implicit $mode, implicit $exec
; GFX8: %op_vop2:vgpr_32 = nofpexcept V_ADD_U16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
; GFX8: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop2, implicit $exec
; GFX8: $vgpr0 = COPY %and_vop3
; GFX8: $vgpr1 = COPY %and_vop2
; GFX8: $vgpr0 = COPY %op_vop3
; GFX8: $vgpr1 = COPY %op_vop2
; GFX9-LABEL: name: v_add_u16
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX9: %op_vop3:vgpr_32 = nofpexcept V_ADD_U16_e64 [[COPY]], [[COPY1]], 0, implicit $mode, implicit $exec
; GFX9: %op_vop2:vgpr_32 = nofpexcept V_ADD_U16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
; GFX9: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop2, implicit $exec
; GFX9: $vgpr0 = COPY %and_vop3
; GFX9: $vgpr1 = COPY %and_vop2
; GFX9: $vgpr0 = COPY %op_vop3
; GFX9: $vgpr1 = COPY %op_vop2
; GFX10-LABEL: name: v_add_u16
; GFX10: liveins: $vgpr0, $vgpr1
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@ -1068,20 +974,16 @@ body: |
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX8: %op_vop3:vgpr_32 = nofpexcept V_SUB_U16_e64 [[COPY]], [[COPY1]], 0, implicit $mode, implicit $exec
; GFX8: %op_vop2:vgpr_32 = nofpexcept V_SUB_U16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
; GFX8: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop2, implicit $exec
; GFX8: $vgpr0 = COPY %and_vop3
; GFX8: $vgpr1 = COPY %and_vop2
; GFX8: $vgpr0 = COPY %op_vop3
; GFX8: $vgpr1 = COPY %op_vop2
; GFX9-LABEL: name: v_sub_u16
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX9: %op_vop3:vgpr_32 = nofpexcept V_SUB_U16_e64 [[COPY]], [[COPY1]], 0, implicit $mode, implicit $exec
; GFX9: %op_vop2:vgpr_32 = nofpexcept V_SUB_U16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
; GFX9: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop2, implicit $exec
; GFX9: $vgpr0 = COPY %and_vop3
; GFX9: $vgpr1 = COPY %and_vop2
; GFX9: $vgpr0 = COPY %op_vop3
; GFX9: $vgpr1 = COPY %op_vop2
; GFX10-LABEL: name: v_sub_u16
; GFX10: liveins: $vgpr0, $vgpr1
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@ -1115,20 +1017,16 @@ body: |
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX8: %op_vop3:vgpr_32 = nofpexcept V_SUBREV_U16_e64 [[COPY]], [[COPY1]], 0, implicit $mode, implicit $exec
; GFX8: %op_vop2:vgpr_32 = nofpexcept V_SUBREV_U16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
; GFX8: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop2, implicit $exec
; GFX8: $vgpr0 = COPY %and_vop3
; GFX8: $vgpr1 = COPY %and_vop2
; GFX8: $vgpr0 = COPY %op_vop3
; GFX8: $vgpr1 = COPY %op_vop2
; GFX9-LABEL: name: v_subrev_u16
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX9: %op_vop3:vgpr_32 = nofpexcept V_SUBREV_U16_e64 [[COPY]], [[COPY1]], 0, implicit $mode, implicit $exec
; GFX9: %op_vop2:vgpr_32 = nofpexcept V_SUBREV_U16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
; GFX9: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop2, implicit $exec
; GFX9: $vgpr0 = COPY %and_vop3
; GFX9: $vgpr1 = COPY %and_vop2
; GFX9: $vgpr0 = COPY %op_vop3
; GFX9: $vgpr1 = COPY %op_vop2
; GFX10-LABEL: name: v_subrev_u16
; GFX10: liveins: $vgpr0, $vgpr1
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@ -1162,20 +1060,16 @@ body: |
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX8: %op_vop3:vgpr_32 = nofpexcept V_MUL_LO_U16_e64 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
; GFX8: %op_vop2:vgpr_32 = nofpexcept V_MUL_LO_U16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
; GFX8: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop2, implicit $exec
; GFX8: $vgpr0 = COPY %and_vop3
; GFX8: $vgpr1 = COPY %and_vop2
; GFX8: $vgpr0 = COPY %op_vop3
; GFX8: $vgpr1 = COPY %op_vop2
; GFX9-LABEL: name: v_mul_lo_u16
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX9: %op_vop3:vgpr_32 = nofpexcept V_MUL_LO_U16_e64 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
; GFX9: %op_vop2:vgpr_32 = nofpexcept V_MUL_LO_U16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
; GFX9: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop2, implicit $exec
; GFX9: $vgpr0 = COPY %and_vop3
; GFX9: $vgpr1 = COPY %and_vop2
; GFX9: $vgpr0 = COPY %op_vop3
; GFX9: $vgpr1 = COPY %op_vop2
; GFX10-LABEL: name: v_mul_lo_u16
; GFX10: liveins: $vgpr0, $vgpr1
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@ -1209,20 +1103,16 @@ body: |
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX8: %op_vop3:vgpr_32 = nofpexcept V_ADD_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
; GFX8: %op_vop1:vgpr_32 = nofpexcept V_ADD_F16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
; GFX8: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
; GFX8: $vgpr0 = COPY %and_vop3
; GFX8: $vgpr1 = COPY %and_vop2
; GFX8: $vgpr0 = COPY %op_vop3
; GFX8: $vgpr1 = COPY %op_vop1
; GFX9-LABEL: name: v_add_f16
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX9: %op_vop3:vgpr_32 = nofpexcept V_ADD_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
; GFX9: %op_vop1:vgpr_32 = nofpexcept V_ADD_F16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
; GFX9: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
; GFX9: $vgpr0 = COPY %and_vop3
; GFX9: $vgpr1 = COPY %and_vop2
; GFX9: $vgpr0 = COPY %op_vop3
; GFX9: $vgpr1 = COPY %op_vop1
; GFX10-LABEL: name: v_add_f16
; GFX10: liveins: $vgpr0, $vgpr1
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@ -1256,20 +1146,16 @@ body: |
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX8: %op_vop3:vgpr_32 = nofpexcept V_ADD_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
; GFX8: %op_vop1:vgpr_32 = nofpexcept V_ADD_F16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
; GFX8: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
; GFX8: $vgpr0 = COPY %and_vop3
; GFX8: $vgpr1 = COPY %and_vop2
; GFX8: $vgpr0 = COPY %op_vop3
; GFX8: $vgpr1 = COPY %op_vop1
; GFX9-LABEL: name: v_sub_f16
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX9: %op_vop3:vgpr_32 = nofpexcept V_ADD_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
; GFX9: %op_vop1:vgpr_32 = nofpexcept V_ADD_F16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
; GFX9: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
; GFX9: $vgpr0 = COPY %and_vop3
; GFX9: $vgpr1 = COPY %and_vop2
; GFX9: $vgpr0 = COPY %op_vop3
; GFX9: $vgpr1 = COPY %op_vop1
; GFX10-LABEL: name: v_sub_f16
; GFX10: liveins: $vgpr0, $vgpr1
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@ -1303,20 +1189,16 @@ body: |
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX8: %op_vop3:vgpr_32 = nofpexcept V_ADD_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
; GFX8: %op_vop1:vgpr_32 = nofpexcept V_ADD_F16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
; GFX8: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
; GFX8: $vgpr0 = COPY %and_vop3
; GFX8: $vgpr1 = COPY %and_vop2
; GFX8: $vgpr0 = COPY %op_vop3
; GFX8: $vgpr1 = COPY %op_vop1
; GFX9-LABEL: name: v_subrev_f16
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX9: %op_vop3:vgpr_32 = nofpexcept V_ADD_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
; GFX9: %op_vop1:vgpr_32 = nofpexcept V_ADD_F16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
; GFX9: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
; GFX9: $vgpr0 = COPY %and_vop3
; GFX9: $vgpr1 = COPY %and_vop2
; GFX9: $vgpr0 = COPY %op_vop3
; GFX9: $vgpr1 = COPY %op_vop1
; GFX10-LABEL: name: v_subrev_f16
; GFX10: liveins: $vgpr0, $vgpr1
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@ -1350,20 +1232,16 @@ body: |
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX8: %op_vop3:vgpr_32 = nofpexcept V_ADD_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
; GFX8: %op_vop1:vgpr_32 = nofpexcept V_ADD_F16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
; GFX8: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
; GFX8: $vgpr0 = COPY %and_vop3
; GFX8: $vgpr1 = COPY %and_vop2
; GFX8: $vgpr0 = COPY %op_vop3
; GFX8: $vgpr1 = COPY %op_vop1
; GFX9-LABEL: name: v_mul_f16
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX9: %op_vop3:vgpr_32 = nofpexcept V_ADD_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
; GFX9: %op_vop1:vgpr_32 = nofpexcept V_ADD_F16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
; GFX9: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
; GFX9: $vgpr0 = COPY %and_vop3
; GFX9: $vgpr1 = COPY %and_vop2
; GFX9: $vgpr0 = COPY %op_vop3
; GFX9: $vgpr1 = COPY %op_vop1
; GFX10-LABEL: name: v_mul_f16
; GFX10: liveins: $vgpr0, $vgpr1
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@ -1397,20 +1275,16 @@ body: |
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX8: %op_vop3:vgpr_32 = nofpexcept V_ADD_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
; GFX8: %op_vop1:vgpr_32 = nofpexcept V_ADD_F16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
; GFX8: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
; GFX8: $vgpr0 = COPY %and_vop3
; GFX8: $vgpr1 = COPY %and_vop2
; GFX8: $vgpr0 = COPY %op_vop3
; GFX8: $vgpr1 = COPY %op_vop1
; GFX9-LABEL: name: v_max_f16
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX9: %op_vop3:vgpr_32 = nofpexcept V_ADD_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
; GFX9: %op_vop1:vgpr_32 = nofpexcept V_ADD_F16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
; GFX9: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
; GFX9: $vgpr0 = COPY %and_vop3
; GFX9: $vgpr1 = COPY %and_vop2
; GFX9: $vgpr0 = COPY %op_vop3
; GFX9: $vgpr1 = COPY %op_vop1
; GFX10-LABEL: name: v_max_f16
; GFX10: liveins: $vgpr0, $vgpr1
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@ -1444,20 +1318,16 @@ body: |
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX8: %op_vop3:vgpr_32 = nofpexcept V_ADD_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
; GFX8: %op_vop1:vgpr_32 = nofpexcept V_ADD_F16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
; GFX8: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
; GFX8: $vgpr0 = COPY %and_vop3
; GFX8: $vgpr1 = COPY %and_vop2
; GFX8: $vgpr0 = COPY %op_vop3
; GFX8: $vgpr1 = COPY %op_vop1
; GFX9-LABEL: name: v_min_f16
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX9: %op_vop3:vgpr_32 = nofpexcept V_ADD_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
; GFX9: %op_vop1:vgpr_32 = nofpexcept V_ADD_F16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
; GFX9: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
; GFX9: $vgpr0 = COPY %and_vop3
; GFX9: $vgpr1 = COPY %and_vop2
; GFX9: $vgpr0 = COPY %op_vop3
; GFX9: $vgpr1 = COPY %op_vop1
; GFX10-LABEL: name: v_min_f16
; GFX10: liveins: $vgpr0, $vgpr1
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@ -1491,20 +1361,16 @@ body: |
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX8: %op_vop3:vgpr_32 = nofpexcept V_MAX_U16_e64 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
; GFX8: %op_vop2:vgpr_32 = nofpexcept V_MAX_U16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
; GFX8: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop2, implicit $exec
; GFX8: $vgpr0 = COPY %and_vop3
; GFX8: $vgpr1 = COPY %and_vop2
; GFX8: $vgpr0 = COPY %op_vop3
; GFX8: $vgpr1 = COPY %op_vop2
; GFX9-LABEL: name: v_max_u16
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX9: %op_vop3:vgpr_32 = nofpexcept V_MAX_U16_e64 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
; GFX9: %op_vop2:vgpr_32 = nofpexcept V_MAX_U16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
; GFX9: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop2, implicit $exec
; GFX9: $vgpr0 = COPY %and_vop3
; GFX9: $vgpr1 = COPY %and_vop2
; GFX9: $vgpr0 = COPY %op_vop3
; GFX9: $vgpr1 = COPY %op_vop2
; GFX10-LABEL: name: v_max_u16
; GFX10: liveins: $vgpr0, $vgpr1
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@ -1537,20 +1403,16 @@ body: |
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX8: %op_vop3:vgpr_32 = nofpexcept V_MIN_U16_e64 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
; GFX8: %op_vop2:vgpr_32 = nofpexcept V_MIN_U16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
; GFX8: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop2, implicit $exec
; GFX8: $vgpr0 = COPY %and_vop3
; GFX8: $vgpr1 = COPY %and_vop2
; GFX8: $vgpr0 = COPY %op_vop3
; GFX8: $vgpr1 = COPY %op_vop2
; GFX9-LABEL: name: v_min_u16
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX9: %op_vop3:vgpr_32 = nofpexcept V_MIN_U16_e64 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
; GFX9: %op_vop2:vgpr_32 = nofpexcept V_MIN_U16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
; GFX9: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop2, implicit $exec
; GFX9: $vgpr0 = COPY %and_vop3
; GFX9: $vgpr1 = COPY %and_vop2
; GFX9: $vgpr0 = COPY %op_vop3
; GFX9: $vgpr1 = COPY %op_vop2
; GFX10-LABEL: name: v_min_u16
; GFX10: liveins: $vgpr0, $vgpr1
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@ -1584,20 +1446,16 @@ body: |
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX8: %op_vop3:vgpr_32 = nofpexcept V_MAX_I16_e64 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
; GFX8: %op_vop2:vgpr_32 = nofpexcept V_MAX_I16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
; GFX8: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop2, implicit $exec
; GFX8: $vgpr0 = COPY %and_vop3
; GFX8: $vgpr1 = COPY %and_vop2
; GFX8: $vgpr0 = COPY %op_vop3
; GFX8: $vgpr1 = COPY %op_vop2
; GFX9-LABEL: name: v_max_i16
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX9: %op_vop3:vgpr_32 = nofpexcept V_MAX_I16_e64 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
; GFX9: %op_vop2:vgpr_32 = nofpexcept V_MAX_I16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
; GFX9: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop2, implicit $exec
; GFX9: $vgpr0 = COPY %and_vop3
; GFX9: $vgpr1 = COPY %and_vop2
; GFX9: $vgpr0 = COPY %op_vop3
; GFX9: $vgpr1 = COPY %op_vop2
; GFX10-LABEL: name: v_max_i16
; GFX10: liveins: $vgpr0, $vgpr1
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@ -1630,20 +1488,16 @@ body: |
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX8: %op_vop3:vgpr_32 = nofpexcept V_MIN_I16_e64 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
; GFX8: %op_vop2:vgpr_32 = nofpexcept V_MIN_I16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
; GFX8: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop2, implicit $exec
; GFX8: $vgpr0 = COPY %and_vop3
; GFX8: $vgpr1 = COPY %and_vop2
; GFX8: $vgpr0 = COPY %op_vop3
; GFX8: $vgpr1 = COPY %op_vop2
; GFX9-LABEL: name: v_min_i16
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX9: %op_vop3:vgpr_32 = nofpexcept V_MIN_I16_e64 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
; GFX9: %op_vop2:vgpr_32 = nofpexcept V_MIN_I16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
; GFX9: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop2, implicit $exec
; GFX9: $vgpr0 = COPY %and_vop3
; GFX9: $vgpr1 = COPY %and_vop2
; GFX9: $vgpr0 = COPY %op_vop3
; GFX9: $vgpr1 = COPY %op_vop2
; GFX10-LABEL: name: v_min_i16
; GFX10: liveins: $vgpr0, $vgpr1
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@ -1677,8 +1531,7 @@ body: |
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX8: %op:vgpr_32 = nofpexcept V_MAD_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
; GFX8: %and:vgpr_32 = V_AND_B32_e32 65535, %op, implicit $exec
; GFX8: $vgpr0 = COPY %and
; GFX8: $vgpr0 = COPY %op
; GFX9-LABEL: name: v_mad_f16
; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@ -1716,8 +1569,7 @@ body: |
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX8: %op:vgpr_32 = nofpexcept V_FMA_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
; GFX8: %and:vgpr_32 = V_AND_B32_e32 65535, %op, implicit $exec
; GFX8: $vgpr0 = COPY %and
; GFX8: $vgpr0 = COPY %op
; GFX9-LABEL: name: v_fma_f16
; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@ -1755,8 +1607,7 @@ body: |
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX8: %op:vgpr_32 = nofpexcept V_DIV_FIXUP_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
; GFX8: %and:vgpr_32 = V_AND_B32_e32 65535, %op, implicit $exec
; GFX8: $vgpr0 = COPY %and
; GFX8: $vgpr0 = COPY %op
; GFX9-LABEL: name: v_div_fixup_f16
; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@ -1793,8 +1644,7 @@ body: |
; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX8: %op:vgpr_32 = nofpexcept V_MADAK_F16 [[COPY]], [[COPY1]], 1234, implicit $mode, implicit $exec
; GFX8: %and:vgpr_32 = V_AND_B32_e32 65535, %op, implicit $exec
; GFX8: $vgpr0 = COPY %and
; GFX8: $vgpr0 = COPY %op
; GFX9-LABEL: name: v_madak_f16
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@ -1828,8 +1678,7 @@ body: |
; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX8: %op:vgpr_32 = nofpexcept V_MADMK_F16 [[COPY]], 1234, [[COPY1]], implicit $mode, implicit $exec
; GFX8: %and:vgpr_32 = V_AND_B32_e32 65535, %op, implicit $exec
; GFX8: $vgpr0 = COPY %and
; GFX8: $vgpr0 = COPY %op
; GFX9-LABEL: name: v_madmk_f16
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@ -1863,8 +1712,7 @@ body: |
; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX8: %op:vgpr_32 = nofpexcept V_FMAAK_F16 [[COPY]], [[COPY1]], 1234, implicit $mode, implicit $exec
; GFX8: %and:vgpr_32 = V_AND_B32_e32 65535, %op, implicit $exec
; GFX8: $vgpr0 = COPY %and
; GFX8: $vgpr0 = COPY %op
; GFX9-LABEL: name: v_fmaak_f16
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@ -1898,8 +1746,7 @@ body: |
; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX8: %op:vgpr_32 = nofpexcept V_FMAMK_F16 [[COPY]], 1234, [[COPY1]], implicit $mode, implicit $exec
; GFX8: %and:vgpr_32 = V_AND_B32_e32 65535, %op, implicit $exec
; GFX8: $vgpr0 = COPY %and
; GFX8: $vgpr0 = COPY %op
; GFX9-LABEL: name: v_fmamk_f16
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@ -1935,10 +1782,8 @@ body: |
; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX8: %op_vop2:vgpr_32 = nofpexcept V_MAC_F16_e32 [[COPY]], [[COPY1]], [[COPY2]], implicit $mode, implicit $exec
; GFX8: %op_vop3:vgpr_32 = nofpexcept V_MAC_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
; GFX8: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop2, implicit $exec
; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
; GFX8: $vgpr0 = COPY %and_vop2
; GFX8: $vgpr0 = COPY %and_vop3
; GFX8: $vgpr0 = COPY %op_vop2
; GFX8: $vgpr0 = COPY %op_vop3
; GFX9-LABEL: name: v_mac_f16
; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@ -1986,10 +1831,8 @@ body: |
; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX8: %op_vop2:vgpr_32 = nofpexcept V_FMAC_F16_e32 [[COPY]], [[COPY1]], [[COPY2]], implicit $mode, implicit $exec
; GFX8: %op_vop3:vgpr_32 = nofpexcept V_FMAC_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
; GFX8: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop2, implicit $exec
; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
; GFX8: $vgpr0 = COPY %and_vop2
; GFX8: $vgpr0 = COPY %and_vop3
; GFX8: $vgpr0 = COPY %op_vop2
; GFX8: $vgpr0 = COPY %op_vop3
; GFX9-LABEL: name: v_fmac_f16
; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0

View File

@ -35,8 +35,7 @@ entry:
; GCN-LABEL: {{^}}frexp_exp_f16_zext
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; VI: v_frexp_exp_i16_f16_e32 v[[R_I16:[0-9]+]], v[[A_F16]]
; VI: v_and_b32_e32 v[[R_I32:[0-9]+]], 0xffff, v[[R_I16]]
; GCN: buffer_store_dword v[[R_I32]]
; GCN: buffer_store_dword v[[R_I16]]
define amdgpu_kernel void @frexp_exp_f16_zext(
i32 addrspace(1)* %r,
half addrspace(1)* %a) {

View File

@ -350,7 +350,7 @@ define amdgpu_kernel void @maxnum_v2f16(
; VI-NEXT: v_max_f16_e64 v1, s5, s5
; VI-NEXT: v_max_f16_e64 v2, s4, s4
; VI-NEXT: v_max_f16_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-NEXT: v_or_b32_e32 v0, v0, v1
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT: s_endpgm
;
@ -438,7 +438,7 @@ define amdgpu_kernel void @maxnum_v2f16_imm_a(
; VI-NEXT: v_max_f16_e64 v1, s4, s4
; VI-NEXT: v_max_f16_e32 v0, 0x4200, v0
; VI-NEXT: v_max_f16_sdwa v1, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-NEXT: v_or_b32_e32 v0, v0, v1
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT: s_endpgm
;
@ -518,7 +518,7 @@ define amdgpu_kernel void @maxnum_v2f16_imm_b(
; VI-NEXT: v_max_f16_e64 v1, s4, s4
; VI-NEXT: v_max_f16_e32 v0, 4.0, v0
; VI-NEXT: v_max_f16_sdwa v1, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-NEXT: v_or_b32_e32 v0, v0, v1
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT: s_endpgm
;
@ -619,7 +619,7 @@ define amdgpu_kernel void @maxnum_v3f16(
; VI-NEXT: v_max_f16_e64 v1, s6, s6
; VI-NEXT: v_max_f16_e64 v2, s4, s4
; VI-NEXT: v_max_f16_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-NEXT: v_or_b32_e32 v0, v0, v1
; VI-NEXT: v_max_f16_e64 v1, s7, s7
; VI-NEXT: v_max_f16_e64 v2, s5, s5
; VI-NEXT: v_max_f16_e32 v1, v2, v1
@ -749,7 +749,7 @@ define amdgpu_kernel void @maxnum_v4f16(
; VI-NEXT: v_max_f16_e64 v2, s5, s5
; VI-NEXT: v_max_f16_e64 v1, s7, s7
; VI-NEXT: v_max_f16_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-NEXT: v_or_b32_sdwa v1, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-NEXT: v_or_b32_e32 v1, v0, v1
; VI-NEXT: v_max_f16_e64 v2, s4, s4
; VI-NEXT: v_max_f16_e64 v0, s6, s6
; VI-NEXT: s_lshr_b32 s4, s4, 16
@ -758,7 +758,7 @@ define amdgpu_kernel void @maxnum_v4f16(
; VI-NEXT: v_max_f16_e64 v2, s5, s5
; VI-NEXT: v_max_f16_e64 v3, s4, s4
; VI-NEXT: v_max_f16_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-NEXT: v_or_b32_e32 v0, v0, v2
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT: s_endpgm
;
@ -867,12 +867,12 @@ define amdgpu_kernel void @fmax_v4f16_imm_a(
; VI-NEXT: v_max_f16_sdwa v0, v3, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-NEXT: v_max_f16_e32 v1, 0x4200, v1
; VI-NEXT: s_lshr_b32 s4, s4, 16
; VI-NEXT: v_or_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-NEXT: v_or_b32_e32 v1, v1, v0
; VI-NEXT: v_max_f16_e32 v0, 0x4800, v2
; VI-NEXT: v_max_f16_e64 v2, s4, s4
; VI-NEXT: v_mov_b32_e32 v3, 0x4000
; VI-NEXT: v_max_f16_sdwa v2, v2, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-NEXT: v_or_b32_e32 v0, v0, v2
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT: s_endpgm
;

View File

@ -378,7 +378,7 @@ define amdgpu_kernel void @minnum_v2f16_ieee(
; VI-NEXT: v_max_f16_e64 v1, s5, s5
; VI-NEXT: v_max_f16_e64 v2, s4, s4
; VI-NEXT: v_min_f16_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-NEXT: v_or_b32_e32 v0, v0, v1
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT: s_endpgm
;
@ -501,7 +501,7 @@ define amdgpu_kernel void @minnum_v2f16_imm_a(
; VI-NEXT: v_max_f16_e64 v1, s4, s4
; VI-NEXT: v_min_f16_e32 v0, 0x4200, v0
; VI-NEXT: v_min_f16_sdwa v1, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-NEXT: v_or_b32_e32 v0, v0, v1
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT: s_endpgm
;
@ -581,7 +581,7 @@ define amdgpu_kernel void @minnum_v2f16_imm_b(
; VI-NEXT: v_max_f16_e64 v1, s4, s4
; VI-NEXT: v_min_f16_e32 v0, 4.0, v0
; VI-NEXT: v_min_f16_sdwa v1, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-NEXT: v_or_b32_e32 v0, v0, v1
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT: s_endpgm
;
@ -682,7 +682,7 @@ define amdgpu_kernel void @minnum_v3f16(
; VI-NEXT: v_max_f16_e64 v1, s6, s6
; VI-NEXT: v_max_f16_e64 v2, s4, s4
; VI-NEXT: v_min_f16_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-NEXT: v_or_b32_e32 v0, v0, v1
; VI-NEXT: v_max_f16_e64 v1, s7, s7
; VI-NEXT: v_max_f16_e64 v2, s5, s5
; VI-NEXT: v_min_f16_e32 v1, v2, v1
@ -812,7 +812,7 @@ define amdgpu_kernel void @minnum_v4f16(
; VI-NEXT: v_max_f16_e64 v2, s5, s5
; VI-NEXT: v_max_f16_e64 v1, s7, s7
; VI-NEXT: v_min_f16_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-NEXT: v_or_b32_sdwa v1, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-NEXT: v_or_b32_e32 v1, v0, v1
; VI-NEXT: v_max_f16_e64 v2, s4, s4
; VI-NEXT: v_max_f16_e64 v0, s6, s6
; VI-NEXT: s_lshr_b32 s4, s4, 16
@ -821,7 +821,7 @@ define amdgpu_kernel void @minnum_v4f16(
; VI-NEXT: v_max_f16_e64 v2, s5, s5
; VI-NEXT: v_max_f16_e64 v3, s4, s4
; VI-NEXT: v_min_f16_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-NEXT: v_or_b32_e32 v0, v0, v2
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT: s_endpgm
;
@ -930,12 +930,12 @@ define amdgpu_kernel void @fmin_v4f16_imm_a(
; VI-NEXT: v_min_f16_sdwa v0, v3, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-NEXT: v_min_f16_e32 v1, 0x4200, v1
; VI-NEXT: s_lshr_b32 s4, s4, 16
; VI-NEXT: v_or_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-NEXT: v_or_b32_e32 v1, v1, v0
; VI-NEXT: v_min_f16_e32 v0, 0x4800, v2
; VI-NEXT: v_max_f16_e64 v2, s4, s4
; VI-NEXT: v_mov_b32_e32 v3, 0x4000
; VI-NEXT: v_min_f16_sdwa v2, v2, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-NEXT: v_or_b32_e32 v0, v0, v2
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT: s_endpgm
;

View File

@ -248,7 +248,7 @@ define i32 @zext_div_fixup_f16(half %x, half %y, half %z) {
; GFX8-NEXT: s_setpc_b64
; GFX9: v_cvt_f16_f32_e32 v0, v0
; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX9-NEXT: s_setpc_b64
; GFX10: v_cvt_f16_f32_e32 v0, v0
; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
@ -260,9 +260,13 @@ define i32 @zext_fptrunc_f16(float %x) {
}
; GCN-LABEL: {{^}}zext_fptrunc_fma_f16:
; GFX8: v_fma_f32 v0, v0, v1, v2
; GFX8-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX8-NEXT: s_setpc_b64
; GFX900: v_fma_f32 v0, v0, v1, v2
; GFX900-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX900-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX900-NEXT: s_setpc_b64
; GFX906: v_fma_mixlo_f16 v0, v0, v1, v2
; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0

View File

@ -67,7 +67,7 @@ define <2 x half> @v_constained_fadd_v2f16_fpexcept_strict(<2 x half> %x, <2 x h
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_add_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_add_f16_e32 v0, v0, v1
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fadd_v2f16_fpexcept_strict:
@ -92,7 +92,7 @@ define <2 x half> @v_constained_fadd_v2f16_fpexcept_ignore(<2 x half> %x, <2 x h
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_add_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_add_f16_e32 v0, v0, v1
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fadd_v2f16_fpexcept_ignore:
@ -117,7 +117,7 @@ define <2 x half> @v_constained_fadd_v2f16_fpexcept_maytrap(<2 x half> %x, <2 x
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_add_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_add_f16_e32 v0, v0, v1
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fadd_v2f16_fpexcept_maytrap:
@ -143,7 +143,7 @@ define <3 x half> @v_constained_fadd_v3f16_fpexcept_strict(<3 x half> %x, <3 x h
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_add_f16_sdwa v4, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_add_f16_e32 v0, v0, v2
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_e32 v0, v0, v4
; GFX8-NEXT: v_add_f16_e32 v1, v1, v3
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
@ -164,12 +164,9 @@ define <4 x half> @v_constained_fadd_v4f16_fpexcept_strict(<4 x half> %x, <4 x h
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_add_f16_sdwa v4, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX9-NEXT: v_add_f16_e32 v1, v1, v3
; GFX9-NEXT: v_add_f16_sdwa v5, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX9-NEXT: v_add_f16_e32 v0, v0, v2
; GFX9-NEXT: v_mov_b32_e32 v2, 0xffff
; GFX9-NEXT: v_add_f16_e32 v1, v1, v3
; GFX9-NEXT: v_and_b32_e32 v0, v2, v0
; GFX9-NEXT: v_and_b32_e32 v1, v2, v1
; GFX9-NEXT: v_lshl_or_b32 v0, v5, 16, v0
; GFX9-NEXT: v_lshl_or_b32 v1, v4, 16, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
@ -181,8 +178,8 @@ define <4 x half> @v_constained_fadd_v4f16_fpexcept_strict(<4 x half> %x, <4 x h
; GFX8-NEXT: v_add_f16_e32 v1, v1, v3
; GFX8-NEXT: v_add_f16_sdwa v5, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_add_f16_e32 v0, v0, v2
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_e32 v0, v0, v5
; GFX8-NEXT: v_or_b32_e32 v1, v1, v4
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fadd_v4f16_fpexcept_strict:
@ -234,7 +231,7 @@ define amdgpu_ps <2 x half> @s_constained_fadd_v2f16_fpexcept_strict(<2 x half>
; GFX8-NEXT: v_add_f16_sdwa v0, v1, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX8-NEXT: v_mov_b32_e32 v1, s3
; GFX8-NEXT: v_add_f16_e32 v1, s2, v1
; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_e32 v0, v1, v0
; GFX8-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: s_constained_fadd_v2f16_fpexcept_strict:

View File

@ -37,7 +37,7 @@ define <2 x half> @v_constained_fma_v2f16_fpexcept_strict(<2 x half> %x, <2 x ha
; GFX8-NEXT: v_fma_f16 v3, v5, v4, v3
; GFX8-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; GFX8-NEXT: v_fma_f16 v0, v0, v1, v2
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_e32 v0, v0, v3
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fma_v2f16_fpexcept_strict:
@ -67,7 +67,7 @@ define <3 x half> @v_constained_fma_v3f16_fpexcept_strict(<3 x half> %x, <3 x ha
; GFX8-NEXT: v_fma_f16 v6, v8, v7, v6
; GFX8-NEXT: v_lshlrev_b32_e32 v6, 16, v6
; GFX8-NEXT: v_fma_f16 v0, v0, v2, v4
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_e32 v0, v0, v6
; GFX8-NEXT: v_fma_f16 v1, v1, v3, v5
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
@ -117,10 +117,10 @@ define <4 x half> @v_constained_fma_v4f16_fpexcept_strict(<4 x half> %x, <4 x ha
; GFX8-NEXT: v_fma_f16 v7, v9, v8, v7
; GFX8-NEXT: v_fma_f16 v0, v0, v2, v4
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v7
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
; GFX8-NEXT: v_fma_f16 v1, v1, v3, v5
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v6
; GFX8-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_e32 v1, v1, v2
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fma_v4f16_fpexcept_strict:
@ -221,7 +221,7 @@ define <2 x half> @v_constained_fma_v2f16_fpexcept_strict_fneg_fneg(<2 x half> %
; GFX8-NEXT: v_fma_f16 v3, -v5, -v4, v3
; GFX8-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; GFX8-NEXT: v_fma_f16 v0, -v0, -v1, v2
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_e32 v0, v0, v3
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fma_v2f16_fpexcept_strict_fneg_fneg:

View File

@ -67,7 +67,7 @@ define <2 x half> @v_constained_fmul_v2f16_fpexcept_strict(<2 x half> %x, <2 x h
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mul_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_mul_f16_e32 v0, v0, v1
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fmul_v2f16_fpexcept_strict:
@ -92,7 +92,7 @@ define <2 x half> @v_constained_fmul_v2f16_fpexcept_ignore(<2 x half> %x, <2 x h
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mul_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_mul_f16_e32 v0, v0, v1
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fmul_v2f16_fpexcept_ignore:
@ -117,7 +117,7 @@ define <2 x half> @v_constained_fmul_v2f16_fpexcept_maytrap(<2 x half> %x, <2 x
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mul_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_mul_f16_e32 v0, v0, v1
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fmul_v2f16_fpexcept_maytrap:
@ -143,7 +143,7 @@ define <3 x half> @v_constained_fmul_v3f16_fpexcept_strict(<3 x half> %x, <3 x h
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mul_f16_sdwa v4, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_mul_f16_e32 v0, v0, v2
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_e32 v0, v0, v4
; GFX8-NEXT: v_mul_f16_e32 v1, v1, v3
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
@ -164,12 +164,9 @@ define <4 x half> @v_constained_fmul_v4f16_fpexcept_strict(<4 x half> %x, <4 x h
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mul_f16_sdwa v4, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX9-NEXT: v_mul_f16_e32 v1, v1, v3
; GFX9-NEXT: v_mul_f16_sdwa v5, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX9-NEXT: v_mul_f16_e32 v0, v0, v2
; GFX9-NEXT: v_mov_b32_e32 v2, 0xffff
; GFX9-NEXT: v_mul_f16_e32 v1, v1, v3
; GFX9-NEXT: v_and_b32_e32 v0, v2, v0
; GFX9-NEXT: v_and_b32_e32 v1, v2, v1
; GFX9-NEXT: v_lshl_or_b32 v0, v5, 16, v0
; GFX9-NEXT: v_lshl_or_b32 v1, v4, 16, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
@ -181,8 +178,8 @@ define <4 x half> @v_constained_fmul_v4f16_fpexcept_strict(<4 x half> %x, <4 x h
; GFX8-NEXT: v_mul_f16_e32 v1, v1, v3
; GFX8-NEXT: v_mul_f16_sdwa v5, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_mul_f16_e32 v0, v0, v2
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_e32 v0, v0, v5
; GFX8-NEXT: v_or_b32_e32 v1, v1, v4
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fmul_v4f16_fpexcept_strict:
@ -234,7 +231,7 @@ define amdgpu_ps <2 x half> @s_constained_fmul_v2f16_fpexcept_strict(<2 x half>
; GFX8-NEXT: v_mul_f16_sdwa v0, v1, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX8-NEXT: v_mov_b32_e32 v1, s3
; GFX8-NEXT: v_mul_f16_e32 v1, s2, v1
; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_e32 v0, v1, v0
; GFX8-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: s_constained_fmul_v2f16_fpexcept_strict:

View File

@ -61,7 +61,6 @@ define <2 x half> @v_constained_fsub_v2f16_fpexcept_strict(<2 x half> %x, <2 x h
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_sub_f16_sdwa v2, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX9-NEXT: v_sub_f16_e32 v0, v0, v1
; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX9-NEXT: v_lshl_or_b32 v0, v2, 16, v0
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
@ -70,7 +69,7 @@ define <2 x half> @v_constained_fsub_v2f16_fpexcept_strict(<2 x half> %x, <2 x h
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_sub_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_sub_f16_e32 v0, v0, v1
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fsub_v2f16_fpexcept_strict:
@ -92,7 +91,6 @@ define <2 x half> @v_constained_fsub_v2f16_fpexcept_ignore(<2 x half> %x, <2 x h
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_sub_f16_sdwa v2, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX9-NEXT: v_sub_f16_e32 v0, v0, v1
; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX9-NEXT: v_lshl_or_b32 v0, v2, 16, v0
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
@ -101,7 +99,7 @@ define <2 x half> @v_constained_fsub_v2f16_fpexcept_ignore(<2 x half> %x, <2 x h
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_sub_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_sub_f16_e32 v0, v0, v1
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fsub_v2f16_fpexcept_ignore:
@ -123,7 +121,6 @@ define <2 x half> @v_constained_fsub_v2f16_fpexcept_maytrap(<2 x half> %x, <2 x
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_sub_f16_sdwa v2, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX9-NEXT: v_sub_f16_e32 v0, v0, v1
; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX9-NEXT: v_lshl_or_b32 v0, v2, 16, v0
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
@ -132,7 +129,7 @@ define <2 x half> @v_constained_fsub_v2f16_fpexcept_maytrap(<2 x half> %x, <2 x
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_sub_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_sub_f16_e32 v0, v0, v1
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fsub_v2f16_fpexcept_maytrap:
@ -154,7 +151,6 @@ define <3 x half> @v_constained_fsub_v3f16_fpexcept_strict(<3 x half> %x, <3 x h
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_sub_f16_sdwa v4, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX9-NEXT: v_sub_f16_e32 v0, v0, v2
; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX9-NEXT: v_lshl_or_b32 v0, v4, 16, v0
; GFX9-NEXT: v_sub_f16_e32 v1, v1, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
@ -164,7 +160,7 @@ define <3 x half> @v_constained_fsub_v3f16_fpexcept_strict(<3 x half> %x, <3 x h
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_sub_f16_sdwa v4, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_sub_f16_e32 v0, v0, v2
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_e32 v0, v0, v4
; GFX8-NEXT: v_sub_f16_e32 v1, v1, v3
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
@ -188,12 +184,9 @@ define <4 x half> @v_constained_fsub_v4f16_fpexcept_strict(<4 x half> %x, <4 x h
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_sub_f16_sdwa v4, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX9-NEXT: v_sub_f16_e32 v1, v1, v3
; GFX9-NEXT: v_sub_f16_sdwa v5, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX9-NEXT: v_sub_f16_e32 v0, v0, v2
; GFX9-NEXT: v_mov_b32_e32 v2, 0xffff
; GFX9-NEXT: v_sub_f16_e32 v1, v1, v3
; GFX9-NEXT: v_and_b32_e32 v0, v2, v0
; GFX9-NEXT: v_and_b32_e32 v1, v2, v1
; GFX9-NEXT: v_lshl_or_b32 v0, v5, 16, v0
; GFX9-NEXT: v_lshl_or_b32 v1, v4, 16, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
@ -205,8 +198,8 @@ define <4 x half> @v_constained_fsub_v4f16_fpexcept_strict(<4 x half> %x, <4 x h
; GFX8-NEXT: v_sub_f16_e32 v1, v1, v3
; GFX8-NEXT: v_sub_f16_sdwa v5, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_sub_f16_e32 v0, v0, v2
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_e32 v0, v0, v5
; GFX8-NEXT: v_or_b32_e32 v1, v1, v4
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fsub_v4f16_fpexcept_strict:
@ -245,13 +238,12 @@ define amdgpu_ps half @s_constained_fsub_f16_fpexcept_strict(half inreg %x, half
define amdgpu_ps <2 x half> @s_constained_fsub_v2f16_fpexcept_strict(<2 x half> inreg %x, <2 x half> inreg %y) #0 {
; GFX9-LABEL: s_constained_fsub_v2f16_fpexcept_strict:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_mov_b32_e32 v1, s3
; GFX9-NEXT: s_lshr_b32 s0, s3, 16
; GFX9-NEXT: v_sub_f16_e32 v1, s2, v1
; GFX9-NEXT: v_mov_b32_e32 v1, s3
; GFX9-NEXT: s_lshr_b32 s1, s2, 16
; GFX9-NEXT: v_mov_b32_e32 v0, s0
; GFX9-NEXT: v_sub_f16_e32 v0, s1, v0
; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX9-NEXT: v_sub_f16_e32 v1, s2, v1
; GFX9-NEXT: v_lshl_or_b32 v0, v0, 16, v1
; GFX9-NEXT: ; return to shader part epilog
;
@ -264,7 +256,7 @@ define amdgpu_ps <2 x half> @s_constained_fsub_v2f16_fpexcept_strict(<2 x half>
; GFX8-NEXT: v_sub_f16_sdwa v0, v1, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX8-NEXT: v_mov_b32_e32 v1, s3
; GFX8-NEXT: v_sub_f16_e32 v1, s2, v1
; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_e32 v0, v1, v0
; GFX8-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: s_constained_fsub_v2f16_fpexcept_strict:

View File

@ -104,7 +104,7 @@ define <2 x i16> @v_uaddsat_v2i16(<2 x i16> %lhs, <2 x i16> %rhs) {
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_add_u16_sdwa v2, v0, v1 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_add_u16_e64 v0, v0, v1 clamp
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_uaddsat_v2i16:
@ -145,7 +145,7 @@ define <3 x i16> @v_uaddsat_v3i16(<3 x i16> %lhs, <3 x i16> %rhs) {
; GFX8-NEXT: v_add_u16_sdwa v4, v0, v2 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_add_u16_e64 v0, v0, v2 clamp
; GFX8-NEXT: v_add_u16_e64 v1, v1, v3 clamp
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_e32 v0, v0, v4
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_uaddsat_v3i16:
@ -192,8 +192,8 @@ define <2 x float> @v_uaddsat_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
; GFX8-NEXT: v_add_u16_e64 v0, v0, v2 clamp
; GFX8-NEXT: v_add_u16_sdwa v2, v1, v3 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_add_u16_e64 v1, v1, v3 clamp
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_e32 v0, v0, v4
; GFX8-NEXT: v_or_b32_e32 v1, v1, v2
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_uaddsat_v4i16:

View File

@ -126,7 +126,7 @@ define <2 x i16> @v_usubsat_v2i16(<2 x i16> %lhs, <2 x i16> %rhs) {
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_sub_u16_sdwa v2, v0, v1 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_sub_u16_e64 v0, v0, v1 clamp
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_usubsat_v2i16:
@ -173,7 +173,7 @@ define <3 x i16> @v_usubsat_v3i16(<3 x i16> %lhs, <3 x i16> %rhs) {
; GFX8-NEXT: v_sub_u16_sdwa v4, v0, v2 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_sub_u16_e64 v0, v0, v2 clamp
; GFX8-NEXT: v_sub_u16_e64 v1, v1, v3 clamp
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_e32 v0, v0, v4
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_usubsat_v3i16:
@ -228,8 +228,8 @@ define <2 x float> @v_usubsat_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
; GFX8-NEXT: v_sub_u16_e64 v0, v0, v2 clamp
; GFX8-NEXT: v_sub_u16_sdwa v2, v1, v3 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_sub_u16_e64 v1, v1, v3 clamp
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_e32 v0, v0, v4
; GFX8-NEXT: v_or_b32_e32 v1, v1, v2
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_usubsat_v4i16: