mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 18:54:02 +01:00
AMDGPU: Fix packing undef parts of build_vector
llvm-svn: 339511
This commit is contained in:
parent
626d2c1a5e
commit
50e345a4f8
@ -4287,21 +4287,30 @@ SDValue SITargetLowering::lowerBUILD_VECTOR(SDValue Op,
|
||||
}
|
||||
|
||||
assert(VT == MVT::v2f16 || VT == MVT::v2i16);
|
||||
assert(!Subtarget->hasVOP3PInsts() && "this should be legal");
|
||||
|
||||
SDValue Lo = Op.getOperand(0);
|
||||
SDValue Hi = Op.getOperand(1);
|
||||
|
||||
Lo = DAG.getNode(ISD::BITCAST, SL, MVT::i16, Lo);
|
||||
Hi = DAG.getNode(ISD::BITCAST, SL, MVT::i16, Hi);
|
||||
// Avoid adding defined bits with the zero_extend.
|
||||
if (Hi.isUndef()) {
|
||||
Lo = DAG.getNode(ISD::BITCAST, SL, MVT::i16, Lo);
|
||||
SDValue ExtLo = DAG.getNode(ISD::ANY_EXTEND, SL, MVT::i32, Lo);
|
||||
return DAG.getNode(ISD::BITCAST, SL, VT, ExtLo);
|
||||
}
|
||||
|
||||
Lo = DAG.getNode(ISD::ZERO_EXTEND, SL, MVT::i32, Lo);
|
||||
Hi = DAG.getNode(ISD::BITCAST, SL, MVT::i16, Hi);
|
||||
Hi = DAG.getNode(ISD::ZERO_EXTEND, SL, MVT::i32, Hi);
|
||||
|
||||
SDValue ShlHi = DAG.getNode(ISD::SHL, SL, MVT::i32, Hi,
|
||||
DAG.getConstant(16, SL, MVT::i32));
|
||||
if (Lo.isUndef())
|
||||
return DAG.getNode(ISD::BITCAST, SL, VT, ShlHi);
|
||||
|
||||
Lo = DAG.getNode(ISD::BITCAST, SL, MVT::i16, Lo);
|
||||
Lo = DAG.getNode(ISD::ZERO_EXTEND, SL, MVT::i32, Lo);
|
||||
|
||||
SDValue Or = DAG.getNode(ISD::OR, SL, MVT::i32, Lo, ShlHi);
|
||||
|
||||
return DAG.getNode(ISD::BITCAST, SL, VT, Or);
|
||||
}
|
||||
|
||||
|
@ -1461,13 +1461,32 @@ class ExpPattern<SDPatternOperator node, ValueType vt, Instruction Inst> : GCNPa
|
||||
def : ExpPattern<AMDGPUexport, i32, EXP>;
|
||||
def : ExpPattern<AMDGPUexport_done, i32, EXP_DONE>;
|
||||
|
||||
// COPY_TO_REGCLASS is a workaround for a tablegen bug triggered by
|
||||
// COPY is a workaround for a tablegen bug triggered by
|
||||
// S_LSHL_B32's multiple outputs (it has an implicit scc def).
|
||||
def : GCNPat <
|
||||
(v2i16 (build_vector (i16 0), i16:$src1)),
|
||||
(v2i16 (COPY_TO_REGCLASS (S_LSHL_B32 i16:$src1, (i16 16)), SReg_32_XM0))
|
||||
(v2i16 (COPY (S_LSHL_B32 i16:$src1, (i16 16))))
|
||||
>;
|
||||
|
||||
def : GCNPat <
|
||||
(v2i16 (build_vector i16:$src0, (i16 undef))),
|
||||
(v2i16 (COPY $src0))
|
||||
>;
|
||||
|
||||
def : GCNPat <
|
||||
(v2f16 (build_vector f16:$src0, (f16 undef))),
|
||||
(v2f16 (COPY $src0))
|
||||
>;
|
||||
|
||||
def : GCNPat <
|
||||
(v2i16 (build_vector (i16 undef), i16:$src1)),
|
||||
(v2i16 (COPY (S_LSHL_B32 $src1, (i32 16))))
|
||||
>;
|
||||
|
||||
def : GCNPat <
|
||||
(v2f16 (build_vector (f16 undef), f16:$src1)),
|
||||
(v2f16 (COPY (S_LSHL_B32 $src1, (i32 16))))
|
||||
>;
|
||||
|
||||
let SubtargetPredicate = HasVOP3PInsts in {
|
||||
def : GCNPat <
|
||||
|
380
test/CodeGen/AMDGPU/build-vector-packed-partial-undef.ll
Normal file
380
test/CodeGen/AMDGPU/build-vector-packed-partial-undef.ll
Normal file
@ -0,0 +1,380 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX9 %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX8 %s
|
||||
|
||||
define void @undef_lo_v2i16(i16 %arg0) {
|
||||
; GFX9-LABEL: undef_lo_v2i16:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0
|
||||
; GFX9-NEXT: ;;#ASMSTART
|
||||
; GFX9-NEXT: ; use v0
|
||||
; GFX9-NEXT: ;;#ASMEND
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX8-LABEL: undef_lo_v2i16:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0
|
||||
; GFX8-NEXT: ;;#ASMSTART
|
||||
; GFX8-NEXT: ; use v0
|
||||
; GFX8-NEXT: ;;#ASMEND
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
%undef.lo = insertelement <2 x i16> undef, i16 %arg0, i32 1
|
||||
call void asm sideeffect "; use $0", "v"(<2 x i16> %undef.lo);
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @undef_lo_v2f16(half %arg0) {
|
||||
; GFX9-LABEL: undef_lo_v2f16:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0
|
||||
; GFX9-NEXT: ;;#ASMSTART
|
||||
; GFX9-NEXT: ; use v0
|
||||
; GFX9-NEXT: ;;#ASMEND
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX8-LABEL: undef_lo_v2f16:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0
|
||||
; GFX8-NEXT: ;;#ASMSTART
|
||||
; GFX8-NEXT: ; use v0
|
||||
; GFX8-NEXT: ;;#ASMEND
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
%undef.lo = insertelement <2 x half> undef, half %arg0, i32 1
|
||||
call void asm sideeffect "; use $0", "v"(<2 x half> %undef.lo);
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @undef_lo_op_v2f16(half %arg0) {
|
||||
; GFX9-LABEL: undef_lo_op_v2f16:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0
|
||||
; GFX9-NEXT: v_pk_add_f16 v0, v0, 1.0 op_sel_hi:[1,0]
|
||||
; GFX9-NEXT: ;;#ASMSTART
|
||||
; GFX9-NEXT: ; use v0
|
||||
; GFX9-NEXT: ;;#ASMEND
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX8-LABEL: undef_lo_op_v2f16:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: v_mov_b32_e32 v1, 0x3c00
|
||||
; GFX8-NEXT: v_add_f16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
||||
; GFX8-NEXT: v_or_b32_e32 v0, 0x7e00, v0
|
||||
; GFX8-NEXT: ;;#ASMSTART
|
||||
; GFX8-NEXT: ; use v0
|
||||
; GFX8-NEXT: ;;#ASMEND
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
%undef.lo = insertelement <2 x half> undef, half %arg0, i32 1
|
||||
%op = fadd <2 x half> %undef.lo, <half 1.0, half 1.0>
|
||||
call void asm sideeffect "; use $0", "v"(<2 x half> %op);
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @undef_lo_op_v2i16(i16 %arg0) {
|
||||
; GFX9-LABEL: undef_lo_op_v2i16:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0
|
||||
; GFX9-NEXT: s_movk_i32 s6, 0x63
|
||||
; GFX9-NEXT: v_pk_add_u16 v0, v0, s6 op_sel_hi:[1,0]
|
||||
; GFX9-NEXT: ;;#ASMSTART
|
||||
; GFX9-NEXT: ; use v0
|
||||
; GFX9-NEXT: ;;#ASMEND
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX8-LABEL: undef_lo_op_v2i16:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: v_mov_b32_e32 v1, 0x63
|
||||
; GFX8-NEXT: v_add_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
||||
; GFX8-NEXT: ;;#ASMSTART
|
||||
; GFX8-NEXT: ; use v0
|
||||
; GFX8-NEXT: ;;#ASMEND
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
%undef.lo = insertelement <2 x i16> undef, i16 %arg0, i32 1
|
||||
%op = add <2 x i16> %undef.lo, <i16 99, i16 99>
|
||||
call void asm sideeffect "; use $0", "v"(<2 x i16> %op);
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @undef_lo3_v4i16(i16 %arg0) {
|
||||
; GFX9-LABEL: undef_lo3_v4i16:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0
|
||||
; GFX9-NEXT: ;;#ASMSTART
|
||||
; GFX9-NEXT: ; use v[0:1]
|
||||
; GFX9-NEXT: ;;#ASMEND
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX8-LABEL: undef_lo3_v4i16:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0
|
||||
; GFX8-NEXT: ;;#ASMSTART
|
||||
; GFX8-NEXT: ; use v[0:1]
|
||||
; GFX8-NEXT: ;;#ASMEND
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
%undef.lo = insertelement <4 x i16> undef, i16 %arg0, i32 1
|
||||
call void asm sideeffect "; use $0", "v"(<4 x i16> %undef.lo);
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @undef_lo3_v4f16(half %arg0) {
|
||||
; GFX9-LABEL: undef_lo3_v4f16:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0
|
||||
; GFX9-NEXT: ;;#ASMSTART
|
||||
; GFX9-NEXT: ; use v[0:1]
|
||||
; GFX9-NEXT: ;;#ASMEND
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX8-LABEL: undef_lo3_v4f16:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0
|
||||
; GFX8-NEXT: ;;#ASMSTART
|
||||
; GFX8-NEXT: ; use v[0:1]
|
||||
; GFX8-NEXT: ;;#ASMEND
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
%undef.lo = insertelement <4 x half> undef, half %arg0, i32 1
|
||||
call void asm sideeffect "; use $0", "v"(<4 x half> %undef.lo);
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @undef_lo2_v4i16(<2 x i16> %arg0) {
|
||||
; GFX9-LABEL: undef_lo2_v4i16:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: v_lshrrev_b32_e32 v1, 16, v0
|
||||
; GFX9-NEXT: v_mov_b32_e32 v2, 0xffff0000
|
||||
; GFX9-NEXT: v_and_or_b32 v0, v0, v2, v1
|
||||
; GFX9-NEXT: ;;#ASMSTART
|
||||
; GFX9-NEXT: ; use v[0:1]
|
||||
; GFX9-NEXT: ;;#ASMEND
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX8-LABEL: undef_lo2_v4i16:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v0
|
||||
; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v0
|
||||
; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
|
||||
; GFX8-NEXT: ;;#ASMSTART
|
||||
; GFX8-NEXT: ; use v[0:1]
|
||||
; GFX8-NEXT: ;;#ASMEND
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
%undef.lo = shufflevector <2 x i16> %arg0, <2 x i16> undef, <4 x i32> <i32 1, i32 1, i32 2, i32 3>
|
||||
call void asm sideeffect "; use $0", "v"(<4 x i16> %undef.lo);
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @undef_lo2_v4f16(<2 x half> %arg0) {
|
||||
; GFX9-LABEL: undef_lo2_v4f16:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0
|
||||
; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v0
|
||||
; GFX9-NEXT: v_lshl_or_b32 v0, v0, 16, v1
|
||||
; GFX9-NEXT: ;;#ASMSTART
|
||||
; GFX9-NEXT: ; use v[0:1]
|
||||
; GFX9-NEXT: ;;#ASMEND
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX8-LABEL: undef_lo2_v4f16:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v0
|
||||
; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v0
|
||||
; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
|
||||
; GFX8-NEXT: ;;#ASMSTART
|
||||
; GFX8-NEXT: ; use v[0:1]
|
||||
; GFX8-NEXT: ;;#ASMEND
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
%undef.lo = shufflevector <2 x half> %arg0, <2 x half> undef, <4 x i32> <i32 1, i32 1, i32 2, i32 3>
|
||||
call void asm sideeffect "; use $0", "v"(<4 x half> %undef.lo);
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @undef_hi_v2i16(i16 %arg0) {
|
||||
; GFX9-LABEL: undef_hi_v2i16:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: ;;#ASMSTART
|
||||
; GFX9-NEXT: ; use v0
|
||||
; GFX9-NEXT: ;;#ASMEND
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX8-LABEL: undef_hi_v2i16:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: ;;#ASMSTART
|
||||
; GFX8-NEXT: ; use v0
|
||||
; GFX8-NEXT: ;;#ASMEND
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
%undef.hi = insertelement <2 x i16> undef, i16 %arg0, i32 0
|
||||
call void asm sideeffect "; use $0", "v"(<2 x i16> %undef.hi);
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @undef_hi_v2f16(half %arg0) {
|
||||
; GFX9-LABEL: undef_hi_v2f16:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: ;;#ASMSTART
|
||||
; GFX9-NEXT: ; use v0
|
||||
; GFX9-NEXT: ;;#ASMEND
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX8-LABEL: undef_hi_v2f16:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: ;;#ASMSTART
|
||||
; GFX8-NEXT: ; use v0
|
||||
; GFX8-NEXT: ;;#ASMEND
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
%undef.hi = insertelement <2 x half> undef, half %arg0, i32 0
|
||||
call void asm sideeffect "; use $0", "v"(<2 x half> %undef.hi);
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @undef_hi_op_v2f16(half %arg0) {
|
||||
; GFX9-LABEL: undef_hi_op_v2f16:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: v_pk_add_f16 v0, v0, 1.0 op_sel_hi:[1,0]
|
||||
; GFX9-NEXT: ;;#ASMSTART
|
||||
; GFX9-NEXT: ; use v0
|
||||
; GFX9-NEXT: ;;#ASMEND
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX8-LABEL: undef_hi_op_v2f16:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: v_add_f16_e32 v0, 1.0, v0
|
||||
; GFX8-NEXT: v_or_b32_e32 v0, 0x7e000000, v0
|
||||
; GFX8-NEXT: ;;#ASMSTART
|
||||
; GFX8-NEXT: ; use v0
|
||||
; GFX8-NEXT: ;;#ASMEND
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
%undef.hi = insertelement <2 x half> undef, half %arg0, i32 0
|
||||
%op = fadd <2 x half> %undef.hi, <half 1.0, half 1.0>
|
||||
call void asm sideeffect "; use $0", "v"(<2 x half> %op);
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @undef_hi_op_v2i16(i16 %arg0) {
|
||||
; GFX9-LABEL: undef_hi_op_v2i16:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: s_movk_i32 s6, 0x63
|
||||
; GFX9-NEXT: v_pk_add_u16 v0, v0, s6 op_sel_hi:[1,0]
|
||||
; GFX9-NEXT: ;;#ASMSTART
|
||||
; GFX9-NEXT: ; use v0
|
||||
; GFX9-NEXT: ;;#ASMEND
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX8-LABEL: undef_hi_op_v2i16:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: v_add_u16_e32 v0, 0x63, v0
|
||||
; GFX8-NEXT: ;;#ASMSTART
|
||||
; GFX8-NEXT: ; use v0
|
||||
; GFX8-NEXT: ;;#ASMEND
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
%undef.hi = insertelement <2 x i16> undef, i16 %arg0, i32 0
|
||||
%op = add <2 x i16> %undef.hi, <i16 99, i16 99>
|
||||
call void asm sideeffect "; use $0", "v"(<2 x i16> %op);
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @undef_hi3_v4i16(i16 %arg0) {
|
||||
; GFX9-LABEL: undef_hi3_v4i16:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: ;;#ASMSTART
|
||||
; GFX9-NEXT: ; use v[0:1]
|
||||
; GFX9-NEXT: ;;#ASMEND
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX8-LABEL: undef_hi3_v4i16:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: ;;#ASMSTART
|
||||
; GFX8-NEXT: ; use v[0:1]
|
||||
; GFX8-NEXT: ;;#ASMEND
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
%undef.hi = insertelement <4 x i16> undef, i16 %arg0, i32 0
|
||||
call void asm sideeffect "; use $0", "v"(<4 x i16> %undef.hi);
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @undef_hi3_v4f16(half %arg0) {
|
||||
; GFX9-LABEL: undef_hi3_v4f16:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: ;;#ASMSTART
|
||||
; GFX9-NEXT: ; use v[0:1]
|
||||
; GFX9-NEXT: ;;#ASMEND
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX8-LABEL: undef_hi3_v4f16:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: ;;#ASMSTART
|
||||
; GFX8-NEXT: ; use v[0:1]
|
||||
; GFX8-NEXT: ;;#ASMEND
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
%undef.hi = insertelement <4 x half> undef, half %arg0, i32 0
|
||||
call void asm sideeffect "; use $0", "v"(<4 x half> %undef.hi);
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @undef_hi2_v4i16(<2 x i16> %arg0) {
|
||||
; GFX9-LABEL: undef_hi2_v4i16:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: ;;#ASMSTART
|
||||
; GFX9-NEXT: ; use v[0:1]
|
||||
; GFX9-NEXT: ;;#ASMEND
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX8-LABEL: undef_hi2_v4i16:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: ;;#ASMSTART
|
||||
; GFX8-NEXT: ; use v[0:1]
|
||||
; GFX8-NEXT: ;;#ASMEND
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
%undef.hi = shufflevector <2 x i16> %arg0, <2 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
call void asm sideeffect "; use $0", "v"(<4 x i16> %undef.hi);
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @undef_hi2_v4f16(<2 x half> %arg0) {
|
||||
; GFX9-LABEL: undef_hi2_v4f16:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: ;;#ASMSTART
|
||||
; GFX9-NEXT: ; use v[0:1]
|
||||
; GFX9-NEXT: ;;#ASMEND
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX8-LABEL: undef_hi2_v4f16:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: ;;#ASMSTART
|
||||
; GFX8-NEXT: ; use v[0:1]
|
||||
; GFX8-NEXT: ;;#ASMEND
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
%undef.hi = shufflevector <2 x half> %arg0, <2 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
call void asm sideeffect "; use $0", "v"(<4 x half> %undef.hi);
|
||||
ret void
|
||||
}
|
||||
|
@ -402,9 +402,9 @@ define amdgpu_kernel void @test_call_external_void_func_v3i16() #0 {
|
||||
; FIXME: materialize constant directly in VGPR
|
||||
; GCN-LABEL: {{^}}test_call_external_void_func_v3i16_imm:
|
||||
; GFX9-DAG: s_mov_b32 [[K01:s[0-9]+]], 0x20001
|
||||
; GFX9-DAG: s_pack_ll_b32_b16 [[K23:s[0-9]+]], 3, s{{[0-9]+}}
|
||||
; GFX9-DAG: s_mov_b32 [[K2:s[0-9]+]], 3
|
||||
; GFX9: v_mov_b32_e32 v0, [[K01]]
|
||||
; GFX9: v_mov_b32_e32 v1, [[K23]]
|
||||
; GFX9: v_mov_b32_e32 v1, [[K2]]
|
||||
; GFX9: s_swappc_b64
|
||||
define amdgpu_kernel void @test_call_external_void_func_v3i16_imm() #0 {
|
||||
call void @external_void_func_v3i16(<3 x i16> <i16 1, i16 2, i16 3>)
|
||||
|
@ -83,8 +83,10 @@ define i32 @v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack_sext(half %src0, half %src
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_precvt:
|
||||
; GFX9: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] clamp{{$}}
|
||||
; GFX9: v_cvt_f16_f32_e32 v0, v0
|
||||
; GCN: s_waitcnt
|
||||
; GFX9-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] clamp{{$}}
|
||||
; GFX9-NEXT: v_cvt_f16_f32_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
|
||||
; GFX9-NEXT: s_setpc_b64
|
||||
define <2 x half> @v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_precvt(half %src0, half %src1, half %src2) #0 {
|
||||
%src0.ext = fpext half %src0 to float
|
||||
%src1.ext = fpext half %src1 to float
|
||||
|
Loading…
Reference in New Issue
Block a user