mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
AMDGPU: Fix scalarizing v4f16 fcanonicalize
llvm-svn: 338714
This commit is contained in:
parent
46d88ebd0c
commit
5e71c6a155
@ -589,6 +589,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
|
||||
setOperationAction(ISD::FMUL, MVT::v4f16, Custom);
|
||||
setOperationAction(ISD::FMINNUM, MVT::v4f16, Custom);
|
||||
setOperationAction(ISD::FMAXNUM, MVT::v4f16, Custom);
|
||||
setOperationAction(ISD::FCANONICALIZE, MVT::v4f16, Custom);
|
||||
|
||||
setOperationAction(ISD::SELECT, MVT::v4i16, Custom);
|
||||
setOperationAction(ISD::SELECT, MVT::v4f16, Custom);
|
||||
@ -3575,6 +3576,7 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
|
||||
return lowerDEBUGTRAP(Op, DAG);
|
||||
case ISD::FABS:
|
||||
case ISD::FNEG:
|
||||
case ISD::FCANONICALIZE:
|
||||
return splitUnaryVectorOp(Op, DAG);
|
||||
case ISD::SHL:
|
||||
case ISD::SRA:
|
||||
|
@ -6,6 +6,7 @@ declare half @llvm.fabs.f16(half) #0
|
||||
declare half @llvm.canonicalize.f16(half) #0
|
||||
declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #0
|
||||
declare <2 x half> @llvm.canonicalize.v2f16(<2 x half>) #0
|
||||
declare <4 x half> @llvm.canonicalize.v4f16(<4 x half>) #0
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
|
||||
|
||||
@ -476,6 +477,24 @@ define amdgpu_kernel void @test_fold_canonicalize_snan3_value_v2f16(<2 x half> a
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}v_test_canonicalize_var_v4f16:
|
||||
; GFX9: s_waitcnt
|
||||
; GFX9-NEXT: v_pk_max_f16 v0, v0, v0
|
||||
; GFX9-NEXT: v_pk_max_f16 v1, v1, v1
|
||||
; GFX9-NEXT: s_setpc_b64
|
||||
|
||||
; VI-DAG: v_max_f16_sdwa [[CANON_ELT3:v[0-9]+]], v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
|
||||
; VI-DAG: v_max_f16_e32 [[CANON_ELT2:v[0-9]+]], v1, v1
|
||||
; VI-DAG: v_max_f16_sdwa [[CANON_ELT1:v[0-9]+]], v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
|
||||
; VI-DAG: v_max_f16_e32 [[CANON_ELT0:v[0-9]+]], v0, v0
|
||||
; VI-DAG: v_or_b32_e32 v0, [[CANON_ELT0]], [[CANON_ELT1]]
|
||||
; VI-DAG: v_or_b32_e32 v1, [[CANON_ELT2]], [[CANON_ELT3]]
|
||||
; VI: s_setpc_b64
|
||||
; Test body for canonicalizing a variable (non-constant) <4 x half> value:
; passes the argument %val through @llvm.canonicalize.v4f16 and returns the
; result unchanged otherwise. The check lines preceding this function expect
; two v_pk_max_f16 instructions on GFX9, and on VI four v_max_f16 instructions
; (two of them SDWA forms writing WORD_1) recombined by two v_or_b32.
define <4 x half> @v_test_canonicalize_var_v4f16(<4 x half> %val) #1 {
|
||||
; The single operation under test; the operand is the function argument, so
; it is not a compile-time constant in this function.
%canonicalized = call <4 x half> @llvm.canonicalize.v4f16(<4 x half> %val)
|
||||
ret <4 x half> %canonicalized
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind readnone }
|
||||
attributes #1 = { nounwind }
|
||||
attributes #2 = { nounwind "target-features"="-fp64-fp16-denormals" }
|
||||
|
Loading…
x
Reference in New Issue
Block a user