1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 12:41:49 +01:00

[AMDGPU] Add f16 to i1 CodeGen patterns.

Follow patterns used for f32 and f64 types.

Differential Revision: https://reviews.llvm.org/D95964
This commit is contained in:
Wen-Heng (Jack) Chung 2021-02-03 13:56:11 -06:00
parent afb45f94ea
commit 5cc002b97f
7 changed files with 220 additions and 0 deletions

View File

@ -1795,6 +1795,8 @@ class FPToI1Pat<Instruction Inst, int KOne, ValueType kone_type, ValueType vt, S
(i1 (Inst 0, (kone_type KOne), $src0_modifiers, $src0, DSTCLAMP.NONE))
>;
// Instantiations of FPToI1Pat for floating-point to i1 conversions. Each one
// selects a V_CMP_EQ_* compare of the source against a constant: the
// fp_to_uint forms use the FP*_ONE constant and the fp_to_sint forms use the
// FP*_NEG_ONE constant (presumably because the only nonzero i1 value reads as
// 1 when unsigned and -1 when signed; confirm against the class definition).
// The f16 entries pass the constant as an i16-typed immediate.
def : FPToI1Pat<V_CMP_EQ_F16_e64, CONST.FP16_ONE, i16, f16, fp_to_uint>;
def : FPToI1Pat<V_CMP_EQ_F16_e64, CONST.FP16_NEG_ONE, i16, f16, fp_to_sint>;
def : FPToI1Pat<V_CMP_EQ_F32_e64, CONST.FP32_ONE, i32, f32, fp_to_uint>;
def : FPToI1Pat<V_CMP_EQ_F32_e64, CONST.FP32_NEG_ONE, i32, f32, fp_to_sint>;
def : FPToI1Pat<V_CMP_EQ_F64_e64, CONST.FP64_ONE, i64, f64, fp_to_uint>;

View File

@ -130,3 +130,75 @@ body: |
%3:vgpr(s32) = G_FPTOSI %2
$vgpr0 = COPY %3
...
---
# Instruction-selection test: a signed conversion (G_FPTOSI) of an s16 value
# held in a VGPR to s32, followed by G_TRUNC to s1. The expected output
# converts f16 -> f32 -> i32 using V_CVT_F32_F16_e32 and V_CVT_I32_F32_e32,
# and S_ENDPGM consumes the 32-bit conversion result.
name: fptosi_s16_to_s1_vv
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0
; GCN-LABEL: name: fptosi_s16_to_s1_vv
; GCN: liveins: $vgpr0
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GCN: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec
; GCN: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %4, implicit $mode, implicit $exec
; GCN: S_ENDPGM 0, implicit %2
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s16) = G_TRUNC %0
%2:vgpr(s32) = G_FPTOSI %1
%3:vgpr(s1) = G_TRUNC %2
S_ENDPGM 0, implicit %3
...
---
# Instruction-selection test: same signed s16 -> s32 -> s1 sequence as the
# VGPR-source variant, but the s16 input lives in an SGPR while the G_FPTOSI
# result is assigned to a VGPR. The expected output feeds the sreg_32 copy
# straight into V_CVT_F32_F16_e32, then V_CVT_I32_F32_e32.
name: fptosi_s16_to_s1_vs
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $sgpr0
; GCN-LABEL: name: fptosi_s16_to_s1_vs
; GCN: liveins: $sgpr0
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GCN: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec
; GCN: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %4, implicit $mode, implicit $exec
; GCN: S_ENDPGM 0, implicit %2
%0:sgpr(s32) = COPY $sgpr0
%1:sgpr(s16) = G_TRUNC %0
%2:vgpr(s32) = G_FPTOSI %1
%3:vgpr(s1) = G_TRUNC %2
S_ENDPGM 0, implicit %3
...
---
# Instruction-selection test: the s16 input is negated with G_FNEG before the
# signed conversion and the truncation to s1. In the expected output the
# negation appears as V_XOR_B32_e32 with the constant 32768 (0x8000, bit 15)
# rather than as a source modifier, followed by the f16 -> f32 -> i32
# conversion pair.
name: fptosi_s16_to_s1_fneg_vv
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0
; GCN-LABEL: name: fptosi_s16_to_s1_fneg_vv
; GCN: liveins: $vgpr0
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768
; GCN: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[S_MOV_B32_]], [[COPY]], implicit $exec
; GCN: %5:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[V_XOR_B32_e32_]], implicit $mode, implicit $exec
; GCN: %3:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %5, implicit $mode, implicit $exec
; GCN: S_ENDPGM 0, implicit %3
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s16) = G_TRUNC %0
%2:vgpr(s16) = G_FNEG %1
%3:vgpr(s32) = G_FPTOSI %2
%4:vgpr(s1) = G_TRUNC %3
S_ENDPGM 0, implicit %4
...

View File

@ -103,3 +103,75 @@ body: |
%3:vgpr(s32) = G_FPTOUI %2
$vgpr0 = COPY %3
...
---
# Instruction-selection test: an unsigned conversion (G_FPTOUI) of an s16
# value held in a VGPR to s32, followed by G_TRUNC to s1. The expected output
# converts f16 -> f32 -> u32 using V_CVT_F32_F16_e32 and V_CVT_U32_F32_e32,
# and S_ENDPGM consumes the 32-bit conversion result.
name: fptoui_s16_to_s1_vv
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0
; GCN-LABEL: name: fptoui_s16_to_s1_vv
; GCN: liveins: $vgpr0
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GCN: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec
; GCN: %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %4, implicit $mode, implicit $exec
; GCN: S_ENDPGM 0, implicit %2
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s16) = G_TRUNC %0
%2:vgpr(s32) = G_FPTOUI %1
%3:vgpr(s1) = G_TRUNC %2
S_ENDPGM 0, implicit %3
...
---
# Instruction-selection test: same unsigned s16 -> s32 -> s1 sequence as the
# VGPR-source variant, but the s16 input lives in an SGPR while the G_FPTOUI
# result is assigned to a VGPR. The expected output feeds the sreg_32 copy
# straight into V_CVT_F32_F16_e32, then V_CVT_U32_F32_e32.
name: fptoui_s16_to_s1_vs
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $sgpr0
; GCN-LABEL: name: fptoui_s16_to_s1_vs
; GCN: liveins: $sgpr0
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GCN: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec
; GCN: %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %4, implicit $mode, implicit $exec
; GCN: S_ENDPGM 0, implicit %2
%0:sgpr(s32) = COPY $sgpr0
%1:sgpr(s16) = G_TRUNC %0
%2:vgpr(s32) = G_FPTOUI %1
%3:vgpr(s1) = G_TRUNC %2
S_ENDPGM 0, implicit %3
...
---
# Instruction-selection test: the s16 input is negated with G_FNEG before the
# unsigned conversion and the truncation to s1. In the expected output the
# negation appears as V_XOR_B32_e32 with the constant 32768 (0x8000, bit 15)
# rather than as a source modifier, followed by the f16 -> f32 -> u32
# conversion pair.
name: fptoui_s16_to_s1_fneg_vv
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0
; GCN-LABEL: name: fptoui_s16_to_s1_fneg_vv
; GCN: liveins: $vgpr0
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768
; GCN: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[S_MOV_B32_]], [[COPY]], implicit $exec
; GCN: %5:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[V_XOR_B32_e32_]], implicit $mode, implicit $exec
; GCN: %3:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %5, implicit $mode, implicit $exec
; GCN: S_ENDPGM 0, implicit %3
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s16) = G_TRUNC %0
%2:vgpr(s16) = G_FNEG %1
%3:vgpr(s32) = G_FPTOUI %2
%4:vgpr(s1) = G_TRUNC %3
S_ENDPGM 0, implicit %4
...

View File

@ -644,3 +644,27 @@ body: |
%1:_(<2 x s64>) = G_FPTOSI %0
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
...
---
# Legalizer test: G_FPTOSI directly from an s16 source to an s1 result.
# Both check prefixes expect the result to be widened to s32 and then
# truncated back to s1 with G_TRUNC. The SI checks additionally expect the
# s16 source to be extended to s32 with G_FPEXT before the conversion, while
# the VI checks convert from s16 directly.
name: test_fptosi_s16_to_s1
body: |
bb.0:
liveins: $vgpr0
; SI-LABEL: name: test_fptosi_s16_to_s1
; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
; SI: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[FPEXT]](s32)
; SI: [[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC [[FPTOSI]](s32)
; SI: S_ENDPGM 0, implicit [[TRUNC1]](s1)
; VI-LABEL: name: test_fptosi_s16_to_s1
; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
; VI: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[TRUNC]](s16)
; VI: [[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC [[FPTOSI]](s32)
; VI: S_ENDPGM 0, implicit [[TRUNC1]](s1)
%0:_(s32) = COPY $vgpr0
%1:_(s16) = G_TRUNC %0
%2:_(s1) = G_FPTOSI %1
S_ENDPGM 0, implicit %2
...

View File

@ -814,3 +814,27 @@ body: |
%1:_(<2 x s64>) = G_FPTOUI %0
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
...
---
# Legalizer test: G_FPTOUI directly from an s16 source to an s1 result.
#
# The input instruction and the checks use G_FPTOUI so that this test really
# exercises the unsigned conversion; it previously used G_FPTOSI (copied from
# the signed test), which left the unsigned s16 -> s1 path untested.
#
# The checks expect the result to be widened to s32 and truncated back to s1
# with G_TRUNC. The SI checks additionally expect the s16 source to be
# extended to s32 with G_FPEXT first; the VI checks convert from s16 directly.
# NOTE(review): check lines mirror the signed test's shape; regenerate with
# update_mir_test_checks.py to confirm.
name: test_fptoui_s16_to_s1
body: |
  bb.0:
    liveins: $vgpr0
    ; SI-LABEL: name: test_fptoui_s16_to_s1
    ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
    ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
    ; SI: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FPEXT]](s32)
    ; SI: [[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC [[FPTOUI]](s32)
    ; SI: S_ENDPGM 0, implicit [[TRUNC1]](s1)
    ; VI-LABEL: name: test_fptoui_s16_to_s1
    ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
    ; VI: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[TRUNC]](s16)
    ; VI: [[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC [[FPTOUI]](s32)
    ; VI: S_ENDPGM 0, implicit [[TRUNC1]](s1)
    %0:_(s32) = COPY $vgpr0
    %1:_(s16) = G_TRUNC %0
    %2:_(s1) = G_FPTOUI %1
    S_ENDPGM 0, implicit %2
...

View File

@ -132,3 +132,16 @@ entry:
store <2 x i64> %r.val, <2 x i64> addrspace(1)* %r
ret void
}
; Signed half -> i1 conversion. The checks expect an equality compare against
; -1.0 followed by a v_cndmask that materializes 0 or 1. The SI checks expect
; the half to be extended to f32 first; the VI checks expect a direct f16
; compare with the constant printed as 0xbc00.
;
; Register operands are matched with regexes rather than fixed numbers so the
; test does not depend on register allocation.
; GCN-LABEL: {{^}}fptosi_f16_to_i1:
; SI: v_cvt_f32_f16_e32 v{{[0-9]+}}, s{{[0-9]+}}
; SI: v_cmp_eq_f32_e32 vcc, -1.0, v{{[0-9]+}}
; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc
; VI: v_cmp_eq_f16_e64 s{{\[[0-9]+:[0-9]+\]}}, 0xbc00, s{{[0-9]+}}
; VI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, s{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @fptosi_f16_to_i1(i1 addrspace(1)* %out, half %in) {
entry:
  %conv = fptosi half %in to i1
  store i1 %conv, i1 addrspace(1)* %out
  ret void
}

View File

@ -130,3 +130,16 @@ entry:
store <2 x i64> %r.val, <2 x i64> addrspace(1)* %r
ret void
}
; Unsigned half -> i1 conversion. The checks expect an equality compare
; against 1.0 followed by a v_cndmask that materializes 0 or 1. The SI checks
; expect the half to be extended to f32 first; the VI checks expect a direct
; f16 compare.
;
; Register operands are matched with regexes rather than fixed numbers so the
; test does not depend on register allocation.
; GCN-LABEL: {{^}}fptoui_f16_to_i1:
; SI: v_cvt_f32_f16_e32 v{{[0-9]+}}, s{{[0-9]+}}
; SI: v_cmp_eq_f32_e32 vcc, 1.0, v{{[0-9]+}}
; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc
; VI: v_cmp_eq_f16_e64 s{{\[[0-9]+:[0-9]+\]}}, 1.0, s{{[0-9]+}}
; VI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, s{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @fptoui_f16_to_i1(i1 addrspace(1)* %out, half %in) {
entry:
  %conv = fptoui half %in to i1
  store i1 %conv, i1 addrspace(1)* %out
  ret void
}