mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 12:41:49 +01:00
[AMDGPU] Add f16 to i1 CodeGen patterns.
Follow patterns used for f32 and f64 types. Differential Revision: https://reviews.llvm.org/D95964
This commit is contained in:
parent
afb45f94ea
commit
5cc002b97f
@ -1795,6 +1795,8 @@ class FPToI1Pat<Instruction Inst, int KOne, ValueType kone_type, ValueType vt, S
|
||||
(i1 (Inst 0, (kone_type KOne), $src0_modifiers, $src0, DSTCLAMP.NONE))
|
||||
>;
|
||||
|
||||
def : FPToI1Pat<V_CMP_EQ_F16_e64, CONST.FP16_ONE, i16, f16, fp_to_uint>;
|
||||
def : FPToI1Pat<V_CMP_EQ_F16_e64, CONST.FP16_NEG_ONE, i16, f16, fp_to_sint>;
|
||||
def : FPToI1Pat<V_CMP_EQ_F32_e64, CONST.FP32_ONE, i32, f32, fp_to_uint>;
|
||||
def : FPToI1Pat<V_CMP_EQ_F32_e64, CONST.FP32_NEG_ONE, i32, f32, fp_to_sint>;
|
||||
def : FPToI1Pat<V_CMP_EQ_F64_e64, CONST.FP64_ONE, i64, f64, fp_to_uint>;
|
||||
|
@ -130,3 +130,75 @@ body: |
|
||||
%3:vgpr(s32) = G_FPTOSI %2
|
||||
$vgpr0 = COPY %3
|
||||
...
|
||||
|
||||
---
|
||||
name: fptosi_s16_to_s1_vv
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0
|
||||
|
||||
; GCN-LABEL: name: fptosi_s16_to_s1_vv
|
||||
; GCN: liveins: $vgpr0
|
||||
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GCN: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec
|
||||
; GCN: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %4, implicit $mode, implicit $exec
|
||||
; GCN: S_ENDPGM 0, implicit %2
|
||||
%0:vgpr(s32) = COPY $vgpr0
|
||||
%1:vgpr(s16) = G_TRUNC %0
|
||||
%2:vgpr(s32) = G_FPTOSI %1
|
||||
%3:vgpr(s1) = G_TRUNC %2
|
||||
S_ENDPGM 0, implicit %3
|
||||
...
|
||||
|
||||
---
|
||||
name: fptosi_s16_to_s1_vs
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0
|
||||
|
||||
; GCN-LABEL: name: fptosi_s16_to_s1_vs
|
||||
; GCN: liveins: $sgpr0
|
||||
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
|
||||
; GCN: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec
|
||||
; GCN: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %4, implicit $mode, implicit $exec
|
||||
; GCN: S_ENDPGM 0, implicit %2
|
||||
%0:sgpr(s32) = COPY $sgpr0
|
||||
%1:sgpr(s16) = G_TRUNC %0
|
||||
%2:vgpr(s32) = G_FPTOSI %1
|
||||
%3:vgpr(s1) = G_TRUNC %2
|
||||
S_ENDPGM 0, implicit %3
|
||||
...
|
||||
|
||||
---
|
||||
name: fptosi_s16_to_s1_fneg_vv
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0
|
||||
|
||||
; GCN-LABEL: name: fptosi_s16_to_s1_fneg_vv
|
||||
; GCN: liveins: $vgpr0
|
||||
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768
|
||||
; GCN: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[S_MOV_B32_]], [[COPY]], implicit $exec
|
||||
; GCN: %5:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[V_XOR_B32_e32_]], implicit $mode, implicit $exec
|
||||
; GCN: %3:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %5, implicit $mode, implicit $exec
|
||||
; GCN: S_ENDPGM 0, implicit %3
|
||||
%0:vgpr(s32) = COPY $vgpr0
|
||||
%1:vgpr(s16) = G_TRUNC %0
|
||||
%2:vgpr(s16) = G_FNEG %1
|
||||
%3:vgpr(s32) = G_FPTOSI %2
|
||||
%4:vgpr(s1) = G_TRUNC %3
|
||||
S_ENDPGM 0, implicit %4
|
||||
...
|
||||
|
@ -103,3 +103,75 @@ body: |
|
||||
%3:vgpr(s32) = G_FPTOUI %2
|
||||
$vgpr0 = COPY %3
|
||||
...
|
||||
|
||||
---
|
||||
name: fptoui_s16_to_s1_vv
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0
|
||||
|
||||
; GCN-LABEL: name: fptoui_s16_to_s1_vv
|
||||
; GCN: liveins: $vgpr0
|
||||
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GCN: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec
|
||||
; GCN: %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %4, implicit $mode, implicit $exec
|
||||
; GCN: S_ENDPGM 0, implicit %2
|
||||
%0:vgpr(s32) = COPY $vgpr0
|
||||
%1:vgpr(s16) = G_TRUNC %0
|
||||
%2:vgpr(s32) = G_FPTOUI %1
|
||||
%3:vgpr(s1) = G_TRUNC %2
|
||||
S_ENDPGM 0, implicit %3
|
||||
...
|
||||
|
||||
---
|
||||
name: fptoui_s16_to_s1_vs
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0
|
||||
|
||||
; GCN-LABEL: name: fptoui_s16_to_s1_vs
|
||||
; GCN: liveins: $sgpr0
|
||||
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
|
||||
; GCN: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec
|
||||
; GCN: %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %4, implicit $mode, implicit $exec
|
||||
; GCN: S_ENDPGM 0, implicit %2
|
||||
%0:sgpr(s32) = COPY $sgpr0
|
||||
%1:sgpr(s16) = G_TRUNC %0
|
||||
%2:vgpr(s32) = G_FPTOUI %1
|
||||
%3:vgpr(s1) = G_TRUNC %2
|
||||
S_ENDPGM 0, implicit %3
|
||||
...
|
||||
|
||||
---
|
||||
name: fptoui_s16_to_s1_fneg_vv
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0
|
||||
|
||||
; GCN-LABEL: name: fptoui_s16_to_s1_fneg_vv
|
||||
; GCN: liveins: $vgpr0
|
||||
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768
|
||||
; GCN: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[S_MOV_B32_]], [[COPY]], implicit $exec
|
||||
; GCN: %5:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[V_XOR_B32_e32_]], implicit $mode, implicit $exec
|
||||
; GCN: %3:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %5, implicit $mode, implicit $exec
|
||||
; GCN: S_ENDPGM 0, implicit %3
|
||||
%0:vgpr(s32) = COPY $vgpr0
|
||||
%1:vgpr(s16) = G_TRUNC %0
|
||||
%2:vgpr(s16) = G_FNEG %1
|
||||
%3:vgpr(s32) = G_FPTOUI %2
|
||||
%4:vgpr(s1) = G_TRUNC %3
|
||||
S_ENDPGM 0, implicit %4
|
||||
...
|
||||
|
@ -644,3 +644,27 @@ body: |
|
||||
%1:_(<2 x s64>) = G_FPTOSI %0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
|
||||
...
|
||||
|
||||
---
|
||||
name: test_fptosi_s16_to_s1
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0
|
||||
; SI-LABEL: name: test_fptosi_s16_to_s1
|
||||
; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
|
||||
; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
|
||||
; SI: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[FPEXT]](s32)
|
||||
; SI: [[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC [[FPTOSI]](s32)
|
||||
; SI: S_ENDPGM 0, implicit [[TRUNC1]](s1)
|
||||
; VI-LABEL: name: test_fptosi_s16_to_s1
|
||||
; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
|
||||
; VI: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[TRUNC]](s16)
|
||||
; VI: [[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC [[FPTOSI]](s32)
|
||||
; VI: S_ENDPGM 0, implicit [[TRUNC1]](s1)
|
||||
%0:_(s32) = COPY $vgpr0
|
||||
%1:_(s16) = G_TRUNC %0
|
||||
%2:_(s1) = G_FPTOSI %1
|
||||
S_ENDPGM 0, implicit %2
|
||||
...
|
||||
|
@ -814,3 +814,27 @@ body: |
|
||||
%1:_(<2 x s64>) = G_FPTOUI %0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
|
||||
...
|
||||
|
||||
---
|
||||
name: test_fptoui_s16_to_s1
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0
|
||||
; SI-LABEL: name: test_fptoui_s16_to_s1
|
||||
; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
|
||||
; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
|
||||
; SI: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[FPEXT]](s32)
|
||||
; SI: [[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC [[FPTOSI]](s32)
|
||||
; SI: S_ENDPGM 0, implicit [[TRUNC1]](s1)
|
||||
; VI-LABEL: name: test_fptoui_s16_to_s1
|
||||
; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
|
||||
; VI: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[TRUNC]](s16)
|
||||
; VI: [[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC [[FPTOSI]](s32)
|
||||
; VI: S_ENDPGM 0, implicit [[TRUNC1]](s1)
|
||||
%0:_(s32) = COPY $vgpr0
|
||||
%1:_(s16) = G_TRUNC %0
|
||||
%2:_(s1) = G_FPTOSI %1
|
||||
S_ENDPGM 0, implicit %2
|
||||
...
|
||||
|
@ -132,3 +132,16 @@ entry:
|
||||
store <2 x i64> %r.val, <2 x i64> addrspace(1)* %r
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}fptosi_f16_to_i1:
|
||||
; SI: v_cvt_f32_f16_e32 v{{[0-9+]}}, s{{[0-9]+}}
|
||||
; SI: v_cmp_eq_f32_e32 vcc, -1.0, v{{[0-9]+}}
|
||||
; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc
|
||||
; VI: v_cmp_eq_f16_e64 s{{\[[0-9]+:[0-9]+\]}}, 0xbc00, s{{[0-9]+}}
|
||||
; VI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, s[0:1]
|
||||
define amdgpu_kernel void @fptosi_f16_to_i1(i1 addrspace(1)* %out, half %in) {
|
||||
entry:
|
||||
%conv = fptosi half %in to i1
|
||||
store i1 %conv, i1 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
@ -130,3 +130,16 @@ entry:
|
||||
store <2 x i64> %r.val, <2 x i64> addrspace(1)* %r
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}fptoui_f16_to_i1:
|
||||
; SI: v_cvt_f32_f16_e32 v{{[0-9+]}}, s{{[0-9]+}}
|
||||
; SI: v_cmp_eq_f32_e32 vcc, 1.0, v{{[0-9]+}}
|
||||
; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc
|
||||
; VI: v_cmp_eq_f16_e64 s{{\[[0-9]+:[0-9]+\]}}, 1.0, s{{[0-9]+}}
|
||||
; VI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, s[0:1]
|
||||
define amdgpu_kernel void @fptoui_f16_to_i1(i1 addrspace(1)* %out, half %in) {
|
||||
entry:
|
||||
%conv = fptoui half %in to i1
|
||||
store i1 %conv, i1 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user