mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 18:54:02 +01:00
29ffba4b56
This can be seen as a follow-up to commit 0ee439b705e82a4fe20e2, which changed the second argument of __powidf2, __powisf2 and __powitf2 in compiler-rt from si_int to int. That was to align with how those runtimes are defined in libgcc. One thing that seems to have been missing in that patch was to make sure that the rest of LLVM also handles that the argument now depends on the size of int (not using the si_int machine mode for 32-bit). When using __builtin_powi for a target with 16-bit int, clang crashed. And when emitting libcalls to those rtlib functions (typically when lowering @llvm.powi), the backend would always prepare the exponent argument as an i32, which caused miscompiles when the rtlib was compiled with 16-bit int. The solution used here is to use an overloaded type for the second argument in @llvm.powi. This way clang can use the "correct" type when lowering __builtin_powi, and then later, when emitting the libcall, it is assumed that the type used in @llvm.powi matches the rtlib function. One thing that needed some extra attention was that, when vectorizing calls, several passes did not support that several arguments could be overloaded in the intrinsics. This patch allows overload of a scalar operand by adding hasVectorInstrinsicOverloadedScalarOpd, with an entry for powi. Differential Revision: https://reviews.llvm.org/D99439
251 lines
8.8 KiB
LLVM
251 lines
8.8 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii < %s | FileCheck -check-prefixes=GCN,GFX7 %s
|
|
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8 %s
|
|
|
|
; Half-precision powi with a variable i32 exponent. The half value is passed
; and returned as i16 via bitcasts. The checks expect the operation to be
; carried out in f32: convert the base to f32, convert the exponent to float,
; then a log / legacy-multiply / exp sequence, and convert back to f16.
define i16 @v_powi_f16(i16 %l, i32 %r) {
|
|
; GCN-LABEL: v_powi_f16:
|
|
; GCN: ; %bb.0:
|
|
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GCN-NEXT: v_cvt_f32_f16_e32 v0, v0
|
|
; GCN-NEXT: v_cvt_f32_i32_e32 v1, v1
|
|
; GCN-NEXT: v_log_f32_e32 v0, v0
|
|
; GCN-NEXT: v_mul_legacy_f32_e32 v0, v1, v0
|
|
; GCN-NEXT: v_exp_f32_e32 v0, v0
|
|
; GCN-NEXT: v_cvt_f16_f32_e32 v0, v0
|
|
; GCN-NEXT: s_setpc_b64 s[30:31]
|
|
%l.cast = bitcast i16 %l to half
|
|
%res = call half @llvm.powi.f16.i32(half %l.cast, i32 %r)
|
|
%res.cast = bitcast half %res to i16
|
|
ret i16 %res.cast
|
|
}
|
|
|
|
; Single-precision powi with a variable i32 exponent. The checks expect the
; exponent to be converted to float and the result to be formed by a
; log / legacy-multiply / exp sequence; both subtargets share the GCN prefix.
define float @v_powi_f32(float %l, i32 %r) {
|
|
; GCN-LABEL: v_powi_f32:
|
|
; GCN: ; %bb.0:
|
|
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GCN-NEXT: v_log_f32_e32 v0, v0
|
|
; GCN-NEXT: v_cvt_f32_i32_e32 v1, v1
|
|
; GCN-NEXT: v_mul_legacy_f32_e32 v0, v1, v0
|
|
; GCN-NEXT: v_exp_f32_e32 v0, v0
|
|
; GCN-NEXT: s_setpc_b64 s[30:31]
|
|
%res = call float @llvm.powi.f32.i32(float %l, i32 %r)
|
|
ret float %res
|
|
}
|
|
|
|
; powi with the constant exponent 0. The checks expect the call to fold to
; the constant 1.0, materialized with a single v_mov_b32.
define float @v_powi_0_f32(float %l) {
|
|
; GCN-LABEL: v_powi_0_f32:
|
|
; GCN: ; %bb.0:
|
|
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GCN-NEXT: v_mov_b32_e32 v0, 1.0
|
|
; GCN-NEXT: s_setpc_b64 s[30:31]
|
|
%res = call float @llvm.powi.f32.i32(float %l, i32 0)
|
|
ret float %res
|
|
}
|
|
|
|
; powi with the constant exponent 1. The checks expect no arithmetic at all:
; only the entry wait and the return are emitted.
define float @v_powi_1_f32(float %l) {
|
|
; GCN-LABEL: v_powi_1_f32:
|
|
; GCN: ; %bb.0:
|
|
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GCN-NEXT: s_setpc_b64 s[30:31]
|
|
%res = call float @llvm.powi.f32.i32(float %l, i32 1)
|
|
ret float %res
|
|
}
|
|
|
|
; powi with the constant exponent -1. The checks expect a division sequence
; with 1.0 as the numerator (div_scale / rcp / fma refinement / div_fmas /
; div_fixup). GFX7 and GFX8 have separate check blocks because the
; instruction order and register assignment differ between the two.
define float @v_powi_neg1_f32(float %l) {
|
|
; GFX7-LABEL: v_powi_neg1_f32:
|
|
; GFX7: ; %bb.0:
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, 1.0
|
|
; GFX7-NEXT: v_rcp_f32_e32 v2, v1
|
|
; GFX7-NEXT: v_fma_f32 v3, -v1, v2, 1.0
|
|
; GFX7-NEXT: v_fma_f32 v2, v3, v2, v2
|
|
; GFX7-NEXT: v_div_scale_f32 v3, vcc, 1.0, v0, 1.0
|
|
; GFX7-NEXT: v_mul_f32_e32 v4, v3, v2
|
|
; GFX7-NEXT: v_fma_f32 v5, -v1, v4, v3
|
|
; GFX7-NEXT: v_fma_f32 v4, v5, v2, v4
|
|
; GFX7-NEXT: v_fma_f32 v1, -v1, v4, v3
|
|
; GFX7-NEXT: v_div_fmas_f32 v1, v1, v2, v4
|
|
; GFX7-NEXT: v_div_fixup_f32 v0, v1, v0, 1.0
|
|
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-LABEL: v_powi_neg1_f32:
|
|
; GFX8: ; %bb.0:
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, 1.0
|
|
; GFX8-NEXT: v_div_scale_f32 v2, vcc, 1.0, v0, 1.0
|
|
; GFX8-NEXT: v_rcp_f32_e32 v3, v1
|
|
; GFX8-NEXT: v_fma_f32 v4, -v1, v3, 1.0
|
|
; GFX8-NEXT: v_fma_f32 v3, v4, v3, v3
|
|
; GFX8-NEXT: v_mul_f32_e32 v4, v2, v3
|
|
; GFX8-NEXT: v_fma_f32 v5, -v1, v4, v2
|
|
; GFX8-NEXT: v_fma_f32 v4, v5, v3, v4
|
|
; GFX8-NEXT: v_fma_f32 v1, -v1, v4, v2
|
|
; GFX8-NEXT: v_div_fmas_f32 v1, v1, v3, v4
|
|
; GFX8-NEXT: v_div_fixup_f32 v0, v1, v0, 1.0
|
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
|
%res = call float @llvm.powi.f32.i32(float %l, i32 -1)
|
|
ret float %res
|
|
}
|
|
|
|
; powi with the constant exponent 2. The checks expect a single multiply of
; the input by itself.
define float @v_powi_2_f32(float %l) {
|
|
; GCN-LABEL: v_powi_2_f32:
|
|
; GCN: ; %bb.0:
|
|
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GCN-NEXT: v_mul_f32_e32 v0, v0, v0
|
|
; GCN-NEXT: s_setpc_b64 s[30:31]
|
|
%res = call float @llvm.powi.f32.i32(float %l, i32 2)
|
|
ret float %res
|
|
}
|
|
|
|
; powi with the constant exponent -2. The checks expect one squaring multiply
; followed by a division sequence with 1.0 as the numerator. GFX7 and GFX8
; have separate check blocks because the division sequence is ordered and
; register-allocated differently on each.
define float @v_powi_neg2_f32(float %l) {
|
|
; GFX7-LABEL: v_powi_neg2_f32:
|
|
; GFX7: ; %bb.0:
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-NEXT: v_mul_f32_e32 v0, v0, v0
|
|
; GFX7-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, 1.0
|
|
; GFX7-NEXT: v_rcp_f32_e32 v2, v1
|
|
; GFX7-NEXT: v_fma_f32 v3, -v1, v2, 1.0
|
|
; GFX7-NEXT: v_fma_f32 v2, v3, v2, v2
|
|
; GFX7-NEXT: v_div_scale_f32 v3, vcc, 1.0, v0, 1.0
|
|
; GFX7-NEXT: v_mul_f32_e32 v4, v3, v2
|
|
; GFX7-NEXT: v_fma_f32 v5, -v1, v4, v3
|
|
; GFX7-NEXT: v_fma_f32 v4, v5, v2, v4
|
|
; GFX7-NEXT: v_fma_f32 v1, -v1, v4, v3
|
|
; GFX7-NEXT: v_div_fmas_f32 v1, v1, v2, v4
|
|
; GFX7-NEXT: v_div_fixup_f32 v0, v1, v0, 1.0
|
|
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-LABEL: v_powi_neg2_f32:
|
|
; GFX8: ; %bb.0:
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-NEXT: v_mul_f32_e32 v0, v0, v0
|
|
; GFX8-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, 1.0
|
|
; GFX8-NEXT: v_div_scale_f32 v2, vcc, 1.0, v0, 1.0
|
|
; GFX8-NEXT: v_rcp_f32_e32 v3, v1
|
|
; GFX8-NEXT: v_fma_f32 v4, -v1, v3, 1.0
|
|
; GFX8-NEXT: v_fma_f32 v3, v4, v3, v3
|
|
; GFX8-NEXT: v_mul_f32_e32 v4, v2, v3
|
|
; GFX8-NEXT: v_fma_f32 v5, -v1, v4, v2
|
|
; GFX8-NEXT: v_fma_f32 v4, v5, v3, v4
|
|
; GFX8-NEXT: v_fma_f32 v1, -v1, v4, v2
|
|
; GFX8-NEXT: v_div_fmas_f32 v1, v1, v3, v4
|
|
; GFX8-NEXT: v_div_fixup_f32 v0, v1, v0, 1.0
|
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
|
%res = call float @llvm.powi.f32.i32(float %l, i32 -2)
|
|
ret float %res
|
|
}
|
|
|
|
; powi with the constant exponent 4. The checks expect two successive
; squaring multiplies.
define float @v_powi_4_f32(float %l) {
|
|
; GCN-LABEL: v_powi_4_f32:
|
|
; GCN: ; %bb.0:
|
|
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GCN-NEXT: v_mul_f32_e32 v0, v0, v0
|
|
; GCN-NEXT: v_mul_f32_e32 v0, v0, v0
|
|
; GCN-NEXT: s_setpc_b64 s[30:31]
|
|
%res = call float @llvm.powi.f32.i32(float %l, i32 4)
|
|
ret float %res
|
|
}
|
|
|
|
; powi with the constant exponent 8. The checks expect three successive
; squaring multiplies.
define float @v_powi_8_f32(float %l) {
|
|
; GCN-LABEL: v_powi_8_f32:
|
|
; GCN: ; %bb.0:
|
|
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GCN-NEXT: v_mul_f32_e32 v0, v0, v0
|
|
; GCN-NEXT: v_mul_f32_e32 v0, v0, v0
|
|
; GCN-NEXT: v_mul_f32_e32 v0, v0, v0
|
|
; GCN-NEXT: s_setpc_b64 s[30:31]
|
|
%res = call float @llvm.powi.f32.i32(float %l, i32 8)
|
|
ret float %res
|
|
}
|
|
|
|
; powi with the constant exponent 16. The checks expect four successive
; squaring multiplies.
define float @v_powi_16_f32(float %l) {
|
|
; GCN-LABEL: v_powi_16_f32:
|
|
; GCN: ; %bb.0:
|
|
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GCN-NEXT: v_mul_f32_e32 v0, v0, v0
|
|
; GCN-NEXT: v_mul_f32_e32 v0, v0, v0
|
|
; GCN-NEXT: v_mul_f32_e32 v0, v0, v0
|
|
; GCN-NEXT: v_mul_f32_e32 v0, v0, v0
|
|
; GCN-NEXT: s_setpc_b64 s[30:31]
|
|
%res = call float @llvm.powi.f32.i32(float %l, i32 16)
|
|
ret float %res
|
|
}
|
|
|
|
; powi with the constant exponent 128. The checks expect seven successive
; squaring multiplies.
define float @v_powi_128_f32(float %l) {
|
|
; GCN-LABEL: v_powi_128_f32:
|
|
; GCN: ; %bb.0:
|
|
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GCN-NEXT: v_mul_f32_e32 v0, v0, v0
|
|
; GCN-NEXT: v_mul_f32_e32 v0, v0, v0
|
|
; GCN-NEXT: v_mul_f32_e32 v0, v0, v0
|
|
; GCN-NEXT: v_mul_f32_e32 v0, v0, v0
|
|
; GCN-NEXT: v_mul_f32_e32 v0, v0, v0
|
|
; GCN-NEXT: v_mul_f32_e32 v0, v0, v0
|
|
; GCN-NEXT: v_mul_f32_e32 v0, v0, v0
|
|
; GCN-NEXT: s_setpc_b64 s[30:31]
|
|
%res = call float @llvm.powi.f32.i32(float %l, i32 128)
|
|
ret float %res
|
|
}
|
|
|
|
; powi with the constant exponent -128. The checks expect seven successive
; squaring multiplies followed by a division sequence with 1.0 as the
; numerator. GFX7 and GFX8 have separate check blocks because the division
; sequence is ordered and register-allocated differently on each.
define float @v_powi_neg128_f32(float %l) {
|
|
; GFX7-LABEL: v_powi_neg128_f32:
|
|
; GFX7: ; %bb.0:
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-NEXT: v_mul_f32_e32 v0, v0, v0
|
|
; GFX7-NEXT: v_mul_f32_e32 v0, v0, v0
|
|
; GFX7-NEXT: v_mul_f32_e32 v0, v0, v0
|
|
; GFX7-NEXT: v_mul_f32_e32 v0, v0, v0
|
|
; GFX7-NEXT: v_mul_f32_e32 v0, v0, v0
|
|
; GFX7-NEXT: v_mul_f32_e32 v0, v0, v0
|
|
; GFX7-NEXT: v_mul_f32_e32 v0, v0, v0
|
|
; GFX7-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, 1.0
|
|
; GFX7-NEXT: v_rcp_f32_e32 v2, v1
|
|
; GFX7-NEXT: v_fma_f32 v3, -v1, v2, 1.0
|
|
; GFX7-NEXT: v_fma_f32 v2, v3, v2, v2
|
|
; GFX7-NEXT: v_div_scale_f32 v3, vcc, 1.0, v0, 1.0
|
|
; GFX7-NEXT: v_mul_f32_e32 v4, v3, v2
|
|
; GFX7-NEXT: v_fma_f32 v5, -v1, v4, v3
|
|
; GFX7-NEXT: v_fma_f32 v4, v5, v2, v4
|
|
; GFX7-NEXT: v_fma_f32 v1, -v1, v4, v3
|
|
; GFX7-NEXT: v_div_fmas_f32 v1, v1, v2, v4
|
|
; GFX7-NEXT: v_div_fixup_f32 v0, v1, v0, 1.0
|
|
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-LABEL: v_powi_neg128_f32:
|
|
; GFX8: ; %bb.0:
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-NEXT: v_mul_f32_e32 v0, v0, v0
|
|
; GFX8-NEXT: v_mul_f32_e32 v0, v0, v0
|
|
; GFX8-NEXT: v_mul_f32_e32 v0, v0, v0
|
|
; GFX8-NEXT: v_mul_f32_e32 v0, v0, v0
|
|
; GFX8-NEXT: v_mul_f32_e32 v0, v0, v0
|
|
; GFX8-NEXT: v_mul_f32_e32 v0, v0, v0
|
|
; GFX8-NEXT: v_mul_f32_e32 v0, v0, v0
|
|
; GFX8-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, 1.0
|
|
; GFX8-NEXT: v_div_scale_f32 v2, vcc, 1.0, v0, 1.0
|
|
; GFX8-NEXT: v_rcp_f32_e32 v3, v1
|
|
; GFX8-NEXT: v_fma_f32 v4, -v1, v3, 1.0
|
|
; GFX8-NEXT: v_fma_f32 v3, v4, v3, v3
|
|
; GFX8-NEXT: v_mul_f32_e32 v4, v2, v3
|
|
; GFX8-NEXT: v_fma_f32 v5, -v1, v4, v2
|
|
; GFX8-NEXT: v_fma_f32 v4, v5, v3, v4
|
|
; GFX8-NEXT: v_fma_f32 v1, -v1, v4, v2
|
|
; GFX8-NEXT: v_div_fmas_f32 v1, v1, v3, v4
|
|
; GFX8-NEXT: v_div_fixup_f32 v0, v1, v0, 1.0
|
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
|
%res = call float @llvm.powi.f32.i32(float %l, i32 -128)
|
|
ret float %res
|
|
}
|
|
|
|
; FIXME: f64 broken
|
|
; define double @v_powi_f64(double %l, i32 %r) {
|
|
; %res = call double @llvm.powi.f64.i32(double %l, i32 %r)
|
|
; ret double %res
|
|
; }
|
|
|
|
; Declarations of the llvm.powi overloads used by this file. Each name is
; mangled on both the floating-point type and the i32 exponent type. The f64
; overload is only referenced by the commented-out v_powi_f64 test in this
; file. All three share attribute group #0.
declare half @llvm.powi.f16.i32(half, i32) #0
|
|
declare float @llvm.powi.f32.i32(float, i32) #0
|
|
declare double @llvm.powi.f64.i32(double, i32) #0
|
|
|
|
attributes #0 = { nounwind readnone speculatable willreturn }
|