; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK

; Same as vecreduce-fadd-legalization.ll, but without fmf.
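; Without fast-math flags (in particular 'reassoc'), vector.reduce.fadd must
; be lowered as a strictly ordered reduction: the scalar start value is added
; to element 0, then element 1, and so on.  The *_neutral variants pass -0.0,
; the identity for fadd, which the backend can fold away.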
declare half @llvm.vector.reduce.fadd.f16.v1f16(half, <1 x half>)
declare float @llvm.vector.reduce.fadd.f32.v1f32(float, <1 x float>)
declare double @llvm.vector.reduce.fadd.f64.v1f64(double, <1 x double>)
declare fp128 @llvm.vector.reduce.fadd.f128.v1f128(fp128, <1 x fp128>)

declare float @llvm.vector.reduce.fadd.f32.v3f32(float, <3 x float>)
declare float @llvm.vector.reduce.fadd.f32.v5f32(float, <5 x float>)
declare fp128 @llvm.vector.reduce.fadd.f128.v2f128(fp128, <2 x fp128>)
declare float @llvm.vector.reduce.fadd.f32.v16f32(float, <16 x float>)
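; <1 x T> reductions: the single element is combined with the start value by
; one scalar fadd (f16 is promoted to f32 for the add and converted back);
; the -0.0 neutral variants lower to no instructions at all.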
define half @test_v1f16(<1 x half> %a, half %s) nounwind {
; CHECK-LABEL: test_v1f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcvt s0, h0
; CHECK-NEXT:    fcvt s1, h1
; CHECK-NEXT:    fadd s0, s1, s0
; CHECK-NEXT:    fcvt h0, s0
; CHECK-NEXT:    ret
  %b = call half @llvm.vector.reduce.fadd.f16.v1f16(half %s, <1 x half> %a)
  ret half %b
}

define half @test_v1f16_neutral(<1 x half> %a) nounwind {
; CHECK-LABEL: test_v1f16_neutral:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ret
  %b = call half @llvm.vector.reduce.fadd.f16.v1f16(half -0.0, <1 x half> %a)
  ret half %b
}

define float @test_v1f32(<1 x float> %a, float %s) nounwind {
; CHECK-LABEL: test_v1f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    fadd s0, s1, s0
; CHECK-NEXT:    ret
  %b = call float @llvm.vector.reduce.fadd.f32.v1f32(float %s, <1 x float> %a)
  ret float %b
}

define float @test_v1f32_neutral(<1 x float> %a) nounwind {
; CHECK-LABEL: test_v1f32_neutral:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
; CHECK-NEXT:    ret
  %b = call float @llvm.vector.reduce.fadd.f32.v1f32(float -0.0, <1 x float> %a)
  ret float %b
}

define double @test_v1f64(<1 x double> %a, double %s) nounwind {
; CHECK-LABEL: test_v1f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fadd d0, d1, d0
; CHECK-NEXT:    ret
  %b = call double @llvm.vector.reduce.fadd.f64.v1f64(double %s, <1 x double> %a)
  ret double %b
}

define double @test_v1f64_neutral(<1 x double> %a) nounwind {
; CHECK-LABEL: test_v1f64_neutral:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ret
  %b = call double @llvm.vector.reduce.fadd.f64.v1f64(double -0.0, <1 x double> %a)
  ret double %b
}
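; AArch64 has no fp128 add instruction, so each fp128 fadd is expanded to a
; call to the __addtf3 libcall; when the call is the final operation it is
; emitted as a tail call (plain 'b __addtf3').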
define fp128 @test_v1f128(<1 x fp128> %a, fp128 %s) nounwind {
; CHECK-LABEL: test_v1f128:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v2.16b, v0.16b
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    mov v1.16b, v2.16b
; CHECK-NEXT:    b __addtf3
  %b = call fp128 @llvm.vector.reduce.fadd.f128.v1f128(fp128 %s, <1 x fp128> %a)
  ret fp128 %b
}

define fp128 @test_v1f128_neutral(<1 x fp128> %a) nounwind {
; CHECK-LABEL: test_v1f128_neutral:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ret
  %b = call fp128 @llvm.vector.reduce.fadd.f128.v1f128(fp128 0xL00000000000000008000000000000000, <1 x fp128> %a)
  ret fp128 %b
}
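; Non-power-of-two element counts (<3 x float>, <5 x float>) are reduced by
; extracting the lanes and chaining scalar fadds in order.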
define float @test_v3f32(<3 x float> %a, float %s) nounwind {
; CHECK-LABEL: test_v3f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fadd s1, s1, s0
; CHECK-NEXT:    mov s2, v0.s[1]
; CHECK-NEXT:    fadd s1, s1, s2
; CHECK-NEXT:    mov s0, v0.s[2]
; CHECK-NEXT:    fadd s0, s1, s0
; CHECK-NEXT:    ret
  %b = call float @llvm.vector.reduce.fadd.f32.v3f32(float %s, <3 x float> %a)
  ret float %b
}

define float @test_v3f32_neutral(<3 x float> %a) nounwind {
; CHECK-LABEL: test_v3f32_neutral:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov s1, v0.s[2]
; CHECK-NEXT:    faddp s0, v0.2s
; CHECK-NEXT:    fadd s0, s0, s1
; CHECK-NEXT:    ret
  %b = call float @llvm.vector.reduce.fadd.f32.v3f32(float -0.0, <3 x float> %a)
  ret float %b
}

define float @test_v5f32(<5 x float> %a, float %s) nounwind {
; CHECK-LABEL: test_v5f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fadd s0, s5, s0
; CHECK-NEXT:    fadd s0, s0, s1
; CHECK-NEXT:    fadd s0, s0, s2
; CHECK-NEXT:    fadd s0, s0, s3
; CHECK-NEXT:    fadd s0, s0, s4
; CHECK-NEXT:    ret
  %b = call float @llvm.vector.reduce.fadd.f32.v5f32(float %s, <5 x float> %a)
  ret float %b
}

define float @test_v5f32_neutral(<5 x float> %a) nounwind {
; CHECK-LABEL: test_v5f32_neutral:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fadd s0, s0, s1
; CHECK-NEXT:    fadd s0, s0, s2
; CHECK-NEXT:    fadd s0, s0, s3
; CHECK-NEXT:    fadd s0, s0, s4
; CHECK-NEXT:    ret
  %b = call float @llvm.vector.reduce.fadd.f32.v5f32(float -0.0, <5 x float> %a)
  ret float %b
}
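; <2 x fp128> needs two __addtf3 calls: the second vector element is spilled
; across the first call and reloaded, and the final add is again a tail call.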
define fp128 @test_v2f128(<2 x fp128> %a, fp128 %s) nounwind {
; CHECK-LABEL: test_v2f128:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub sp, sp, #32 // =32
; CHECK-NEXT:    str q1, [sp] // 16-byte Folded Spill
; CHECK-NEXT:    mov v1.16b, v0.16b
; CHECK-NEXT:    mov v0.16b, v2.16b
; CHECK-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
; CHECK-NEXT:    bl __addtf3
; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
; CHECK-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
; CHECK-NEXT:    add sp, sp, #32 // =32
; CHECK-NEXT:    b __addtf3
  %b = call fp128 @llvm.vector.reduce.fadd.f128.v2f128(fp128 %s, <2 x fp128> %a)
  ret fp128 %b
}

define fp128 @test_v2f128_neutral(<2 x fp128> %a) nounwind {
; CHECK-LABEL: test_v2f128_neutral:
; CHECK:       // %bb.0:
; CHECK-NEXT:    b __addtf3
  %b = call fp128 @llvm.vector.reduce.fadd.f128.v2f128(fp128 0xL00000000000000008000000000000000, <2 x fp128> %a)
  ret fp128 %b
}
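; <16 x float> arrives in q0-q3 and is reduced lane by lane, in order, across
; all four registers.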
define float @test_v16f32(<16 x float> %a, float %s) nounwind {
; CHECK-LABEL: test_v16f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov s22, v0.s[3]
; CHECK-NEXT:    mov s23, v0.s[2]
; CHECK-NEXT:    mov s24, v0.s[1]
; CHECK-NEXT:    fadd s0, s4, s0
; CHECK-NEXT:    fadd s0, s0, s24
; CHECK-NEXT:    fadd s0, s0, s23
; CHECK-NEXT:    fadd s0, s0, s22
; CHECK-NEXT:    mov s21, v1.s[1]
; CHECK-NEXT:    fadd s0, s0, s1
; CHECK-NEXT:    mov s20, v1.s[2]
; CHECK-NEXT:    fadd s0, s0, s21
; CHECK-NEXT:    mov s19, v1.s[3]
; CHECK-NEXT:    fadd s0, s0, s20
; CHECK-NEXT:    fadd s0, s0, s19
; CHECK-NEXT:    mov s18, v2.s[1]
; CHECK-NEXT:    fadd s0, s0, s2
; CHECK-NEXT:    mov s17, v2.s[2]
; CHECK-NEXT:    fadd s0, s0, s18
; CHECK-NEXT:    mov s16, v2.s[3]
; CHECK-NEXT:    fadd s0, s0, s17
; CHECK-NEXT:    fadd s0, s0, s16
; CHECK-NEXT:    mov s7, v3.s[1]
; CHECK-NEXT:    fadd s0, s0, s3
; CHECK-NEXT:    mov s6, v3.s[2]
; CHECK-NEXT:    fadd s0, s0, s7
; CHECK-NEXT:    mov s5, v3.s[3]
; CHECK-NEXT:    fadd s0, s0, s6
; CHECK-NEXT:    fadd s0, s0, s5
; CHECK-NEXT:    ret
  %b = call float @llvm.vector.reduce.fadd.f32.v16f32(float %s, <16 x float> %a)
  ret float %b
}

define float @test_v16f32_neutral(<16 x float> %a) nounwind {
; CHECK-LABEL: test_v16f32_neutral:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov s21, v0.s[3]
; CHECK-NEXT:    mov s22, v0.s[2]
; CHECK-NEXT:    faddp s0, v0.2s
; CHECK-NEXT:    fadd s0, s0, s22
; CHECK-NEXT:    fadd s0, s0, s21
; CHECK-NEXT:    mov s20, v1.s[1]
; CHECK-NEXT:    fadd s0, s0, s1
; CHECK-NEXT:    mov s19, v1.s[2]
; CHECK-NEXT:    fadd s0, s0, s20
; CHECK-NEXT:    mov s18, v1.s[3]
; CHECK-NEXT:    fadd s0, s0, s19
; CHECK-NEXT:    fadd s0, s0, s18
; CHECK-NEXT:    mov s17, v2.s[1]
; CHECK-NEXT:    fadd s0, s0, s2
; CHECK-NEXT:    mov s16, v2.s[2]
; CHECK-NEXT:    fadd s0, s0, s17
; CHECK-NEXT:    mov s7, v2.s[3]
; CHECK-NEXT:    fadd s0, s0, s16
; CHECK-NEXT:    fadd s0, s0, s7
; CHECK-NEXT:    mov s6, v3.s[1]
; CHECK-NEXT:    fadd s0, s0, s3
; CHECK-NEXT:    mov s5, v3.s[2]
; CHECK-NEXT:    fadd s0, s0, s6
; CHECK-NEXT:    mov s4, v3.s[3]
; CHECK-NEXT:    fadd s0, s0, s5
; CHECK-NEXT:    fadd s0, s0, s4
; CHECK-NEXT:    ret
  %b = call float @llvm.vector.reduce.fadd.f32.v16f32(float -0.0, <16 x float> %a)
  ret float %b
}