mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 18:54:02 +01:00
2b3fd0441a
This patch adds generation of the SIMD bitwise insert instructions BIT and BIF. In the absence of GCC-like functionality for optimal constraint satisfaction during register allocation, the bitwise insert and select patterns are matched by a pseudo bitwise select instruction, BSP, whose def is not tied. After register allocation it is expanded to BSL, BIT, or BIF with a tied def, depending on the operand registers. This makes it possible to get rid of redundant moves. Reviewers: t.p.northover, samparker, dmgreen Reviewed By: dmgreen Differential Revision: https://reviews.llvm.org/D74147
308 lines
10 KiB
LLVM
308 lines
10 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast < %s -verify-machineinstrs | FileCheck %s
|
|
|
|
; A scalar i8 equality compare chooses between two whole <8 x i8> vectors.
; The checks expect both scalars to be moved into SIMD registers, compared
; with cmeq, byte lane 0 of the mask splatted with dup, and a single bif.
define <8 x i8> @test_select_cc_v8i8_i8(i8 %a, i8 %b, <8 x i8> %c, <8 x i8> %d) {
; CHECK-LABEL: test_select_cc_v8i8_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov s2, w1
; CHECK-NEXT: fmov s3, w0
; CHECK-NEXT: cmeq v2.8b, v3.8b, v2.8b
; CHECK-NEXT: dup v2.8b, v2.b[0]
; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
; CHECK-NEXT: ret
  %is_eq = icmp eq i8 %a, %b
  %picked = select i1 %is_eq, <8 x i8> %c, <8 x i8> %d
  ret <8 x i8> %picked
}
|
|
|
|
; An ordered-equal float compare chooses between two <8 x i8> vectors.
; The checks expect a vector fcmeq on .2s lanes, a dup of lane 0, and a bsl
; whose destination v0 is the register that already holds the mask.
define <8 x i8> @test_select_cc_v8i8_f32(float %a, float %b, <8 x i8> %c, <8 x i8> %d) {
; CHECK-LABEL: test_select_cc_v8i8_f32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $s0 killed $s0 def $d0
; CHECK-NEXT: // kill: def $s1 killed $s1 def $d1
; CHECK-NEXT: fcmeq v0.2s, v0.2s, v1.2s
; CHECK-NEXT: dup v0.2s, v0.s[0]
; CHECK-NEXT: bsl v0.8b, v2.8b, v3.8b
; CHECK-NEXT: ret
  %is_oeq = fcmp oeq float %a, %b
  %picked = select i1 %is_oeq, <8 x i8> %c, <8 x i8> %d
  ret <8 x i8> %picked
}
|
|
|
|
; An ordered-equal double compare chooses between two <8 x i8> vectors.
; The checks expect a scalar fcmeq on d registers feeding bsl directly,
; with no dup in between.
define <8 x i8> @test_select_cc_v8i8_f64(double %a, double %b, <8 x i8> %c, <8 x i8> %d) {
; CHECK-LABEL: test_select_cc_v8i8_f64:
; CHECK: // %bb.0:
; CHECK-NEXT: fcmeq d0, d0, d1
; CHECK-NEXT: bsl v0.8b, v2.8b, v3.8b
; CHECK-NEXT: ret
  %is_oeq = fcmp oeq double %a, %b
  %picked = select i1 %is_oeq, <8 x i8> %c, <8 x i8> %d
  ret <8 x i8> %picked
}
|
|
|
|
; 128-bit counterpart of the i8 case: a scalar i8 equality compare chooses
; between two <16 x i8> vectors. The checks expect cmeq on .16b, a dup of
; byte lane 0, and a single bif.
define <16 x i8> @test_select_cc_v16i8_i8(i8 %a, i8 %b, <16 x i8> %c, <16 x i8> %d) {
; CHECK-LABEL: test_select_cc_v16i8_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov s2, w1
; CHECK-NEXT: fmov s3, w0
; CHECK-NEXT: cmeq v2.16b, v3.16b, v2.16b
; CHECK-NEXT: dup v2.16b, v2.b[0]
; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-NEXT: ret
  %is_eq = icmp eq i8 %a, %b
  %picked = select i1 %is_eq, <16 x i8> %c, <16 x i8> %d
  ret <16 x i8> %picked
}
|
|
|
|
; An ordered-equal float compare chooses between two <16 x i8> vectors.
; The checks expect fcmeq on .4s lanes, a dup of lane 0, and a bsl whose
; destination v0 already holds the mask.
define <16 x i8> @test_select_cc_v16i8_f32(float %a, float %b, <16 x i8> %c, <16 x i8> %d) {
; CHECK-LABEL: test_select_cc_v16i8_f32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
; CHECK-NEXT: // kill: def $s1 killed $s1 def $q1
; CHECK-NEXT: fcmeq v0.4s, v0.4s, v1.4s
; CHECK-NEXT: dup v0.4s, v0.s[0]
; CHECK-NEXT: bsl v0.16b, v2.16b, v3.16b
; CHECK-NEXT: ret
  %is_oeq = fcmp oeq float %a, %b
  %picked = select i1 %is_oeq, <16 x i8> %c, <16 x i8> %d
  ret <16 x i8> %picked
}
|
|
|
|
; An ordered-equal double compare chooses between two <16 x i8> vectors.
; The checks expect fcmeq on .2d lanes, a dup of d lane 0, and a bsl whose
; destination v0 already holds the mask.
define <16 x i8> @test_select_cc_v16i8_f64(double %a, double %b, <16 x i8> %c, <16 x i8> %d) {
; CHECK-LABEL: test_select_cc_v16i8_f64:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: fcmeq v0.2d, v0.2d, v1.2d
; CHECK-NEXT: dup v0.2d, v0.d[0]
; CHECK-NEXT: bsl v0.16b, v2.16b, v3.16b
; CHECK-NEXT: ret
  %is_oeq = fcmp oeq double %a, %b
  %picked = select i1 %is_oeq, <16 x i8> %c, <16 x i8> %d
  ret <16 x i8> %picked
}
|
|
|
|
; A scalar i16 equality compare chooses between two <4 x i16> vectors.
; The checks expect cmeq on .4h lanes, a dup of halfword lane 0, and a
; bif on the .8b view of the registers.
define <4 x i16> @test_select_cc_v4i16(i16 %a, i16 %b, <4 x i16> %c, <4 x i16> %d) {
; CHECK-LABEL: test_select_cc_v4i16:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov s2, w1
; CHECK-NEXT: fmov s3, w0
; CHECK-NEXT: cmeq v2.4h, v3.4h, v2.4h
; CHECK-NEXT: dup v2.4h, v2.h[0]
; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
; CHECK-NEXT: ret
  %is_eq = icmp eq i16 %a, %b
  %picked = select i1 %is_eq, <4 x i16> %c, <4 x i16> %d
  ret <4 x i16> %picked
}
|
|
|
|
; A scalar i16 equality compare chooses between two <8 x i16> vectors.
; The checks expect cmeq on .8h lanes, a dup of halfword lane 0, and a
; bif on the .16b view of the registers.
define <8 x i16> @test_select_cc_v8i16(i16 %a, i16 %b, <8 x i16> %c, <8 x i16> %d) {
; CHECK-LABEL: test_select_cc_v8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov s2, w1
; CHECK-NEXT: fmov s3, w0
; CHECK-NEXT: cmeq v2.8h, v3.8h, v2.8h
; CHECK-NEXT: dup v2.8h, v2.h[0]
; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-NEXT: ret
  %is_eq = icmp eq i16 %a, %b
  %picked = select i1 %is_eq, <8 x i16> %c, <8 x i16> %d
  ret <8 x i16> %picked
}
|
|
|
|
; A scalar i32 equality compare chooses between two <2 x i32> vectors.
; The checks expect cmeq on .2s lanes, a dup of word lane 0, and a bif on
; the .8b view of the registers.
define <2 x i32> @test_select_cc_v2i32(i32 %a, i32 %b, <2 x i32> %c, <2 x i32> %d) {
; CHECK-LABEL: test_select_cc_v2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov s2, w1
; CHECK-NEXT: fmov s3, w0
; CHECK-NEXT: cmeq v2.2s, v3.2s, v2.2s
; CHECK-NEXT: dup v2.2s, v2.s[0]
; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
; CHECK-NEXT: ret
  %is_eq = icmp eq i32 %a, %b
  %picked = select i1 %is_eq, <2 x i32> %c, <2 x i32> %d
  ret <2 x i32> %picked
}
|
|
|
|
; A scalar i32 equality compare chooses between two <4 x i32> vectors.
; The checks expect cmeq on .4s lanes, a dup of word lane 0, and a bif on
; the .16b view of the registers.
define <4 x i32> @test_select_cc_v4i32(i32 %a, i32 %b, <4 x i32> %c, <4 x i32> %d) {
; CHECK-LABEL: test_select_cc_v4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov s2, w1
; CHECK-NEXT: fmov s3, w0
; CHECK-NEXT: cmeq v2.4s, v3.4s, v2.4s
; CHECK-NEXT: dup v2.4s, v2.s[0]
; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-NEXT: ret
  %is_eq = icmp eq i32 %a, %b
  %picked = select i1 %is_eq, <4 x i32> %c, <4 x i32> %d
  ret <4 x i32> %picked
}
|
|
|
|
; A scalar i64 equality compare chooses between two <1 x i64> vectors.
; The checks expect a scalar cmeq on d registers feeding bif directly,
; with no dup in between.
define <1 x i64> @test_select_cc_v1i64(i64 %a, i64 %b, <1 x i64> %c, <1 x i64> %d) {
; CHECK-LABEL: test_select_cc_v1i64:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov d2, x1
; CHECK-NEXT: fmov d3, x0
; CHECK-NEXT: cmeq d2, d3, d2
; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
; CHECK-NEXT: ret
  %is_eq = icmp eq i64 %a, %b
  %picked = select i1 %is_eq, <1 x i64> %c, <1 x i64> %d
  ret <1 x i64> %picked
}
|
|
|
|
; A scalar i64 equality compare chooses between two <2 x i64> vectors.
; The checks expect cmeq on .2d lanes, a dup of doubleword lane 0, and a
; bif on the .16b view of the registers.
define <2 x i64> @test_select_cc_v2i64(i64 %a, i64 %b, <2 x i64> %c, <2 x i64> %d) {
; CHECK-LABEL: test_select_cc_v2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov d2, x1
; CHECK-NEXT: fmov d3, x0
; CHECK-NEXT: cmeq v2.2d, v3.2d, v2.2d
; CHECK-NEXT: dup v2.2d, v2.d[0]
; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-NEXT: ret
  %is_eq = icmp eq i64 %a, %b
  %picked = select i1 %is_eq, <2 x i64> %c, <2 x i64> %d
  ret <2 x i64> %picked
}
|
|
|
|
; An ordered-equal float compare chooses between two <1 x float> vectors.
; The checks expect fcmeq on .2s lanes followed immediately by bsl; no dup
; of the mask is expected for this single-element type.
define <1 x float> @test_select_cc_v1f32(float %a, float %b, <1 x float> %c, <1 x float> %d) {
; CHECK-LABEL: test_select_cc_v1f32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $s0 killed $s0 def $d0
; CHECK-NEXT: // kill: def $s1 killed $s1 def $d1
; CHECK-NEXT: fcmeq v0.2s, v0.2s, v1.2s
; CHECK-NEXT: bsl v0.8b, v2.8b, v3.8b
; CHECK-NEXT: ret
  %is_oeq = fcmp oeq float %a, %b
  %picked = select i1 %is_oeq, <1 x float> %c, <1 x float> %d
  ret <1 x float> %picked
}
|
|
|
|
; An ordered-equal float compare chooses between two <2 x float> vectors.
; The checks expect fcmeq on .2s lanes, a dup of lane 0, and a bsl whose
; destination v0 already holds the mask.
define <2 x float> @test_select_cc_v2f32(float %a, float %b, <2 x float> %c, <2 x float> %d) {
; CHECK-LABEL: test_select_cc_v2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $s0 killed $s0 def $d0
; CHECK-NEXT: // kill: def $s1 killed $s1 def $d1
; CHECK-NEXT: fcmeq v0.2s, v0.2s, v1.2s
; CHECK-NEXT: dup v0.2s, v0.s[0]
; CHECK-NEXT: bsl v0.8b, v2.8b, v3.8b
; CHECK-NEXT: ret
  %is_oeq = fcmp oeq float %a, %b
  %picked = select i1 %is_oeq, <2 x float> %c, <2 x float> %d
  ret <2 x float> %picked
}
|
|
|
|
; An ordered-equal float compare chooses between two <4 x float> vectors.
; The checks expect fcmeq on .4s lanes, a dup of lane 0, and a bsl whose
; destination v0 already holds the mask.
define <4 x float> @test_select_cc_v4f32(float %a, float %b, <4 x float> %c, <4 x float> %d) {
; CHECK-LABEL: test_select_cc_v4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
; CHECK-NEXT: // kill: def $s1 killed $s1 def $q1
; CHECK-NEXT: fcmeq v0.4s, v0.4s, v1.4s
; CHECK-NEXT: dup v0.4s, v0.s[0]
; CHECK-NEXT: bsl v0.16b, v2.16b, v3.16b
; CHECK-NEXT: ret
  %is_oeq = fcmp oeq float %a, %b
  %picked = select i1 %is_oeq, <4 x float> %c, <4 x float> %d
  ret <4 x float> %picked
}
|
|
|
|
; An integer (i32) equality compare chooses between two <4 x float> vectors.
; The checks expect the same sequence as the <4 x i32> case: cmeq on .4s,
; a dup of lane 0, and a bif.
define <4 x float> @test_select_cc_v4f32_icmp(i32 %a, i32 %b, <4 x float> %c, <4 x float> %d) {
; CHECK-LABEL: test_select_cc_v4f32_icmp:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov s2, w1
; CHECK-NEXT: fmov s3, w0
; CHECK-NEXT: cmeq v2.4s, v3.4s, v2.4s
; CHECK-NEXT: dup v2.4s, v2.s[0]
; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-NEXT: ret
  %is_eq = icmp eq i32 %a, %b
  %picked = select i1 %is_eq, <4 x float> %c, <4 x float> %d
  ret <4 x float> %picked
}
|
|
|
|
; An ordered-equal double compare chooses between two <1 x double> vectors.
; The checks expect a scalar fcmeq on d registers feeding bsl directly.
define <1 x double> @test_select_cc_v1f64(double %a, double %b, <1 x double> %c, <1 x double> %d) {
; CHECK-LABEL: test_select_cc_v1f64:
; CHECK: // %bb.0:
; CHECK-NEXT: fcmeq d0, d0, d1
; CHECK-NEXT: bsl v0.8b, v2.8b, v3.8b
; CHECK-NEXT: ret
  %is_oeq = fcmp oeq double %a, %b
  %picked = select i1 %is_oeq, <1 x double> %c, <1 x double> %d
  ret <1 x double> %picked
}
|
|
|
|
; An integer (i64) equality compare chooses between two <1 x double> vectors.
; The checks expect a scalar cmeq on d registers feeding bif directly,
; with no dup in between.
define <1 x double> @test_select_cc_v1f64_icmp(i64 %a, i64 %b, <1 x double> %c, <1 x double> %d) {
; CHECK-LABEL: test_select_cc_v1f64_icmp:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov d2, x1
; CHECK-NEXT: fmov d3, x0
; CHECK-NEXT: cmeq d2, d3, d2
; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
; CHECK-NEXT: ret
  %is_eq = icmp eq i64 %a, %b
  %picked = select i1 %is_eq, <1 x double> %c, <1 x double> %d
  ret <1 x double> %picked
}
|
|
|
|
; An ordered-equal double compare chooses between two <2 x double> vectors.
; The checks expect fcmeq on .2d lanes, a dup of d lane 0, and a bsl whose
; destination v0 already holds the mask.
define <2 x double> @test_select_cc_v2f64(double %a, double %b, <2 x double> %c, <2 x double> %d) {
; CHECK-LABEL: test_select_cc_v2f64:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: fcmeq v0.2d, v0.2d, v1.2d
; CHECK-NEXT: dup v0.2d, v0.d[0]
; CHECK-NEXT: bsl v0.16b, v2.16b, v3.16b
; CHECK-NEXT: ret
  %is_oeq = fcmp oeq double %a, %b
  %picked = select i1 %is_oeq, <2 x double> %c, <2 x double> %d
  ret <2 x double> %picked
}
|
|
|
|
; Special case (part of PR21549): when the select condition is an icmp whose
; operands are i1, the comparison must not be done on vectors. The checks
; expect a scalar tst/csetm pair that builds the mask in w8, a dup of that
; register into .2s lanes, and then a bif.
define <2 x i32> @test_select_cc_v2i32_icmpi1(i1 %cc, <2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: test_select_cc_v2i32_icmpi1:
; CHECK: // %bb.0:
; CHECK-NEXT: tst w0, #0x1
; CHECK-NEXT: csetm w8, ne
; CHECK-NEXT: dup v2.2s, w8
; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
; CHECK-NEXT: ret
  %cc_set = icmp ne i1 %cc, 0
  %picked = select i1 %cc_set, <2 x i32> %a, <2 x i32> %b
  ret <2 x i32> %picked
}
|
|
|
|
; Irregular, non-power-of-2 vector types such as v3f32 must be supported too.
; Here a float compare chooses between two <3 x float> vectors; the checks
; expect the 128-bit sequence: fcmeq on .4s, a dup of lane 0, and a bif on
; .16b.
define <3 x float> @test_select_cc_v3f32_fcmp_f32(<3 x float> %a, <3 x float> %b, float %c1, float %c2) #0 {
; CHECK-LABEL: test_select_cc_v3f32_fcmp_f32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $s2 killed $s2 def $q2
; CHECK-NEXT: // kill: def $s3 killed $s3 def $q3
; CHECK-NEXT: fcmeq v2.4s, v2.4s, v3.4s
; CHECK-NEXT: dup v2.4s, v2.s[0]
; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-NEXT: ret
  %is_oeq = fcmp oeq float %c1, %c2
  %picked = select i1 %is_oeq, <3 x float> %a, <3 x float> %b
  ret <3 x float> %picked
}
|
|
|
|
; A double compare chooses between two <3 x float> vectors, so the compare
; element type differs from the selected element type. The checks expect
; fcmeq on .2d lanes, a dup of d lane 0, and a bif on .16b.
define <3 x float> @test_select_cc_v3f32_fcmp_f64(<3 x float> %a, <3 x float> %b, double %c1, double %c2) #0 {
; CHECK-LABEL: test_select_cc_v3f32_fcmp_f64:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT: // kill: def $d3 killed $d3 def $q3
; CHECK-NEXT: fcmeq v2.2d, v2.2d, v3.2d
; CHECK-NEXT: dup v2.2d, v2.d[0]
; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-NEXT: ret
  %is_oeq = fcmp oeq double %c1, %c2
  %picked = select i1 %is_oeq, <3 x float> %a, <3 x float> %b
  ret <3 x float> %picked
}
|
|
|
|
; Attribute group #0 is attached to the two v3f32 test functions.
attributes #0 = { nounwind }
|