mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 11:13:28 +01:00
38c0bf429c
r212242 introduced a legalizer hook, originally to let AArch64 widen v1i{32,16,8} rather than scalarize, because the legalizer expected, when scalarizing the result of a conversion operation, to already have scalarized the operands. On AArch64, v1i64 is legal, so that commit ensured operations such as v1i32 = trunc v1i64 wouldn't assert. It did that by choosing to widen v1 types whenever possible. However, v1i1 types, for which there's no legal widened type, would still trigger the assert. This commit fixes that, by only scalarizing a trunc's result when the operand has already been scalarized, and introducing an extract_elt otherwise. This is similar to r205625. Fixes PR20777. llvm-svn: 220937
83 lines
2.8 KiB
LLVM
83 lines
2.8 KiB
LLVM
; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon -verify-machineinstrs < %s | FileCheck %s
|
|
|
|
; An optimization in DAG Combiner to fold
|
|
; (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)
|
|
; will generate nodes like:
|
|
; v1i32 trunc v1i64, v1i16 trunc v1i64, v1i8 trunc v1i64.
|
|
; And such nodes will be scalarized by default in type legalization. But such
|
|
; scalarization will cause an assertion failure, as v1i64 is a legal type in
|
|
; AArch64. We change the default behaviour from scalarizing to widening.
|
|
|
|
; FIXME: Currently XTN is generated for v1i32, but it can be optimized.
|
|
; As with v1i16 and v1i8, no XTN should be generated.
|
|
|
|
; Places the single i64 element of %in0 in lane 0 of a <2 x i64> (lane 1 is
; undef) and truncates the result to <2 x i32>. The expected output for this
; v1i32 case is an XTN of the whole vector.
define <2 x i32> @test_v1i32_0(<1 x i64> %in0) {
; CHECK-LABEL: test_v1i32_0:
; CHECK: xtn v0.2s, v0.2d
  %1 = shufflevector <1 x i64> %in0, <1 x i64> undef, <2 x i32> <i32 0, i32 undef>
  %2 = trunc <2 x i64> %1 to <2 x i32>
  ret <2 x i32> %2
}
|
|
|
|
; Same as test_v1i32_0, except that the i64 element of %in0 is placed in
; lane 1 (lane 0 is undef). The expected output is an XTN immediately
; followed by a DUP of the 32-bit lane 0.
define <2 x i32> @test_v1i32_1(<1 x i64> %in0) {
; CHECK-LABEL: test_v1i32_1:
; CHECK: xtn v0.2s, v0.2d
; CHECK-NEXT: dup v0.2s, v0.s[0]
  %1 = shufflevector <1 x i64> %in0, <1 x i64> undef, <2 x i32> <i32 undef, i32 0>
  %2 = trunc <2 x i64> %1 to <2 x i32>
  ret <2 x i32> %2
}
|
|
|
|
; Places the single i64 element of %in0 in lane 0 of a <4 x i64> (all other
; lanes undef) and truncates the result to <4 x i16>. No XTN may appear in
; the output for this function.
define <4 x i16> @test_v1i16_0(<1 x i64> %in0) {
; CHECK-LABEL: test_v1i16_0:
; CHECK-NOT: xtn
  %1 = shufflevector <1 x i64> %in0, <1 x i64> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
  %2 = trunc <4 x i64> %1 to <4 x i16>
  ret <4 x i16> %2
}
|
|
|
|
; Same as test_v1i16_0, except that the i64 element of %in0 is placed in
; lane 2 (all other lanes undef). The expected output contains a DUP of the
; 16-bit lane 0 and no XTN before it.
define <4 x i16> @test_v1i16_1(<1 x i64> %in0) {
; CHECK-LABEL: test_v1i16_1:
; CHECK-NOT: xtn
; CHECK: dup v0.4h, v0.h[0]
  %1 = shufflevector <1 x i64> %in0, <1 x i64> undef, <4 x i32> <i32 undef, i32 undef, i32 0, i32 undef>
  %2 = trunc <4 x i64> %1 to <4 x i16>
  ret <4 x i16> %2
}
|
|
|
|
; Places the single i64 element of %in0 in lane 0 of an <8 x i64> (all other
; lanes undef) and truncates the result to <8 x i8>. No XTN may appear in
; the output for this function.
define <8 x i8> @test_v1i8_0(<1 x i64> %in0) {
; CHECK-LABEL: test_v1i8_0:
; CHECK-NOT: xtn
  %1 = shufflevector <1 x i64> %in0, <1 x i64> undef, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = trunc <8 x i64> %1 to <8 x i8>
  ret <8 x i8> %2
}
|
|
|
|
; Same as test_v1i8_0, except that the i64 element of %in0 is placed in
; lane 2 (all other lanes undef). The expected output contains a DUP of the
; 8-bit lane 0 and no XTN before it.
define <8 x i8> @test_v1i8_1(<1 x i64> %in0) {
; CHECK-LABEL: test_v1i8_1:
; CHECK-NOT: xtn
; CHECK: dup v0.8b, v0.b[0]
  %1 = shufflevector <1 x i64> %in0, <1 x i64> undef, <8 x i32> <i32 undef, i32 undef, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = trunc <8 x i64> %1 to <8 x i8>
  ret <8 x i8> %2
}
|
|
|
|
; PR20777: v1i1 is also problematic, but we can't widen it, so we extract_elt
|
|
; the i64 out of the v1i64 operand, and truncate that scalar instead.
|
|
|
|
; Truncates <1 x i64> directly to <1 x i1> and returns the vector. The
; expected output moves the low 32 bits of the input register (s0) into w0.
define <1 x i1> @test_v1i1_0(<1 x i64> %in0) {
; CHECK-LABEL: test_v1i1_0:
; CHECK: fmov w0, s0
  %1 = trunc <1 x i64> %in0 to <1 x i1>
  ret <1 x i1> %1
}
|
|
|
|
; Truncates <1 x i64> to <1 x i1>, then extracts the only element and returns
; it as a scalar i1. The expected output moves s0 into a w register and then
; masks that register with #0x1 into w0.
define i1 @test_v1i1_1(<1 x i64> %in0) {
; CHECK-LABEL: test_v1i1_1:
; CHECK: fmov [[REG:w[0-9]+]], s0
  %1 = trunc <1 x i64> %in0 to <1 x i1>
; CHECK: and w0, [[REG]], #0x1
  %2 = extractelement <1 x i1> %1, i32 0
  ret i1 %2
}
|