llvm-mirror/test/CodeGen/AArch64/arm64-build-vector.ll

; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s

; Assemble a <4 x float> from four scalar float arguments, one lane per
; insertelement. The assertions below require that lanes 1-3 are filled by
; lane moves from v1-v3 and that no redundant self-copy of lane zero of v0
; is emitted.
define <4 x float> @foo(float %a, float %b, float %c, float %d) nounwind {
; CHECK-LABEL: foo:
; CHECK-NOT: mov.s v0[0], v0[0]
; CHECK: mov.s v0[1], v1[0]
; CHECK: mov.s v0[2], v2[0]
; CHECK: mov.s v0[3], v3[0]
; CHECK: ret
  %lane0 = insertelement <4 x float> undef, float %a, i32 0
  %lane01 = insertelement <4 x float> %lane0, float %b, i32 1
  %lane012 = insertelement <4 x float> %lane01, float %c, i32 2
  %result = insertelement <4 x float> %lane012, float %d, i32 3
  ret <4 x float> %result
}

; Exercises constant vector operands in which only lane 0 is defined and all
; remaining lanes are undef. The expected code materializes 44672 (the
; unsigned 16-bit encoding of -20864) in a general-purpose register, moves it
; into a vector register and then issues a single vector multiply.
define <8 x i16> @build_all_zero(<8 x i16> %a) #1 {
; CHECK-LABEL: build_all_zero:
; CHECK: mov	w[[GREG:[0-9]+]], #44672
; CHECK-NEXT:	fmov	s[[FREG:[0-9]+]], w[[GREG]]
; CHECK-NEXT:	mul.8h	v0, v0, v[[FREG]]
  %biased = add <8 x i16> %a, <i16 -32768, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef>
  %scaled = mul <8 x i16> %biased, <i16 -20864, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef>
  ret <8 x i16> %scaled
}

; The DAG combiner performs the following fold:
;   (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
;     -> (BUILD_VECTOR A, B, ..., C, D, ...)
; This test covers the situation where the operands A, B and C, D have
; different types and verifies that compilation completes without hitting an
; assertion failure.
define <8 x i16> @concat_2_build_vector(<4 x i16> %in0) {
; CHECK-LABEL: concat_2_build_vector:
; CHECK: movi
  ; Two successive left shifts (8, then 9) applied to every i16 lane.
  %shifted.once = shl <4 x i16> %in0, <i16 8, i16 8, i16 8, i16 8>
  %shifted.twice = shl <4 x i16> %shifted.once, <i16 9, i16 9, i16 9, i16 9>
  ; Concatenate the shifted half with an all-zero half to form 8 lanes.
  %widened = shufflevector <4 x i16> %shifted.twice, <4 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %widened
}

; When a widened f16 BUILD_VECTOR is lowered, the backend tries to optimize
; it by constructing an equivalent integer vector and BITCAST-ing the result.
; This test verifies that normalizing the vector yields a valid result. The
; constant (0xH33EE, i.e. 13294 as an integer) is chosen so that earlier
; passes do not replace the BUILD_VECTOR.
define void @widen_f16_build_vector(half* %addr) {
; CHECK-LABEL: widen_f16_build_vector:
; CHECK: mov    w[[GREG:[0-9]+]], #13294
; CHECK: dup.4h v0, w[[GREG]]
; CHECK: str    s0, [x0]
  ; Reinterpret the scalar half pointer as a pointer to a two-lane vector.
  %vec.addr = bitcast half* %addr to <2 x half>*
  store <2 x half> <half 0xH33EE, half 0xH33EE>, <2 x half>* %vec.addr, align 2
  ret void
}

; Check that a single-element integer vector constant is built by
; materializing the value in a general-purpose register and transferring it
; to a vector register with fmov, after which a scalar d-register add is used.
;
; The label assertion carries a trailing ':' (as the other labels in this
; file do) so that it anchors on the function's label definition rather than
; on any earlier mention of the symbol name in the output.
define <1 x i64> @single_element_vector_i64(<1 x i64> %arg) {
; CHECK-LABEL: single_element_vector_i64:
; CHECK: orr w[[GREG:[0-9]+]], wzr, #0x1
; CHECK: fmov d[[DREG:[0-9]+]], x[[GREG]]
; CHECK: add d0, d0, d[[DREG]]
; CHECK: ret
entry:
  %sum = add <1 x i64> %arg, <i64 1>
  ret <1 x i64> %sum
}

; Floating-point counterpart of the single-element vector test: the constant
; 1.0 is expected to be loaded directly into a d register with an fmov
; immediate and then combined with the argument by a scalar fadd.
;
; The label assertion carries a trailing ':' (as the other labels in this
; file do) so that it anchors on the function's label definition rather than
; on any earlier mention of the symbol name in the output.
define <1 x double> @single_element_vector_double(<1 x double> %arg) {
; CHECK-LABEL: single_element_vector_double:
; CHECK: fmov d[[DREG:[0-9]+]], #1.00000000
; CHECK: fadd d0, d0, d[[DREG]]
; CHECK: ret
entry:
  %sum = fadd <1 x double> %arg, <double 1.0>
  ret <1 x double> %sum
}
[AARCH64] Enable AARCH64 lit tests on windows dev machines As discussed on PR27654, this patch fixes the triples of a lot of aarch64 tests and enables lit tests on windows This will hopefully help stop cases where windows developers break the aarch64 target Differential Revision: https://reviews.llvm.org/D22191 llvm-svn: 275973 2016-07-19 15:35:11 +02:00			`; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple \| FileCheck %s`
ARM64: initial backend import This adds a second implementation of the AArch64 architecture to LLVM, accessible in parallel via the "arm64" triple. The plan over the coming weeks & months is to merge the two into a single backend, during which time thorough code review should naturally occur. Everything will be easier with the target in-tree though, hence this commit. llvm-svn: 205090 2014-03-29 11:18:08 +01:00
			`; Check that building a vector from floats doesn't insert an unnecessary`
			`; copy for lane zero.`
			`define <4 x float> @foo(float %a, float %b, float %c, float %d) nounwind {`
			`; CHECK-LABEL: foo:`
[AArch64][TableGen] Skip tied result operands for InstAlias Summary: This patch fixes an issue so that the right alias is printed when the instruction has tied operands. It checks the number of operands in the resulting instruction as opposed to the alias, and then skips over tied operands that should not be printed in the alias. This allows to generate the preferred assembly syntax for the AArch64 'ins' instruction, which should always be displayed as 'mov' according to the ARM Architecture Reference Manual. Several unit tests have changed as a result, but only to reflect the preferred disassembly. Some other InstAlias patterns (movk/bic/orr) needed a slight adjustment to stop them becoming the default and breaking other unit tests. Please note that the patch is mostly the same as https://reviews.llvm.org/D29219 which was reverted because of an issue found when running TableGen with the Address Sanitizer. That issue has been addressed in this iteration of the patch. Reviewers: rengolin, stoklund, huntergr, SjoerdMeijer, rovka Reviewed By: rengolin, SjoerdMeijer Subscribers: fhahn, aemerson, javed.absar, kristof.beyls, llvm-commits Differential Revision: https://reviews.llvm.org/D40030 llvm-svn: 318650 2017-11-20 15:36:40 +01:00			`; CHECK-NOT: mov.s v0[0], v0[0]`
			`; CHECK: mov.s v0[1], v1[0]`
			`; CHECK: mov.s v0[2], v2[0]`
			`; CHECK: mov.s v0[3], v3[0]`
ARM64: initial backend import This adds a second implementation of the AArch64 architecture to LLVM, accessible in parallel via the "arm64" triple. The plan over the coming weeks & months is to merge the two into a single backend, during which time thorough code review should naturally occur. Everything will be easier with the target in-tree though, hence this commit. llvm-svn: 205090 2014-03-29 11:18:08 +01:00			`; CHECK: ret`
			`%1 = insertelement <4 x float> undef, float %a, i32 0`
			`%2 = insertelement <4 x float> %1, float %b, i32 1`
			`%3 = insertelement <4 x float> %2, float %c, i32 2`
			`%4 = insertelement <4 x float> %3, float %d, i32 3`
			`ret <4 x float> %4`
			`}`
[AArch64] Fix a build_vector pattern match fail caused by defect in isBuildVectorAllZeros(). llvm-svn: 211567 2014-06-24 07:37:27 +02:00
			`define <8 x i16> @build_all_zero(<8 x i16> %a) #1 {`
			`; CHECK-LABEL: build_all_zero:`
AArch64: allow MOV (imm) alias to be printed The backend has been around for years, it's pretty ridiculous that we can't even use the preferred form for printing "MOV" aliases. Unfortunately, TableGen can't handle the complex predicates when printing so it's a bunch of nasty C++. Oh well. llvm-svn: 272865 2016-06-16 03:42:25 +02:00			`; CHECK: mov w[[GREG:[0-9]+]], #44672`
[AArch64] Fix a build_vector pattern match fail caused by defect in isBuildVectorAllZeros(). llvm-svn: 211567 2014-06-24 07:37:27 +02:00			`; CHECK-NEXT: fmov s[[FREG:[0-9]+]], w[[GREG]]`
			`; CHECK-NEXT: mul.8h v0, v0, v[[FREG]]`
			`%b = add <8 x i16> %a, <i16 -32768, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef>`
			`%c = mul <8 x i16> %b, <i16 -20864, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef>`
			`ret <8 x i16> %c`
[AArch64]Fix an assertion failure in DAG Combiner about concating 2 build_vector. llvm-svn: 212677 2014-07-10 05:41:50 +02:00			`}`

			`; There is an optimization in DAG Combiner as following:`
			`; fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))`
			`; -> (BUILD_VECTOR A, B, ..., C, D, ...)`
			`; This case checks when A,B and C,D are different types, there should be no`
			`; assertion failure.`
			`define <8 x i16> @concat_2_build_vector(<4 x i16> %in0) {`
			`; CHECK-LABEL: concat_2_build_vector:`
			`; CHECK: movi`
			`%vshl_n = shl <4 x i16> %in0, <i16 8, i16 8, i16 8, i16 8>`
			`%vshl_n2 = shl <4 x i16> %vshl_n, <i16 9, i16 9, i16 9, i16 9>`
			`%shuffle.i = shufflevector <4 x i16> %vshl_n2, <4 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>`
			`ret <8 x i16> %shuffle.i`
add support for -print-imm-hex for AArch64 Most immediates are printed in Aarch64InstPrinter using 'formatImm' macro, but not all of them. Implementation contains following rules: - floating point immediates are always printed as decimal - signed integer immediates are printed depending on flag settings (for negative values 'formatImm' macro prints the value as e.g. -0x01 which may be convenient when imm is an address or offset) - logical immediates are always printed as hex - the 64-bit immediate for advSIMD, encoded in "a:b:c:d:e:f:g:h" is always printed as hex - the 64-bit immediate in exception generation instructions like: brk, dcps1, dcps2, dcps3, hlt, hvc, smc, svc is always printed as hex - the rest of immediates are printed depending on availability of -print-imm-hex Signed-off-by: Maciej Gabka <maciej.gabka@arm.com> Signed-off-by: Paul Osmialowski <pawel.osmialowski@arm.com> Differential Revision: http://reviews.llvm.org/D16929 llvm-svn: 269446 2016-05-13 20:00:09 +02:00			`}`
[AArch64] Fix assertion failure on widened f16 BUILD_VECTOR Summary: Ensure that NormalizedBuildVector returns a BUILD_VECTOR with operands of the same type. This fixes an assertion failure in VerifySDNode. Reviewers: SjoerdMeijer, t.p.northover, javed.absar Reviewed By: SjoerdMeijer Subscribers: kristof.beyls, llvm-commits Differential Revision: https://reviews.llvm.org/D50202 llvm-svn: 339013 2018-08-06 16:14:41 +02:00
			`; The lowering of a widened f16 BUILD_VECTOR tries to optimize it by building`
			`; an equivalent integer vector and BITCAST-ing that. This case checks that`
			`; normalizing the vector generates a valid result. The choice of the`
			`; constant prevents earlier passes from replacing the BUILD_VECTOR.`
			`define void @widen_f16_build_vector(half* %addr) {`
			`; CHECK-LABEL: widen_f16_build_vector:`
			`; CHECK: mov w[[GREG:[0-9]+]], #13294`
			`; CHECK: dup.4h v0, w[[GREG]]`
			`; CHECK: str s0, [x0]`
			`%1 = bitcast half* %addr to <2 x half>*`
			`store <2 x half> <half 0xH33EE, half 0xH33EE>, <2 x half>* %1, align 2`
			`ret void`
			`}`
[AArch64] Fix SelectionDAG infinite loop for v1i64 SCALAR_TO_VECTOR A consequence of r347274 is that SCALAR_TO_VECTOR can be converted into BUILD_VECTOR by SimplifyDemandedBits, but LowerBUILD_VECTOR can turn BUILD_VECTOR into SCALAR_TO_VECTOR so we get an infinite loop. Fix this by making LowerBUILD_VECTOR not do this transformation for those vectors that would get transformed back, i.e. BUILD_VECTOR of a single-element constant vector. Doing that means we get a DUP, which we then need to recognise in ISel as a copy. llvm-svn: 347456 2018-11-22 12:45:23 +01:00
			`; Check that a single element vector is constructed with a mov`
			`define <1 x i64> @single_element_vector_i64(<1 x i64> %arg) {`
			`; CHECK-LABEL: single_element_vector_i64`
			`; CHECK: orr w[[GREG:[0-9]+]], wzr, #0x1`
			`; CHECK: fmov d[[DREG:[0-9]+]], x[[GREG]]`
			`; CHECK: add d0, d0, d[[DREG]]`
			`; CHECK: ret`
			`entry:`
			`%add = add <1 x i64> %arg, <i64 1>`
			`ret <1 x i64> %add`
			`}`

			`define <1 x double> @single_element_vector_double(<1 x double> %arg) {`
			`; CHECK-LABEL: single_element_vector_double`
			`; CHECK: fmov d[[DREG:[0-9]+]], #1.00000000`
			`; CHECK: fadd d0, d0, d[[DREG]]`
			`; CHECK: ret`
			`entry:`
			`%add = fadd <1 x double> %arg, <double 1.0>`
			`ret <1 x double> %add`
			`}`