llvm-mirror/test/CodeGen/AArch64/aarch64-vuzp.ll

; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s

declare <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8>, <16 x i8>)

; CHECK-LABEL: fun1:
; CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
define i32 @fun1() {
entry:
  %vtbl1.i.1 = tail call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> <i8 0, i8 16, i8 19, i8 4, i8 -65, i8 -65, i8 -71, i8 -71, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, <16 x i8> undef)
  %vuzp.i212.1 = shufflevector <16 x i8> %vtbl1.i.1, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  %scevgep = getelementptr <8 x i8>, <8 x i8>* undef, i64 1
  store <8 x i8> %vuzp.i212.1, <8 x i8>* %scevgep, align 1
  ret i32 undef
}

; CHECK-LABEL: fun2:
; CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
define i32 @fun2() {
entry:
  %vtbl1.i.1 = tail call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> <i8 0, i8 16, i8 19, i8 4, i8 -65, i8 -65, i8 -71, i8 -71, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, <16 x i8> undef)
  %vuzp.i212.1 = shufflevector <16 x i8> %vtbl1.i.1, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %scevgep = getelementptr <8 x i8>, <8 x i8>* undef, i64 1
  store <8 x i8> %vuzp.i212.1, <8 x i8>* %scevgep, align 1
  ret i32 undef
}

; CHECK-LABEL: fun3:
; CHECK-NOT: uzp1
define i32 @fun3() {
entry:
  %vtbl1.i.1 = tail call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> <i8 0, i8 16, i8 19, i8 4, i8 -65, i8 -65, i8 -71, i8 -71, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, <16 x i8> undef)
  %vuzp.i212.1 = shufflevector <16 x i8> %vtbl1.i.1, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 15>
  %scevgep = getelementptr <8 x i8>, <8 x i8>* undef, i64 1
  store <8 x i8> %vuzp.i212.1, <8 x i8>* %scevgep, align 1
  ret i32 undef
}

; CHECK-LABEL: fun4:
; CHECK-NOT: uzp2
define i32 @fun4() {
entry:
  %vtbl1.i.1 = tail call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> <i8 0, i8 16, i8 19, i8 4, i8 -65, i8 -65, i8 -71, i8 -71, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, <16 x i8> undef)
  %vuzp.i212.1 = shufflevector <16 x i8> %vtbl1.i.1, <16 x i8> undef, <8 x i32> <i32 3, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %scevgep = getelementptr <8 x i8>, <8 x i8>* undef, i64 1
  store <8 x i8> %vuzp.i212.1, <8 x i8>* %scevgep, align 1
  ret i32 undef
}

; CHECK-LABEL: pr36582:
; Check that this does not ICE.
define void @pr36582(i8* %p1, i32* %p2) {
entry:
  %x = bitcast i8* %p1 to <8 x i8>*
  %wide.vec = load <8 x i8>, <8 x i8>* %x, align 1
  %strided.vec = shufflevector <8 x i8> %wide.vec, <8 x i8> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %y = zext <4 x i8> %strided.vec to <4 x i32>
  %z = bitcast i32* %p2 to <4 x i32>*
  store <4 x i32> %y, <4 x i32>* %z, align 4
  ret void
}

; Check that this pattern is recognized as a VZIP and
; that the vector blend transform does not scramble the pattern.
; CHECK-LABEL: vzipNoBlend:
; CHECK: zip1
define <8 x i8> @vzipNoBlend(<8 x i8>* %A, <8 x i16>* %B) nounwind {
  %t = load <8 x i8>, <8 x i8>* %A
  %vzip = shufflevector <8 x i8> %t, <8 x i8> <i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef>, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
  ret <8 x i8> %vzip
}
fix PR36582 The error occurs when reading i16 elements (as in the testcase) from a v8i8 with a pattern of <0,2,4,6>. As all the data in the vector is accessed, the operation is not a VUZP. The patch stops the pattern recognition of VUZP when EXTRACT_VECTOR_ELT has a different element type than BUILD_VECTOR. llvm-svn: 326722 2018-03-05 18:35:49 +01:00			`; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s \| FileCheck %s`

			`declare <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8>, <16 x i8>)`
[AArch64] generate vuzp instead of mov when a BUILD_VECTOR is created out of a sequence of EXTRACT_VECTOR_ELT with a specific pattern sequence, either <0, 2, 4, ...> or <1, 3, 5, ...>, replace the BUILD_VECTOR with either vuzp1 or vuzp2. With this patch LLVM generates the following code for the first function fun1 in the testcase: adrp x8, .LCPI0_0 ldr q0, [x8, :lo12:.LCPI0_0] tbl v0.16b, { v0.16b }, v0.16b ext v1.16b, v0.16b, v0.16b, #8 uzp1 v0.8b, v0.8b, v1.8b str d0, [x8] ret Without this patch LLVM currently generates this code: adrp x8, .LCPI0_0 ldr q0, [x8, :lo12:.LCPI0_0] tbl v0.16b, { v0.16b }, v0.16b mov v1.16b, v0.16b mov v1.b[1], v0.b[2] mov v1.b[2], v0.b[4] mov v1.b[3], v0.b[6] mov v1.b[4], v0.b[8] mov v1.b[5], v0.b[10] mov v1.b[6], v0.b[12] mov v1.b[7], v0.b[14] str d1, [x8] ret llvm-svn: 326443 2018-03-01 16:47:39 +01:00
			`; CHECK-LABEL: fun1:`
			`; CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b`
			`define i32 @fun1() {`
			`entry:`
			`%vtbl1.i.1 = tail call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> <i8 0, i8 16, i8 19, i8 4, i8 -65, i8 -65, i8 -71, i8 -71, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, <16 x i8> undef)`
			`%vuzp.i212.1 = shufflevector <16 x i8> %vtbl1.i.1, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>`
			`%scevgep = getelementptr <8 x i8>, <8 x i8>* undef, i64 1`
			`store <8 x i8> %vuzp.i212.1, <8 x i8>* %scevgep, align 1`
			`ret i32 undef`
			`}`

			`; CHECK-LABEL: fun2:`
			`; CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b`
			`define i32 @fun2() {`
			`entry:`
			`%vtbl1.i.1 = tail call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> <i8 0, i8 16, i8 19, i8 4, i8 -65, i8 -65, i8 -71, i8 -71, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, <16 x i8> undef)`
			`%vuzp.i212.1 = shufflevector <16 x i8> %vtbl1.i.1, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>`
			`%scevgep = getelementptr <8 x i8>, <8 x i8>* undef, i64 1`
			`store <8 x i8> %vuzp.i212.1, <8 x i8>* %scevgep, align 1`
			`ret i32 undef`
			`}`

			`; CHECK-LABEL: fun3:`
			`; CHECK-NOT: uzp1`
			`define i32 @fun3() {`
			`entry:`
			`%vtbl1.i.1 = tail call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> <i8 0, i8 16, i8 19, i8 4, i8 -65, i8 -65, i8 -71, i8 -71, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, <16 x i8> undef)`
			`%vuzp.i212.1 = shufflevector <16 x i8> %vtbl1.i.1, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 15>`
			`%scevgep = getelementptr <8 x i8>, <8 x i8>* undef, i64 1`
			`store <8 x i8> %vuzp.i212.1, <8 x i8>* %scevgep, align 1`
			`ret i32 undef`
			`}`

			`; CHECK-LABEL: fun4:`
			`; CHECK-NOT: uzp2`
			`define i32 @fun4() {`
			`entry:`
			`%vtbl1.i.1 = tail call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> <i8 0, i8 16, i8 19, i8 4, i8 -65, i8 -65, i8 -71, i8 -71, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, <16 x i8> undef)`
			`%vuzp.i212.1 = shufflevector <16 x i8> %vtbl1.i.1, <16 x i8> undef, <8 x i32> <i32 3, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>`
			`%scevgep = getelementptr <8 x i8>, <8 x i8>* undef, i64 1`
			`store <8 x i8> %vuzp.i212.1, <8 x i8>* %scevgep, align 1`
			`ret i32 undef`
			`}`

fix PR36582 The error occurs when reading i16 elements (as in the testcase) from a v8i8 with a pattern of <0,2,4,6>. As all the data in the vector is accessed, the operation is not a VUZP. The patch stops the pattern recognition of VUZP when EXTRACT_VECTOR_ELT has a different element type than BUILD_VECTOR. llvm-svn: 326722 2018-03-05 18:35:49 +01:00			`; CHECK-LABEL: pr36582:`
			`; Check that this does not ICE.`
			`define void @pr36582(i8* %p1, i32* %p2) {`
			`entry:`
			`%x = bitcast i8* %p1 to <8 x i8>*`
			`%wide.vec = load <8 x i8>, <8 x i8>* %x, align 1`
			`%strided.vec = shufflevector <8 x i8> %wide.vec, <8 x i8> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>`
			`%y = zext <4 x i8> %strided.vec to <4 x i32>`
			`%z = bitcast i32* %p2 to <4 x i32>*`
			`store <4 x i32> %y, <4 x i32>* %z, align 4`
			`ret void`
			`}`
[x86][aarch64] ask the backend whether it has a vector blend instruction The code to match and produce more x86 vector blends was enabled for all architectures even though the transform may pessimize the code for other architectures that do not provide a vector blend instruction. Added an aarch64 testcase to check that a VZIP instruction is generated instead of byte movs. Differential Revision: https://reviews.llvm.org/D44118 llvm-svn: 327132 2018-03-09 15:29:21 +01:00
			`; Check that this pattern is recognized as a VZIP and`
			`; that the vector blend transform does not scramble the pattern.`
			`; CHECK-LABEL: vzipNoBlend:`
			`; CHECK: zip1`
			`define <8 x i8> @vzipNoBlend(<8 x i8>* %A, <8 x i16>* %B) nounwind {`
			`%t = load <8 x i8>, <8 x i8>* %A`
			`%vzip = shufflevector <8 x i8> %t, <8 x i8> <i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef>, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>`
			`ret <8 x i8> %vzip`
			`}`