llvm-mirror/test/CodeGen/AArch64/arm64-neon-vector-shuffle-extract.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-unknown-linux -o - | FileCheck %s

; Shuffle/extract chain built only from constants and undef, so both stored
; values are known at compile time:
;   %tmp  = <1, undef, undef>
;   %tmp2 = <2, 3, 1>        (mask index 3 picks lane 0 of %tmp)
;   %tmp3 = <2, 3, 1, undef, undef, undef>
;   %tmp4 = lanes 0-5 undef, lanes 6-8 = <2, 3, 1>
; Lane 7 (value 3) is stored through %p1 and lane 8 (value 1) through %p2,
; which is what the two immediate moves and two stores asserted below encode.
define void @test(i32* %p1, i32* %p2) {
; CHECK-LABEL: test:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #3
; CHECK-NEXT:    mov w9, #1
; CHECK-NEXT:    str w8, [x0]
; CHECK-NEXT:    str w9, [x1]
; CHECK-NEXT:    ret
  ; Widen the 1-element constant <1> to 3 lanes (upper lanes undef).
  %tmp = shufflevector <1 x i32> <i32 1>, <1 x i32> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
  ; Keep lanes 0-1 of <2, 3, 4> and append lane 0 of %tmp.
  %tmp2 = shufflevector <3 x i32> <i32 2, i32 3, i32 4>, <3 x i32> %tmp, <3 x i32> <i32 0, i32 1, i32 3>
  ; Widen to 6 lanes; the added lanes are undef.
  %tmp3 = shufflevector <3 x i32> %tmp2, <3 x i32> undef, <6 x i32> <i32 0, i32 1, i32 2, i32 undef, i32 undef, i32 undef>
  ; Concatenate: mask indices 0-5 read the undef operand, 6-8 read lanes 0-2 of %tmp3.
  %tmp4 = shufflevector <6 x i32> undef, <6 x i32> %tmp3, <9 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
  %tmp6 = extractelement <9 x i32> %tmp4, i32 7
  %tmp8 = extractelement <9 x i32> %tmp4, i32 8
  store i32 %tmp6, i32* %p1, align 4
  store i32 %tmp8, i32* %p2, align 4
  ret void
}

; Test case from PR41951
; Shuffles two 3-element (odd-length) vectors into a 4-element result.
; Mask indices 0-2 select from %y and 3-5 select from %x, so the result is
; <y[1], x[1], x[0], y[0]>.
; With %x in v0 and %y in v1, the expected sequence below builds that value as:
;   zip1(x, y) = <x0, y0, x1, y1>
;   zip1(y, x) = <y0, x0, y1, x1>
;   ext #8     = upper half of the second zip, then lower half of the first
define <4 x i32> @widen_shuffles_reduced(<3 x i32> %x, <3 x i32> %y) {
; CHECK-LABEL: widen_shuffles_reduced:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip1 v2.4s, v0.4s, v1.4s
; CHECK-NEXT:    zip1 v0.4s, v1.4s, v0.4s
; CHECK-NEXT:    ext v0.16b, v0.16b, v2.16b, #8
; CHECK-NEXT:    ret
  %s3 = shufflevector <3 x i32> %y, <3 x i32> %x, <4 x i32> <i32 1, i32 4, i32 3, i32 0>
  ret <4 x i32> %s3
}

; Reduced reproducer that feeds shuffles of 3-element (odd-length) float
; vectors through two fma calls and stores the bit pattern of result lane 0
; through %p3.
; NOTE(review): presumably the regression test for the isZIPMask odd-element
; fix (D65400), going by the function name and the associated commit message;
; confirm against the commit.
define void @zip_mask_check(<3 x float>* %p1, <3 x float>* %p2, i32* %p3) {
; CHECK-LABEL: zip_mask_check:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    trn2 v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    fmla v0.4s, v0.4s, v0.4s
; CHECK-NEXT:    fmla v0.4s, v0.4s, v0.4s
; CHECK-NEXT:    str s0, [x2]
; CHECK-NEXT:    ret
  %tmp3 = load <3 x float>, <3 x float>* %p1, align 16
  %tmp4 = load <3 x float>, <3 x float>* %p2, align 4
  ; %tmp5 = <tmp3[1], tmp4[1], undef, undef>; mask index 4 is lane 1 of the second 3-lane operand.
  %tmp5 = shufflevector <3 x float> %tmp3, <3 x float> %tmp4, <4 x i32> <i32 1, i32 4, i32 undef, i32 undef>
  ; The next two shuffles keep the leading lanes and pull the rest from an undef operand.
  %tmp6 = shufflevector <4 x float> %tmp5, <4 x float> undef, <4 x i32> <i32 0, i32 1, i32 5, i32 undef>
  %tmp7 = shufflevector <4 x float> %tmp6, <4 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
  ; Two chained fma calls; every operand other than the shuffle result / previous fma is undef.
  %tmp8 = call <4 x float> @llvm.fma.v4f32(<4 x float> %tmp7, <4 x float> undef, <4 x float> undef)
  %tmp9 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> %tmp8)
  ; Widen to 16 lanes keeping only lane 0, reinterpret as i32, and store that lane.
  %tmp10 = shufflevector <4 x float> %tmp9, <4 x float> undef, <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %tmp11 = bitcast <16 x float> %tmp10 to <16 x i32>
  %tmp12 = extractelement <16 x i32> %tmp11, i32 0
  store i32 %tmp12, i32* %p3, align 4
  ret void
}

; Declaration of the 4 x float fma intrinsic called by @zip_mask_check.
; NOTE(review): attribute group #1 is referenced here but no definition of it
; is visible in this view of the file -- confirm whether it is intentionally
; left undefined.
declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) #1
[AArch64] Skip mask checks for masks with an odd number of elements.
Some checks in isShuffleMaskLegal (e.g. isTRN_v_undef_Mask or isUZP_v_undef_Mask) expect an even number of elements; given an odd-length mask they access invalid elements and crash. This patch adds checks to the impacted functions. Fixes PR41951.
Reviewers: t.p.northover, dmgreen, samparker | Reviewed By: dmgreen | Differential Revision: https://reviews.llvm.org/D60690 | llvm-svn: 361235 | 2019-05-21 12:05:26 +02:00
			`; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py`
			`; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-unknown-linux -o - \| FileCheck %s`

			`define void @test(i32* %p1, i32* %p2) {`
			`; CHECK-LABEL: test:`
			`; CHECK: // %bb.0:`
			`; CHECK-NEXT: mov w8, #3`
			`; CHECK-NEXT: mov w9, #1`
			`; CHECK-NEXT: str w8, [x0]`
			`; CHECK-NEXT: str w9, [x1]`
			`; CHECK-NEXT: ret`
			`%tmp = shufflevector <1 x i32> <i32 1>, <1 x i32> undef, <3 x i32> <i32 0, i32 undef, i32 undef>`
			`%tmp2 = shufflevector <3 x i32> <i32 2, i32 3, i32 4>, <3 x i32> %tmp, <3 x i32> <i32 0, i32 1, i32 3>`
			`%tmp3 = shufflevector <3 x i32> %tmp2, <3 x i32> undef, <6 x i32> <i32 0, i32 1, i32 2, i32 undef, i32 undef, i32 undef>`
			`%tmp4 = shufflevector <6 x i32> undef, <6 x i32> %tmp3, <9 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>`
			`%tmp6 = extractelement <9 x i32> %tmp4, i32 7`
			`%tmp8 = extractelement <9 x i32> %tmp4, i32 8`
			`store i32 %tmp6, i32* %p1, align 4`
			`store i32 %tmp8, i32* %p2, align 4`
			`ret void`
			`}`

			`; Test case from PR41951`
			`define <4 x i32> @widen_shuffles_reduced(<3 x i32> %x, <3 x i32> %y) {`
			`; CHECK-LABEL: widen_shuffles_reduced:`
			`; CHECK: // %bb.0:`
			`; CHECK-NEXT: zip1 v2.4s, v0.4s, v1.4s`
			`; CHECK-NEXT: zip1 v0.4s, v1.4s, v0.4s`
			`; CHECK-NEXT: ext v0.16b, v0.16b, v2.16b, #8`
			`; CHECK-NEXT: ret`
			`%s3 = shufflevector <3 x i32> %y, <3 x i32> %x, <4 x i32> <i32 1, i32 4, i32 3, i32 0>`
			`ret <4 x i32> %s3`
			`}`
[AArch64] Skip isZIPMask check for masks with an odd number of elements.
isZIPMask processes two elements at a time and therefore expects the number of elements to be even. Similar to D60690.
Reviewers: dmgreen, samparker, t.p.northover | Reviewed By: dmgreen | Differential Revision: https://reviews.llvm.org/D65400 | llvm-svn: 367831 | 2019-08-05 13:12:23 +02:00
			`define void @zip_mask_check(<3 x float>* %p1, <3 x float>* %p2, i32* %p3) {`
			`; CHECK-LABEL: zip_mask_check:`
			`; CHECK: // %bb.0:`
			`; CHECK-NEXT: ldr q0, [x0]`
			`; CHECK-NEXT: ldr d1, [x1]`
			`; CHECK-NEXT: trn2 v0.4s, v0.4s, v1.4s`
			`; CHECK-NEXT: fmla v0.4s, v0.4s, v0.4s`
			`; CHECK-NEXT: fmla v0.4s, v0.4s, v0.4s`
			`; CHECK-NEXT: str s0, [x2]`
			`; CHECK-NEXT: ret`
			`%tmp3 = load <3 x float>, <3 x float>* %p1, align 16`
			`%tmp4 = load <3 x float>, <3 x float>* %p2, align 4`
			`%tmp5 = shufflevector <3 x float> %tmp3, <3 x float> %tmp4, <4 x i32> <i32 1, i32 4, i32 undef, i32 undef>`
			`%tmp6 = shufflevector <4 x float> %tmp5, <4 x float> undef, <4 x i32> <i32 0, i32 1, i32 5, i32 undef>`
			`%tmp7 = shufflevector <4 x float> %tmp6, <4 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 5>`
			`%tmp8 = call <4 x float> @llvm.fma.v4f32(<4 x float> %tmp7, <4 x float> undef, <4 x float> undef)`
			`%tmp9 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> %tmp8)`
			`%tmp10 = shufflevector <4 x float> %tmp9, <4 x float> undef, <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>`
			`%tmp11 = bitcast <16 x float> %tmp10 to <16 x i32>`
			`%tmp12 = extractelement <16 x i32> %tmp11, i32 0`
			`store i32 %tmp12, i32* %p3, align 4`
			`ret void`
			`}`

			`declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) #1`