[AArch64] Skip isZIPMask check for masks with an odd number of elements.

We process 2 elements at a time and expect the number of elements to be even. Similar to D60690. Reviewers: dmgreen, samparker, t.p.northover Reviewed By: dmgreen Differential Revision: https://reviews.llvm.org/D65400 llvm-svn: 367831
2025-01-31 12:41:49 +01:00 · 2019-08-05 11:12:23 +00:00 · 2019-08-05 11:12:23 +00:00 · 625dcd33f6
commit 625dcd33f6
parent cc3dd960fc
2 changed files with 28 additions and 0 deletions
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@ -6312,6 +6312,8 @@ static bool isREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {

 static bool isZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
  unsigned NumElts = VT.getVectorNumElements();
+  if (NumElts % 2 != 0)
+    return false;
  WhichResult = (M[0] == 0 ? 0 : 1);
  unsigned Idx = WhichResult * NumElts / 2;
  for (unsigned i = 0; i != NumElts; i += 2) {
--- a/test/CodeGen/AArch64/arm64-neon-vector-shuffle-extract.ll
+++ b/test/CodeGen/AArch64/arm64-neon-vector-shuffle-extract.ll
@ -31,3 +31,29 @@ define <4 x i32> @widen_shuffles_reduced(<3 x i32> %x, <3 x i32> %y) {
  %s3 = shufflevector <3 x i32> %y, <3 x i32> %x, <4 x i32> <i32 1, i32 4, i32 3, i32 0>
  ret <4 x i32> %s3
 }
+
+define void @zip_mask_check(<3 x float>* %p1, <3 x float>* %p2, i32* %p3) {
+; CHECK-LABEL: zip_mask_check:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr q0, [x0]
+; CHECK-NEXT:    ldr d1, [x1]
+; CHECK-NEXT:    trn2 v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    fmla v0.4s, v0.4s, v0.4s
+; CHECK-NEXT:    fmla v0.4s, v0.4s, v0.4s
+; CHECK-NEXT:    str s0, [x2]
+; CHECK-NEXT:    ret
+  %tmp3 = load <3 x float>, <3 x float>* %p1, align 16
+  %tmp4 = load <3 x float>, <3 x float>* %p2, align 4
+  %tmp5 = shufflevector <3 x float> %tmp3, <3 x float> %tmp4, <4 x i32> <i32 1, i32 4, i32 undef, i32 undef>
+  %tmp6 = shufflevector <4 x float> %tmp5, <4 x float> undef, <4 x i32> <i32 0, i32 1, i32 5, i32 undef>
+  %tmp7 = shufflevector <4 x float> %tmp6, <4 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+  %tmp8 = call <4 x float> @llvm.fma.v4f32(<4 x float> %tmp7, <4 x float> undef, <4 x float> undef)
+  %tmp9 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> %tmp8)
+  %tmp10 = shufflevector <4 x float> %tmp9, <4 x float> undef, <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %tmp11 = bitcast <16 x float> %tmp10 to <16 x i32>
+  %tmp12 = extractelement <16 x i32> %tmp11, i32 0
+  store i32 %tmp12, i32* %p3, align 4
+  ret void
+}
+
+declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) #1