From 920c28fbd1240f097f8d972d44acb114e50c5c8e Mon Sep 17 00:00:00 2001
From: "Kazushi (Jam) Marukawa" <marukawa@nec.com>
Date: Sat, 19 Dec 2020 01:12:25 +0900
Subject: [PATCH] [VE] Correct VMP allocation in calling conv

For fastcc arguments of two v256i1, one v512i1, and one more v256i1,
VE used to allocate VM1, VM2, VMP2 (VM4+VM5), and VM3.  This patch
corrects the allocation to VM1, VM2, VMP2 (VM4+VM5), and VM6.  It also
adds a regression test.

Reviewed By: simoll

Differential Revision: https://reviews.llvm.org/D93570
---
 lib/Target/VE/VECallingConv.td          | 4 ++--
 test/CodeGen/VE/Vector/fastcc_callee.ll | 8 ++++++++
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/lib/Target/VE/VECallingConv.td b/lib/Target/VE/VECallingConv.td
index 6741d1cc8ea..93899c2cae3 100644
--- a/lib/Target/VE/VECallingConv.td
+++ b/lib/Target/VE/VECallingConv.td
@@ -116,7 +116,7 @@ def CC_VE_Fast : CallingConv<[
   // pair of vector mask --> generic vector mask registers
   CCIfType<[v512i1],
            CCAssignToRegWithShadow<[VMP1, VMP2, VMP3],
-                                   [VM1, VM1, VM3]>>,
+                                   [VM1, VM3, VM5]>>,
 
   // Follow the standard C CC for scalars.
   CCDelegateTo<CC_VE_C>
@@ -137,7 +137,7 @@ def RetCC_VE_Fast : CallingConv<[
   // pair of vector mask --> generic vector mask registers
   CCIfType<[v512i1],
            CCAssignToRegWithShadow<[VMP1, VMP2, VMP3],
-                                   [VM1, VM1, VM3]>>,
+                                   [VM1, VM3, VM5]>>,
 
   // Follow the standard C CC for scalars.
   CCDelegateTo<RetCC_VE_C>
diff --git a/test/CodeGen/VE/Vector/fastcc_callee.ll b/test/CodeGen/VE/Vector/fastcc_callee.ll
index c0ad247d0e7..aa7b4944e7e 100644
--- a/test/CodeGen/VE/Vector/fastcc_callee.ll
+++ b/test/CodeGen/VE/Vector/fastcc_callee.ll
@@ -137,3 +137,11 @@ define fastcc <512 x i1> @vreg_arg_v512i1_vmp3(<512 x i1> %vmp1, <512 x i1> %vmp
 ; CHECK-NEXT:    b.l.t (, %s10)
   ret <512 x i1> %vmp3
 }
+
+define fastcc <256 x i1> @vmp_cc_bug(<256 x i1> %vm1, <256 x i1> %vm2, <512 x i1> %vmp2, <256 x i1> %vm6) {
+; CHECK-LABEL: vmp_cc_bug:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    andm %vm1, %vm0, %vm6
+; CHECK-NEXT:    b.l.t (, %s10)
+  ret <256 x i1> %vm6
+}