1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 20:51:52 +01:00

[PowerPC] Add missing negate for VPERMXOR on little endian subtargets

This intrinsic is supposed to have the permute control vector complemented on
little endian systems (as the ABI specifies and GCC implements). With the
current code gen, the result vector is byte-reversed.

Differential revision: https://reviews.llvm.org/D95004
This commit is contained in:
Nemanja Ivanovic 2021-01-25 12:22:19 -06:00
parent 2d325e321e
commit e9b75e2e14
3 changed files with 28 additions and 6 deletions

View File

@ -1327,8 +1327,8 @@ def VPMSUMW : VX1_Int_Ty<1160, "vpmsumw",
int_ppc_altivec_crypto_vpmsumw, v4i32>;
def VPMSUMD : VX1_Int_Ty<1224, "vpmsumd",
int_ppc_altivec_crypto_vpmsumd, v2i64>;
def VPERMXOR : VA1a_Int_Ty<45, "vpermxor",
int_ppc_altivec_crypto_vpermxor, v16i8>;
def VPERMXOR : VAForm_1<45, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB, vrrc:$VC),
"vpermxor $VD, $VA, $VB, $VC", IIC_VecFP, []>;
// Vector doubleword integer pack and unpack.
let hasSideEffects = 1 in {

View File

@ -2408,6 +2408,8 @@ def MrgWords {
// arbitrarily chosen to be Big, Little.
//
// Predicate combinations available:
// [HasVSX, IsLittleEndian, HasP8Altivec] Altivec patterns using VSX instr.
// [HasVSX, IsBigEndian, HasP8Altivec] Altivec patterns using VSX instr.
// [HasVSX]
// [HasVSX, IsBigEndian]
// [HasVSX, IsLittleEndian]
@ -2436,6 +2438,18 @@ def MrgWords {
// [HasVSX, IsISA3_0, HasDirectMove, IsBigEndian, IsPPC64]
// [HasVSX, IsISA3_0, HasDirectMove, IsLittleEndian]
// These Altivec patterns are here because we need a VSX instruction to match
// the intrinsic (but only for little endian systems).
let Predicates = [HasVSX, IsLittleEndian, HasP8Altivec] in
def : Pat<(v16i8 (int_ppc_altivec_crypto_vpermxor v16i8:$a,
v16i8:$b, v16i8:$c)),
(v16i8 (VPERMXOR $a, $b, (XXLNOR (COPY_TO_REGCLASS $c, VSRC),
(COPY_TO_REGCLASS $c, VSRC))))>;
let Predicates = [HasVSX, IsBigEndian, HasP8Altivec] in
def : Pat<(v16i8 (int_ppc_altivec_crypto_vpermxor v16i8:$a,
v16i8:$b, v16i8:$c)),
(v16i8 (VPERMXOR $a, $b, $c))>;
let AddedComplexity = 400 in {
// Valid for any VSX subtarget, regardless of endianness.
let Predicates = [HasVSX] in {

View File

@ -1,7 +1,11 @@
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=+crypto < %s | FileCheck %s
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 < %s | FileCheck %s
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
; RUN: -mcpu=pwr8 < %s | FileCheck %s
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN: -mcpu=pwr8 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-LE
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
; RUN: -mcpu=pwr7 -mattr=+crypto < %s | FileCheck %s
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN: -mcpu=pwr9 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-LE
; FIXME: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s
; FIXME: The original intent was to add a check-next for the blr after every check.
; However, this currently fails since we don't eliminate stores of the unused
@ -103,6 +107,7 @@ entry:
%2 = load <16 x i8>, <16 x i8>* %c, align 16
%3 = call <16 x i8> @llvm.ppc.altivec.crypto.vpermxor(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2)
ret <16 x i8> %3
; CHECK-LE: xxlnor
; CHECK: vpermxor 2,
}
@ -127,6 +132,7 @@ entry:
%6 = call <16 x i8> @llvm.ppc.altivec.crypto.vpermxor(<16 x i8> %1, <16 x i8> %3, <16 x i8> %5)
%7 = bitcast <16 x i8> %6 to <8 x i16>
ret <8 x i16> %7
; CHECK-LE: xxlnor
; CHECK: vpermxor 2,
}
@ -148,6 +154,7 @@ entry:
%6 = call <16 x i8> @llvm.ppc.altivec.crypto.vpermxor(<16 x i8> %1, <16 x i8> %3, <16 x i8> %5)
%7 = bitcast <16 x i8> %6 to <4 x i32>
ret <4 x i32> %7
; CHECK-LE: xxlnor
; CHECK: vpermxor 2,
}
@ -169,6 +176,7 @@ entry:
%6 = call <16 x i8> @llvm.ppc.altivec.crypto.vpermxor(<16 x i8> %1, <16 x i8> %3, <16 x i8> %5)
%7 = bitcast <16 x i8> %6 to <2 x i64>
ret <2 x i64> %7
; CHECK-LE: xxlnor
; CHECK: vpermxor 2,
}