1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 20:51:52 +01:00

[PowerPC] Add missing negate for VPERMXOR on little endian subtargets

This intrinsic is supposed to have the permute control vector complemented on
little endian systems (as the ABI specifies and GCC implements). With the
current code gen, the result vector is byte-reversed.

Differential revision: https://reviews.llvm.org/D95004
This commit is contained in:
Nemanja Ivanovic 2021-01-25 12:22:19 -06:00
parent 2d325e321e
commit e9b75e2e14
3 changed files with 28 additions and 6 deletions

View File

@ -1327,8 +1327,8 @@ def VPMSUMW : VX1_Int_Ty<1160, "vpmsumw",
int_ppc_altivec_crypto_vpmsumw, v4i32>;
def VPMSUMD : VX1_Int_Ty<1224, "vpmsumd",
int_ppc_altivec_crypto_vpmsumd, v2i64>;
def VPERMXOR : VA1a_Int_Ty<45, "vpermxor",
int_ppc_altivec_crypto_vpermxor, v16i8>;
def VPERMXOR : VAForm_1<45, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB, vrrc:$VC),
"vpermxor $VD, $VA, $VB, $VC", IIC_VecFP, []>;
// Vector doubleword integer pack and unpack.
let hasSideEffects = 1 in {

View File

@ -2408,6 +2408,8 @@ def MrgWords {
// arbitrarily chosen to be Big, Little.
//
// Predicate combinations available:
// [HasVSX, IsLittleEndian, HasP8Altivec] Altivec patterns using VSX instr.
// [HasVSX, IsBigEndian, HasP8Altivec] Altivec patterns using VSX instr.
// [HasVSX]
// [HasVSX, IsBigEndian]
// [HasVSX, IsLittleEndian]
@ -2436,6 +2438,18 @@ def MrgWords {
// [HasVSX, IsISA3_0, HasDirectMove, IsBigEndian, IsPPC64]
// [HasVSX, IsISA3_0, HasDirectMove, IsLittleEndian]
// These Altivec patterns are here because we need a VSX instruction to match
// the intrinsic (but only for little endian systems).
let Predicates = [HasVSX, IsLittleEndian, HasP8Altivec] in
def : Pat<(v16i8 (int_ppc_altivec_crypto_vpermxor v16i8:$a,
v16i8:$b, v16i8:$c)),
(v16i8 (VPERMXOR $a, $b, (XXLNOR (COPY_TO_REGCLASS $c, VSRC),
(COPY_TO_REGCLASS $c, VSRC))))>;
let Predicates = [HasVSX, IsBigEndian, HasP8Altivec] in
def : Pat<(v16i8 (int_ppc_altivec_crypto_vpermxor v16i8:$a,
v16i8:$b, v16i8:$c)),
(v16i8 (VPERMXOR $a, $b, $c))>;
let AddedComplexity = 400 in {
// Valid for any VSX subtarget, regardless of endianness.
let Predicates = [HasVSX] in {

View File

@ -1,7 +1,11 @@
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=+crypto < %s | FileCheck %s
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 < %s | FileCheck %s
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
; RUN: -mcpu=pwr8 < %s | FileCheck %s
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN: -mcpu=pwr8 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-LE
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
; RUN: -mcpu=pwr7 -mattr=+crypto < %s | FileCheck %s
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN: -mcpu=pwr9 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-LE
; FIXME: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s
; FIXME: The original intent was to add a check-next for the blr after every check.
; However, this currently fails since we don't eliminate stores of the unused
@ -103,6 +107,7 @@ entry:
%2 = load <16 x i8>, <16 x i8>* %c, align 16
%3 = call <16 x i8> @llvm.ppc.altivec.crypto.vpermxor(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2)
ret <16 x i8> %3
; CHECK-LE: xxlnor
; CHECK: vpermxor 2,
}
@ -127,6 +132,7 @@ entry:
%6 = call <16 x i8> @llvm.ppc.altivec.crypto.vpermxor(<16 x i8> %1, <16 x i8> %3, <16 x i8> %5)
%7 = bitcast <16 x i8> %6 to <8 x i16>
ret <8 x i16> %7
; CHECK-LE: xxlnor
; CHECK: vpermxor 2,
}
@ -148,6 +154,7 @@ entry:
%6 = call <16 x i8> @llvm.ppc.altivec.crypto.vpermxor(<16 x i8> %1, <16 x i8> %3, <16 x i8> %5)
%7 = bitcast <16 x i8> %6 to <4 x i32>
ret <4 x i32> %7
; CHECK-LE: xxlnor
; CHECK: vpermxor 2,
}
@ -169,6 +176,7 @@ entry:
%6 = call <16 x i8> @llvm.ppc.altivec.crypto.vpermxor(<16 x i8> %1, <16 x i8> %3, <16 x i8> %5)
%7 = bitcast <16 x i8> %6 to <2 x i64>
ret <2 x i64> %7
; CHECK-LE: xxlnor
; CHECK: vpermxor 2,
}