mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
[PowerPC] Add missing negate for VPERMXOR on little endian subtargets
This intrinsic is supposed to have the permute control vector complemented on little-endian systems (as the ABI specifies and GCC implements). With the current codegen, the result vector is byte-reversed. Differential revision: https://reviews.llvm.org/D95004
This commit is contained in:
parent
2d325e321e
commit
e9b75e2e14
@ -1327,8 +1327,8 @@ def VPMSUMW : VX1_Int_Ty<1160, "vpmsumw",
|
||||
int_ppc_altivec_crypto_vpmsumw, v4i32>;
|
||||
def VPMSUMD : VX1_Int_Ty<1224, "vpmsumd",
|
||||
int_ppc_altivec_crypto_vpmsumd, v2i64>;
|
||||
def VPERMXOR : VA1a_Int_Ty<45, "vpermxor",
|
||||
int_ppc_altivec_crypto_vpermxor, v16i8>;
|
||||
def VPERMXOR : VAForm_1<45, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB, vrrc:$VC),
|
||||
"vpermxor $VD, $VA, $VB, $VC", IIC_VecFP, []>;
|
||||
|
||||
// Vector doubleword integer pack and unpack.
|
||||
let hasSideEffects = 1 in {
|
||||
|
@ -2408,6 +2408,8 @@ def MrgWords {
|
||||
// arbitrarily chosen to be Big, Little.
|
||||
//
|
||||
// Predicate combinations available:
|
||||
// [HasVSX, IsLittleEndian, HasP8Altivec] Altivec patterns using VSX instr.
|
||||
// [HasVSX, IsBigEndian, HasP8Altivec] Altivec patterns using VSX instr.
|
||||
// [HasVSX]
|
||||
// [HasVSX, IsBigEndian]
|
||||
// [HasVSX, IsLittleEndian]
|
||||
@ -2436,6 +2438,18 @@ def MrgWords {
|
||||
// [HasVSX, IsISA3_0, HasDirectMove, IsBigEndian, IsPPC64]
|
||||
// [HasVSX, IsISA3_0, HasDirectMove, IsLittleEndian]
|
||||
|
||||
// These Altivec patterns are here because we need a VSX instruction to match
|
||||
// the intrinsic (but only for little-endian systems).
|
||||
let Predicates = [HasVSX, IsLittleEndian, HasP8Altivec] in
|
||||
def : Pat<(v16i8 (int_ppc_altivec_crypto_vpermxor v16i8:$a,
|
||||
v16i8:$b, v16i8:$c)),
|
||||
(v16i8 (VPERMXOR $a, $b, (XXLNOR (COPY_TO_REGCLASS $c, VSRC),
|
||||
(COPY_TO_REGCLASS $c, VSRC))))>;
|
||||
let Predicates = [HasVSX, IsBigEndian, HasP8Altivec] in
|
||||
def : Pat<(v16i8 (int_ppc_altivec_crypto_vpermxor v16i8:$a,
|
||||
v16i8:$b, v16i8:$c)),
|
||||
(v16i8 (VPERMXOR $a, $b, $c))>;
|
||||
|
||||
let AddedComplexity = 400 in {
|
||||
// Valid for any VSX subtarget, regardless of endianness.
|
||||
let Predicates = [HasVSX] in {
|
||||
|
@ -1,7 +1,11 @@
|
||||
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s
|
||||
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s
|
||||
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=+crypto < %s | FileCheck %s
|
||||
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 < %s | FileCheck %s
|
||||
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
|
||||
; RUN: -mcpu=pwr8 < %s | FileCheck %s
|
||||
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
|
||||
; RUN: -mcpu=pwr8 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-LE
|
||||
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
|
||||
; RUN: -mcpu=pwr7 -mattr=+crypto < %s | FileCheck %s
|
||||
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
|
||||
; RUN: -mcpu=pwr9 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-LE
|
||||
; FIXME: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s
|
||||
; FIXME: The original intent was to add a check-next for the blr after every check.
|
||||
; However, this currently fails since we don't eliminate stores of the unused
|
||||
@ -103,6 +107,7 @@ entry:
|
||||
%2 = load <16 x i8>, <16 x i8>* %c, align 16
|
||||
%3 = call <16 x i8> @llvm.ppc.altivec.crypto.vpermxor(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2)
|
||||
ret <16 x i8> %3
|
||||
; CHECK-LE: xxlnor
|
||||
; CHECK: vpermxor 2,
|
||||
}
|
||||
|
||||
@ -127,6 +132,7 @@ entry:
|
||||
%6 = call <16 x i8> @llvm.ppc.altivec.crypto.vpermxor(<16 x i8> %1, <16 x i8> %3, <16 x i8> %5)
|
||||
%7 = bitcast <16 x i8> %6 to <8 x i16>
|
||||
ret <8 x i16> %7
|
||||
; CHECK-LE: xxlnor
|
||||
; CHECK: vpermxor 2,
|
||||
}
|
||||
|
||||
@ -148,6 +154,7 @@ entry:
|
||||
%6 = call <16 x i8> @llvm.ppc.altivec.crypto.vpermxor(<16 x i8> %1, <16 x i8> %3, <16 x i8> %5)
|
||||
%7 = bitcast <16 x i8> %6 to <4 x i32>
|
||||
ret <4 x i32> %7
|
||||
; CHECK-LE: xxlnor
|
||||
; CHECK: vpermxor 2,
|
||||
}
|
||||
|
||||
@ -169,6 +176,7 @@ entry:
|
||||
%6 = call <16 x i8> @llvm.ppc.altivec.crypto.vpermxor(<16 x i8> %1, <16 x i8> %3, <16 x i8> %5)
|
||||
%7 = bitcast <16 x i8> %6 to <2 x i64>
|
||||
ret <2 x i64> %7
|
||||
; CHECK-LE: xxlnor
|
||||
; CHECK: vpermxor 2,
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user