mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 03:02:36 +01:00
[VectorCombine][SVE] Do not fold bitcast shuffle for scalable type.
First, the shuffle cost for a scalable type is not known; second, we cannot reason about whether the narrowed shuffle mask for a scalable type is a splat or not. E.g., bitcasting a splat vector from type <vscale x 4 x i32> to <vscale x 8 x i16> would involve narrowing the shuffle mask <vscale x 4 x i32> zeroinitializer to <vscale x 8 x i32> with the element sequence <0, 1, 0, 1, ...>, and we cannot determine whether that is a valid splat or not. Reviewed By: spatel Differential Revision: https://reviews.llvm.org/D86995
This commit is contained in:
parent
60cb217dc7
commit
dc1e85f7b4
@ -434,11 +434,14 @@ bool VectorCombine::foldBitcastShuf(Instruction &I) {
|
||||
m_OneUse(m_Shuffle(m_Value(V), m_Undef(), m_Mask(Mask))))))
|
||||
return false;
|
||||
|
||||
// Disallow non-vector casts and length-changing shuffles.
|
||||
// 1) Do not fold bitcast shuffle for scalable type. First, shuffle cost for
|
||||
// scalable type is unknown; Second, we cannot reason if the narrowed shuffle
|
||||
// mask for scalable type is a splat or not.
|
||||
// 2) Disallow non-vector casts and length-changing shuffles.
|
||||
// TODO: We could allow any shuffle.
|
||||
auto *DestTy = dyn_cast<VectorType>(I.getType());
|
||||
auto *SrcTy = cast<VectorType>(V->getType());
|
||||
if (!DestTy || I.getOperand(0)->getType() != SrcTy)
|
||||
auto *DestTy = dyn_cast<FixedVectorType>(I.getType());
|
||||
auto *SrcTy = dyn_cast<FixedVectorType>(V->getType());
|
||||
if (!SrcTy || !DestTy || I.getOperand(0)->getType() != SrcTy)
|
||||
return false;
|
||||
|
||||
// The new shuffle must not cost more than the old shuffle. The bitcast is
|
||||
@ -447,10 +450,8 @@ bool VectorCombine::foldBitcastShuf(Instruction &I) {
|
||||
TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, SrcTy))
|
||||
return false;
|
||||
|
||||
// FIXME: it should be possible to implement the computation of the widened
|
||||
// shuffle mask in terms of ElementCount to work with scalable shuffles.
|
||||
unsigned DestNumElts = cast<FixedVectorType>(DestTy)->getNumElements();
|
||||
unsigned SrcNumElts = cast<FixedVectorType>(SrcTy)->getNumElements();
|
||||
unsigned DestNumElts = DestTy->getNumElements();
|
||||
unsigned SrcNumElts = SrcTy->getNumElements();
|
||||
SmallVector<int, 16> NewMask;
|
||||
if (SrcNumElts <= DestNumElts) {
|
||||
// The bitcast is from wide to narrow/equal elements. The shuffle mask can
|
||||
|
2
test/Transforms/VectorCombine/AArch64/lit.local.cfg
Normal file
2
test/Transforms/VectorCombine/AArch64/lit.local.cfg
Normal file
@ -0,0 +1,2 @@
|
||||
if not 'AArch64' in config.root.targets:
|
||||
config.unsupported = True
|
@ -0,0 +1,21 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt < %s -vector-combine -S -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK
|
||||
|
||||
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
|
||||
target triple = "aarch64-unknown-linux-gnu"
|
||||
|
||||
; This test checks we are not crashing with TTI when trying to get shuffle cost.
|
||||
; This test also checks that shuffle mask <vscale x 4 x i32> zeroinitializer is
|
||||
; not narrowed into <0, 1, 0, 1, ...>, which we cannot reason if it's a valid
|
||||
; splat or not.
|
||||
|
||||
define <vscale x 8 x i16> @bitcast_shuffle(<vscale x 4 x i32> %a) {
|
||||
; CHECK-LABEL: @bitcast_shuffle(
|
||||
; CHECK-NEXT: [[I:%.*]] = shufflevector <vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
|
||||
; CHECK-NEXT: [[R:%.*]] = bitcast <vscale x 4 x i32> [[I]] to <vscale x 8 x i16>
|
||||
; CHECK-NEXT: ret <vscale x 8 x i16> [[R]]
|
||||
;
|
||||
%i = shufflevector <vscale x 4 x i32> %a, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
|
||||
%r = bitcast <vscale x 4 x i32> %i to <vscale x 8 x i16>
|
||||
ret <vscale x 8 x i16> %r
|
||||
}
|
Loading…
Reference in New Issue
Block a user