mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-26 04:32:44 +01:00
[InstSimplify] Add constant fold for extractelement + splat for scalable vectors
This patch allows an extractelement of a constant splat to be folded for scalable vectors, but only when the lane index is lower than the minimum number of elements of the vector. Differential Revision: https://reviews.llvm.org/D103180
This commit is contained in:
parent
434526729b
commit
7714042905
@ -907,14 +907,10 @@ Constant *llvm::ConstantFoldExtractElementInstruction(Constant *Val,
|
||||
}
|
||||
}
|
||||
|
||||
// CAZ of type ScalableVectorType and n < CAZ->getMinNumElements() =>
|
||||
// extractelt CAZ, n -> 0
|
||||
if (auto *ValSVTy = dyn_cast<ScalableVectorType>(Val->getType())) {
|
||||
if (!CIdx->uge(ValSVTy->getMinNumElements())) {
|
||||
if (auto *CAZ = dyn_cast<ConstantAggregateZero>(Val))
|
||||
return CAZ->getElementValue(CIdx->getZExtValue());
|
||||
}
|
||||
return nullptr;
|
||||
// Lane < Splat minimum vector width => extractelt Splat(x), Lane -> x
|
||||
if (CIdx->getValue().ult(ValVTy->getElementCount().getKnownMinValue())) {
|
||||
if (Constant *SplatVal = Val->getSplatValue())
|
||||
return SplatVal;
|
||||
}
|
||||
|
||||
return Val->getAggregateElement(CIdx);
|
||||
|
@ -13,7 +13,7 @@ define i32* @vector_splat_indices_v2i64_ext0(i32* %a) {
|
||||
|
||||
define i32* @vector_splat_indices_nxv2i64_ext0(i32* %a) {
|
||||
; CHECK-LABEL: @vector_splat_indices_nxv2i64_ext0(
|
||||
; CHECK-NEXT: [[RES:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 extractelement (<vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 4, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer), i32 0)
|
||||
; CHECK-NEXT: [[RES:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 4
|
||||
; CHECK-NEXT: ret i32* [[RES]]
|
||||
;
|
||||
%tmp = insertelement <vscale x 2 x i64> poison, i64 4, i32 0
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: opt -instcombine -S < %s | FileCheck %s
|
||||
; RUN: opt -S -instsimplify < %s | FileCheck %s
|
||||
|
||||
; CHECK-LABEL: definitely_in_bounds
|
||||
; CHECK: ret i8 0
|
||||
@ -11,3 +11,35 @@ define i8 @definitely_in_bounds() {
|
||||
define i8 @maybe_in_bounds() {
|
||||
ret i8 extractelement (<vscale x 16 x i8> zeroinitializer, i64 16)
|
||||
}
|
||||
|
||||
; Examples of extracting a lane from a splat constant
|
||||
|
||||
define i32 @extractconstant_shuffle_in_range(i32 %v) {
|
||||
; CHECK-LABEL: @extractconstant_shuffle_in_range(
|
||||
; CHECK-NEXT: ret i32 1024
|
||||
;
|
||||
%in = insertelement <vscale x 4 x i32> undef, i32 1024, i32 0
|
||||
%splat = shufflevector <vscale x 4 x i32> %in, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
|
||||
%r = extractelement <vscale x 4 x i32> %splat, i32 1
|
||||
ret i32 %r
|
||||
}
|
||||
|
||||
define i32 @extractconstant_shuffle_maybe_out_of_range(i32 %v) {
|
||||
; CHECK-LABEL: @extractconstant_shuffle_maybe_out_of_range(
|
||||
; CHECK-NEXT: ret i32 extractelement (<vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 1024, i32 0), <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer), i32 4)
|
||||
;
|
||||
%in = insertelement <vscale x 4 x i32> undef, i32 1024, i32 0
|
||||
%splat = shufflevector <vscale x 4 x i32> %in, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
|
||||
%r = extractelement <vscale x 4 x i32> %splat, i32 4
|
||||
ret i32 %r
|
||||
}
|
||||
|
||||
define i32 @extractconstant_shuffle_invalid_index(i32 %v) {
|
||||
; CHECK-LABEL: @extractconstant_shuffle_invalid_index(
|
||||
; CHECK-NEXT: ret i32 extractelement (<vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 1024, i32 0), <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer), i32 -1)
|
||||
;
|
||||
%in = insertelement <vscale x 4 x i32> undef, i32 1024, i32 0
|
||||
%splat = shufflevector <vscale x 4 x i32> %in, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
|
||||
%r = extractelement <vscale x 4 x i32> %splat, i32 -1
|
||||
ret i32 %r
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user