
[Verifier] Fail on overrunning and invalid indices for {insert,extract} vector intrinsics

With regards to overrunning, the langref (llvm/docs/LangRef.rst)
specifies:

   (llvm.experimental.vector.insert)
   Elements ``idx`` through (``idx`` + num_elements(``subvec``) - 1)
   must be valid ``vec`` indices. If this condition cannot be determined
   statically but is false at runtime, then the result vector is
   undefined.

   (llvm.experimental.vector.extract)
   Elements ``idx`` through (``idx`` + num_elements(result_type) - 1)
   must be valid vector indices. If this condition cannot be determined
   statically but is false at runtime, then the result vector is
   undefined.
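
To make the overrun rule concrete, here is a minimal IR sketch reusing
the fixed-length extract intrinsic declared in the new verifier test
below. The first call reads elements 4..7 of an 8-element vector and is
fine; the second would read elements 8..11, which do not exist:

    %ok  = call <4 x i32> @llvm.experimental.vector.extract.v4i32.v8i32(<8 x i32> %vec, i64 4)  ; in bounds
    %bad = call <4 x i32> @llvm.experimental.vector.extract.v4i32.v8i32(<8 x i32> %vec, i64 8)  ; overruns %vec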

For the non-mixed cases (e.g. inserting/extracting a scalable vector
into/from another scalable vector, or a fixed vector into/from another
fixed vector), it is possible to check statically whether or not the
above conditions are met. These checks were previously missing from the
verifier; instead, if the conditions were found to be false, InstCombine
would replace the result of the insertion/extraction with undef.
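
The mixed case, by contrast, cannot be checked statically. In the sketch
below (reusing a declaration from the new verifier test), whether the
extract stays in bounds depends on the runtime value of vscale, so the
verifier has to accept it:

    ; Elements 9..11 exist only if vscale >= 2; for vscale == 1 the
    ; result is undefined at runtime, but the verifier cannot tell.
    %v = call <3 x i32> @llvm.experimental.vector.extract.nxv8i32.v3i32(<vscale x 8 x i32> %vec, i64 9)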

With regards to invalid indices, the langref (llvm/docs/LangRef.rst)
specifies:

    (llvm.experimental.vector.insert)
    ``idx`` represents the starting element number at which ``subvec``
    will be inserted. ``idx`` must be a constant multiple of
    ``subvec``'s known minimum vector length.

    (llvm.experimental.vector.extract)
    The ``idx`` specifies the starting element number within ``vec``
    from which a subvector is extracted. ``idx`` must be a constant
    multiple of the known-minimum vector length of the result type.

Similarly, these conditions were not previously enforced in the
verifier. In some circumstances, invalid indices were permitted
silently, and in other circumstances, an undef was spawned where a
verifier error would have been preferred.
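
As a concrete sketch of this index rule (reusing an insert intrinsic
declared in the new verifier test): a 4-element subvector can only be
inserted into an 8-element vector at offsets 0 and 4, so the verifier
now rejects other offsets:

    %bad = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v4i32(<8 x i32> %vec, <4 x i32> %subvec, i64 2)  ; 2 % 4 != 0
    %ok  = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v4i32(<8 x i32> %vec, <4 x i32> %subvec, i64 4)  ; valid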

This commit adds verifier checks to enforce the constraints above.

Differential Revision: https://reviews.llvm.org/D104468
commit f07815e348
parent 3d23594a81
Author: Joe Ellis
Date:   2021-06-18 14:53:53 +00:00

10 changed files with 202 additions and 161 deletions

@@ -5316,23 +5316,63 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
     break;
   }
   case Intrinsic::experimental_vector_insert: {
-    VectorType *VecTy = cast<VectorType>(Call.getArgOperand(0)->getType());
-    VectorType *SubVecTy = cast<VectorType>(Call.getArgOperand(1)->getType());
+    Value *Vec = Call.getArgOperand(0);
+    Value *SubVec = Call.getArgOperand(1);
+    Value *Idx = Call.getArgOperand(2);
+    unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue();
+
+    VectorType *VecTy = cast<VectorType>(Vec->getType());
+    VectorType *SubVecTy = cast<VectorType>(SubVec->getType());
+
+    ElementCount VecEC = VecTy->getElementCount();
+    ElementCount SubVecEC = SubVecTy->getElementCount();

     Assert(VecTy->getElementType() == SubVecTy->getElementType(),
            "experimental_vector_insert parameters must have the same element "
            "type.",
            &Call);
+    Assert(IdxN % SubVecEC.getKnownMinValue() == 0,
+           "experimental_vector_insert index must be a constant multiple of "
+           "the subvector's known minimum vector length.");
+
+    // If this insertion is not the 'mixed' case where a fixed vector is
+    // inserted into a scalable vector, ensure that the insertion of the
+    // subvector does not overrun the parent vector.
+    if (VecEC.isScalable() == SubVecEC.isScalable()) {
+      Assert(
+          IdxN < VecEC.getKnownMinValue() &&
+              IdxN + SubVecEC.getKnownMinValue() <= VecEC.getKnownMinValue(),
+          "subvector operand of experimental_vector_insert would overrun the "
+          "vector being inserted into.");
+    }
     break;
   }
   case Intrinsic::experimental_vector_extract: {
+    Value *Vec = Call.getArgOperand(0);
+    Value *Idx = Call.getArgOperand(1);
+    unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue();
+
     VectorType *ResultTy = cast<VectorType>(Call.getType());
-    VectorType *VecTy = cast<VectorType>(Call.getArgOperand(0)->getType());
+    VectorType *VecTy = cast<VectorType>(Vec->getType());
+
+    ElementCount VecEC = VecTy->getElementCount();
+    ElementCount ResultEC = ResultTy->getElementCount();

     Assert(ResultTy->getElementType() == VecTy->getElementType(),
            "experimental_vector_extract result must have the same element "
            "type as the input vector.",
            &Call);
+    Assert(IdxN % ResultEC.getKnownMinValue() == 0,
+           "experimental_vector_extract index must be a constant multiple of "
+           "the result type's known minimum vector length.");
+
+    // If this extraction is not the 'mixed' case where a fixed vector is
+    // extracted from a scalable vector, ensure that the extraction does not
+    // overrun the parent vector.
+    if (VecEC.isScalable() == ResultEC.isScalable()) {
+      Assert(IdxN < VecEC.getKnownMinValue() &&
+                 IdxN + ResultEC.getKnownMinValue() <= VecEC.getKnownMinValue(),
+             "experimental_vector_extract would overrun.");
+    }
     break;
   }
   case Intrinsic::experimental_noalias_scope_decl: {

@@ -1876,13 +1876,6 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
     unsigned SubVecNumElts = SubVecTy->getNumElements();
     unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue();

-    // The result of this call is undefined if IdxN is not a constant multiple
-    // of the SubVec's minimum vector length OR the insertion overruns Vec.
-    if (IdxN % SubVecNumElts != 0 || IdxN + SubVecNumElts > VecNumElts) {
-      replaceInstUsesWith(CI, UndefValue::get(CI.getType()));
-      return eraseInstFromFunction(CI);
-    }
-
     // An insert that entirely overwrites Vec with SubVec is a nop.
     if (VecNumElts == SubVecNumElts) {
       replaceInstUsesWith(CI, SubVec);
@@ -1930,14 +1923,6 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
     unsigned VecNumElts = VecTy->getNumElements();
     unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue();

-    // The result of this call is undefined if IdxN is not a constant multiple
-    // of the result type's minimum vector length OR the extraction overruns
-    // Vec.
-    if (IdxN % DstNumElts != 0 || IdxN + DstNumElts > VecNumElts) {
-      replaceInstUsesWith(CI, UndefValue::get(CI.getType()));
-      return eraseInstFromFunction(CI);
-    }
-
     // Extracting the entirety of Vec is a nop.
     if (VecNumElts == DstNumElts) {
       replaceInstUsesWith(CI, Vec);

@@ -12,18 +12,25 @@ define <2 x i64> @extract_v2i64_nxv2i64(<vscale x 2 x i64> %vec) nounwind {
 }

 ; Goes through memory currently; idx != 0.
-define <2 x i64> @extract_v2i64_nxv2i64_idx1(<vscale x 2 x i64> %vec) nounwind {
-; CHECK-LABEL: extract_v2i64_nxv2i64_idx1:
+define <2 x i64> @extract_v2i64_nxv2i64_idx2(<vscale x 2 x i64> %vec) nounwind {
+; CHECK-LABEL: extract_v2i64_nxv2i64_idx2:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    cntd x9
+; CHECK-NEXT:    sub x9, x9, #1 // =1
+; CHECK-NEXT:    mov w8, #2
+; CHECK-NEXT:    cmp x9, #2 // =2
 ; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    csel x8, x9, x8, lo
 ; CHECK-NEXT:    st1d { z0.d }, p0, [sp]
-; CHECK-NEXT:    ldur q0, [sp, #8]
+; CHECK-NEXT:    lsl x8, x8, #3
+; CHECK-NEXT:    mov x9, sp
+; CHECK-NEXT:    ldr q0, [x9, x8]
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
-  %retval = call <2 x i64> @llvm.experimental.vector.extract.v2i64.nxv2i64(<vscale x 2 x i64> %vec, i64 1)
+  %retval = call <2 x i64> @llvm.experimental.vector.extract.v2i64.nxv2i64(<vscale x 2 x i64> %vec, i64 2)
   ret <2 x i64> %retval
 }
@@ -38,18 +45,25 @@ ret <4 x i32> %retval
 }

 ; Goes through memory currently; idx != 0.
-define <4 x i32> @extract_v4i32_nxv4i32_idx1(<vscale x 4 x i32> %vec) nounwind {
-; CHECK-LABEL: extract_v4i32_nxv4i32_idx1:
+define <4 x i32> @extract_v4i32_nxv4i32_idx4(<vscale x 4 x i32> %vec) nounwind {
+; CHECK-LABEL: extract_v4i32_nxv4i32_idx4:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    cntw x9
+; CHECK-NEXT:    sub x9, x9, #1 // =1
+; CHECK-NEXT:    mov w8, #4
+; CHECK-NEXT:    cmp x9, #4 // =4
 ; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    csel x8, x9, x8, lo
 ; CHECK-NEXT:    st1w { z0.s }, p0, [sp]
-; CHECK-NEXT:    ldur q0, [sp, #4]
+; CHECK-NEXT:    lsl x8, x8, #2
+; CHECK-NEXT:    mov x9, sp
+; CHECK-NEXT:    ldr q0, [x9, x8]
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
-  %retval = call <4 x i32> @llvm.experimental.vector.extract.v4i32.nxv4i32(<vscale x 4 x i32> %vec, i64 1)
+  %retval = call <4 x i32> @llvm.experimental.vector.extract.v4i32.nxv4i32(<vscale x 4 x i32> %vec, i64 4)
   ret <4 x i32> %retval
 }
@@ -64,18 +78,25 @@ define <8 x i16> @extract_v8i16_nxv8i16(<vscale x 8 x i16> %vec) nounwind {
 }

 ; Goes through memory currently; idx != 0.
-define <8 x i16> @extract_v8i16_nxv8i16_idx1(<vscale x 8 x i16> %vec) nounwind {
-; CHECK-LABEL: extract_v8i16_nxv8i16_idx1:
+define <8 x i16> @extract_v8i16_nxv8i16_idx8(<vscale x 8 x i16> %vec) nounwind {
+; CHECK-LABEL: extract_v8i16_nxv8i16_idx8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    cnth x9
+; CHECK-NEXT:    sub x9, x9, #1 // =1
+; CHECK-NEXT:    mov w8, #8
+; CHECK-NEXT:    cmp x9, #8 // =8
 ; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    csel x8, x9, x8, lo
 ; CHECK-NEXT:    st1h { z0.h }, p0, [sp]
-; CHECK-NEXT:    ldur q0, [sp, #2]
+; CHECK-NEXT:    lsl x8, x8, #1
+; CHECK-NEXT:    mov x9, sp
+; CHECK-NEXT:    ldr q0, [x9, x8]
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
-  %retval = call <8 x i16> @llvm.experimental.vector.extract.v8i16.nxv8i16(<vscale x 8 x i16> %vec, i64 1)
+  %retval = call <8 x i16> @llvm.experimental.vector.extract.v8i16.nxv8i16(<vscale x 8 x i16> %vec, i64 8)
   ret <8 x i16> %retval
 }
@@ -90,18 +111,24 @@ define <16 x i8> @extract_v16i8_nxv16i8(<vscale x 16 x i8> %vec) nounwind {
 }

 ; Goes through memory currently; idx != 0.
-define <16 x i8> @extract_v16i8_nxv16i8_idx1(<vscale x 16 x i8> %vec) nounwind {
-; CHECK-LABEL: extract_v16i8_nxv16i8_idx1:
+define <16 x i8> @extract_v16i8_nxv16i8_idx16(<vscale x 16 x i8> %vec) nounwind {
+; CHECK-LABEL: extract_v16i8_nxv16i8_idx16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    rdvl x9, #1
+; CHECK-NEXT:    sub x9, x9, #1 // =1
 ; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    mov w8, #16
+; CHECK-NEXT:    cmp x9, #16 // =16
 ; CHECK-NEXT:    st1b { z0.b }, p0, [sp]
-; CHECK-NEXT:    ldur q0, [sp, #1]
+; CHECK-NEXT:    csel x8, x9, x8, lo
+; CHECK-NEXT:    mov x9, sp
+; CHECK-NEXT:    ldr q0, [x9, x8]
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
-  %retval = call <16 x i8> @llvm.experimental.vector.extract.v16i8.nxv16i8(<vscale x 16 x i8> %vec, i64 1)
+  %retval = call <16 x i8> @llvm.experimental.vector.extract.v16i8.nxv16i8(<vscale x 16 x i8> %vec, i64 16)
   ret <16 x i8> %retval
 }

@@ -17,19 +17,26 @@ define <vscale x 2 x i64> @insert_v2i64_nxv2i64(<vscale x 2 x i64> %vec, <2 x i6
   ret <vscale x 2 x i64> %retval
 }

-define <vscale x 2 x i64> @insert_v2i64_nxv2i64_idx1(<vscale x 2 x i64> %vec, <2 x i64> %subvec) nounwind {
-; CHECK-LABEL: insert_v2i64_nxv2i64_idx1:
+define <vscale x 2 x i64> @insert_v2i64_nxv2i64_idx2(<vscale x 2 x i64> %vec, <2 x i64> %subvec) nounwind {
+; CHECK-LABEL: insert_v2i64_nxv2i64_idx2:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    cntd x9
+; CHECK-NEXT:    sub x9, x9, #1 // =1
+; CHECK-NEXT:    mov w8, #2
+; CHECK-NEXT:    cmp x9, #2 // =2
+; CHECK-NEXT:    csel x8, x9, x8, lo
 ; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    lsl x8, x8, #3
+; CHECK-NEXT:    mov x9, sp
 ; CHECK-NEXT:    st1d { z0.d }, p0, [sp]
-; CHECK-NEXT:    stur q1, [sp, #8]
+; CHECK-NEXT:    str q1, [x9, x8]
 ; CHECK-NEXT:    ld1d { z0.d }, p0/z, [sp]
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
-  %retval = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> %vec, <2 x i64> %subvec, i64 1)
+  %retval = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> %vec, <2 x i64> %subvec, i64 2)
   ret <vscale x 2 x i64> %retval
 }
@@ -49,19 +56,26 @@ define <vscale x 4 x i32> @insert_v4i32_nxv4i32(<vscale x 4 x i32> %vec, <4 x i3
   ret <vscale x 4 x i32> %retval
 }

-define <vscale x 4 x i32> @insert_v4i32_nxv4i32_idx1(<vscale x 4 x i32> %vec, <4 x i32> %subvec) nounwind {
-; CHECK-LABEL: insert_v4i32_nxv4i32_idx1:
+define <vscale x 4 x i32> @insert_v4i32_nxv4i32_idx4(<vscale x 4 x i32> %vec, <4 x i32> %subvec) nounwind {
+; CHECK-LABEL: insert_v4i32_nxv4i32_idx4:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    cntw x9
+; CHECK-NEXT:    sub x9, x9, #1 // =1
+; CHECK-NEXT:    mov w8, #4
+; CHECK-NEXT:    cmp x9, #4 // =4
+; CHECK-NEXT:    csel x8, x9, x8, lo
 ; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    lsl x8, x8, #2
+; CHECK-NEXT:    mov x9, sp
 ; CHECK-NEXT:    st1w { z0.s }, p0, [sp]
-; CHECK-NEXT:    stur q1, [sp, #4]
+; CHECK-NEXT:    str q1, [x9, x8]
 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [sp]
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
-  %retval = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> %vec, <4 x i32> %subvec, i64 1)
+  %retval = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> %vec, <4 x i32> %subvec, i64 4)
   ret <vscale x 4 x i32> %retval
 }
@@ -81,19 +95,26 @@ define <vscale x 8 x i16> @insert_v8i16_nxv8i16(<vscale x 8 x i16> %vec, <8 x i1
   ret <vscale x 8 x i16> %retval
 }

-define <vscale x 8 x i16> @insert_v8i16_nxv8i16_idx1(<vscale x 8 x i16> %vec, <8 x i16> %subvec) nounwind {
-; CHECK-LABEL: insert_v8i16_nxv8i16_idx1:
+define <vscale x 8 x i16> @insert_v8i16_nxv8i16_idx8(<vscale x 8 x i16> %vec, <8 x i16> %subvec) nounwind {
+; CHECK-LABEL: insert_v8i16_nxv8i16_idx8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    cnth x9
+; CHECK-NEXT:    sub x9, x9, #1 // =1
+; CHECK-NEXT:    mov w8, #8
+; CHECK-NEXT:    cmp x9, #8 // =8
+; CHECK-NEXT:    csel x8, x9, x8, lo
 ; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    lsl x8, x8, #1
+; CHECK-NEXT:    mov x9, sp
 ; CHECK-NEXT:    st1h { z0.h }, p0, [sp]
-; CHECK-NEXT:    stur q1, [sp, #2]
+; CHECK-NEXT:    str q1, [x9, x8]
 ; CHECK-NEXT:    ld1h { z0.h }, p0/z, [sp]
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
-  %retval = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> %vec, <8 x i16> %subvec, i64 1)
+  %retval = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> %vec, <8 x i16> %subvec, i64 8)
   ret <vscale x 8 x i16> %retval
 }
@@ -113,19 +134,25 @@ define <vscale x 16 x i8> @insert_v16i8_nxv16i8(<vscale x 16 x i8> %vec, <16 x i
   ret <vscale x 16 x i8> %retval
 }

-define <vscale x 16 x i8> @insert_v16i8_nxv16i8_idx1(<vscale x 16 x i8> %vec, <16 x i8> %subvec) nounwind {
-; CHECK-LABEL: insert_v16i8_nxv16i8_idx1:
+define <vscale x 16 x i8> @insert_v16i8_nxv16i8_idx16(<vscale x 16 x i8> %vec, <16 x i8> %subvec) nounwind {
+; CHECK-LABEL: insert_v16i8_nxv16i8_idx16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    rdvl x9, #1
+; CHECK-NEXT:    sub x9, x9, #1 // =1
+; CHECK-NEXT:    mov w8, #16
+; CHECK-NEXT:    cmp x9, #16 // =16
 ; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    csel x8, x9, x8, lo
+; CHECK-NEXT:    mov x9, sp
 ; CHECK-NEXT:    st1b { z0.b }, p0, [sp]
-; CHECK-NEXT:    stur q1, [sp, #1]
+; CHECK-NEXT:    str q1, [x9, x8]
 ; CHECK-NEXT:    ld1b { z0.b }, p0/z, [sp]
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
-  %retval = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> %vec, <16 x i8> %subvec, i64 1)
+  %retval = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> %vec, <16 x i8> %subvec, i64 16)
   ret <vscale x 16 x i8> %retval
 }

@@ -297,23 +297,6 @@ define void @extract_v8i1_nxv2i1_0(<vscale x 2 x i1> %x, <8 x i1>* %y) {
   ret void
 }

-define void @extract_v8i1_nxv2i1_2(<vscale x 2 x i1> %x, <8 x i1>* %y) {
-; CHECK-LABEL: extract_v8i1_nxv2i1_2:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a1, zero, e8, mf4, ta, mu
-; CHECK-NEXT:    vmv.v.i v25, 0
-; CHECK-NEXT:    vmerge.vim v25, v25, 1, v0
-; CHECK-NEXT:    vsetivli zero, 8, e8, mf4, ta, mu
-; CHECK-NEXT:    vslidedown.vi v25, v25, 2
-; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
-; CHECK-NEXT:    vmsne.vi v25, v25, 0
-; CHECK-NEXT:    vse1.v v25, (a0)
-; CHECK-NEXT:    ret
-  %c = call <8 x i1> @llvm.experimental.vector.extract.v8i1.nxv2i1(<vscale x 2 x i1> %x, i64 2)
-  store <8 x i1> %c, <8 x i1>* %y
-  ret void
-}
-
 define void @extract_v8i1_nxv64i1_0(<vscale x 64 x i1> %x, <8 x i1>* %y) {
 ; CHECK-LABEL: extract_v8i1_nxv64i1_0:
 ; CHECK:       # %bb.0:

@@ -68,31 +68,6 @@ define <vscale x 8 x i32> @insert_nxv8i32_v8i32_0(<vscale x 8 x i32> %vec, <8 x
   ret <vscale x 8 x i32> %v
 }

-define <vscale x 8 x i32> @insert_nxv8i32_v8i32_4(<vscale x 8 x i32> %vec, <8 x i32>* %svp) {
-; LMULMAX2-LABEL: insert_nxv8i32_v8i32_4:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
-; LMULMAX2-NEXT:    vle32.v v28, (a0)
-; LMULMAX2-NEXT:    vsetivli zero, 12, e32, m4, tu, mu
-; LMULMAX2-NEXT:    vslideup.vi v8, v28, 4
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-LABEL: insert_nxv8i32_v8i32_4:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
-; LMULMAX1-NEXT:    vle32.v v28, (a0)
-; LMULMAX1-NEXT:    addi a0, a0, 16
-; LMULMAX1-NEXT:    vle32.v v12, (a0)
-; LMULMAX1-NEXT:    vsetivli zero, 8, e32, m4, tu, mu
-; LMULMAX1-NEXT:    vslideup.vi v8, v28, 4
-; LMULMAX1-NEXT:    vsetivli zero, 12, e32, m4, tu, mu
-; LMULMAX1-NEXT:    vslideup.vi v8, v12, 8
-; LMULMAX1-NEXT:    ret
-  %sv = load <8 x i32>, <8 x i32>* %svp
-  %v = call <vscale x 8 x i32> @llvm.experimental.vector.insert.v8i32.nxv8i32(<vscale x 8 x i32> %vec, <8 x i32> %sv, i64 4)
-  ret <vscale x 8 x i32> %v
-}
-
 define <vscale x 8 x i32> @insert_nxv8i32_v8i32_8(<vscale x 8 x i32> %vec, <8 x i32>* %svp) {
 ; LMULMAX2-LABEL: insert_nxv8i32_v8i32_8:
 ; LMULMAX2:       # %bb.0:
@@ -509,28 +484,6 @@ define <vscale x 2 x i1> @insert_nxv2i1_v4i1_0(<vscale x 2 x i1> %v, <4 x i1>* %
   ret <vscale x 2 x i1> %c
 }

-define <vscale x 2 x i1> @insert_nxv2i1_v4i1_6(<vscale x 2 x i1> %v, <4 x i1>* %svp) {
-; CHECK-LABEL: insert_nxv2i1_v4i1_6:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, mu
-; CHECK-NEXT:    vle1.v v27, (a0)
-; CHECK-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
-; CHECK-NEXT:    vmv.v.i v25, 0
-; CHECK-NEXT:    vmerge.vim v25, v25, 1, v0
-; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, mu
-; CHECK-NEXT:    vmv.v.i v26, 0
-; CHECK-NEXT:    vmv1r.v v0, v27
-; CHECK-NEXT:    vmerge.vim v26, v26, 1, v0
-; CHECK-NEXT:    vsetivli zero, 10, e8, mf4, tu, mu
-; CHECK-NEXT:    vslideup.vi v25, v26, 6
-; CHECK-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
-; CHECK-NEXT:    vmsne.vi v0, v25, 0
-; CHECK-NEXT:    ret
-  %sv = load <4 x i1>, <4 x i1>* %svp
-  %c = call <vscale x 2 x i1> @llvm.experimental.vector.insert.v4i1.nxv2i1(<vscale x 2 x i1> %v, <4 x i1> %sv, i64 6)
-  ret <vscale x 2 x i1> %c
-}
-
 define <vscale x 8 x i1> @insert_nxv8i1_v4i1_0(<vscale x 8 x i1> %v, <8 x i1>* %svp) {
 ; CHECK-LABEL: insert_nxv8i1_v4i1_0:
 ; CHECK:       # %bb.0:

@@ -289,7 +289,7 @@ define <vscale x 2 x i1> @dupq_neg7() #0 {
 ; CHECK-NEXT:    ret
   %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
   %2 = tail call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> undef,
-                                                                                   <2 x i64> <i64 1, i64 1>, i64 1)
+                                                                                   <2 x i64> <i64 1, i64 1>, i64 2)
   %3 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64> %2 , i64 0)
   %4 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64 0)
   %5 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.cmpne.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %3, <vscale x 2 x i64> %4)

@@ -100,29 +100,6 @@ define <3 x i32> @valid_extraction_h(<8 x i32> %vec) {
   ret <3 x i32> %1
 }

-; ============================================================================ ;
-; Invalid canonicalizations
-; ============================================================================ ;
-
-; Idx must be the be a constant multiple of the destination vector's length,
-; otherwise the result is undefined.
-define <4 x i32> @idx_not_constant_multiple(<8 x i32> %vec) {
-; CHECK-LABEL: @idx_not_constant_multiple(
-; CHECK-NEXT:    ret <4 x i32> undef
-;
-  %1 = call <4 x i32> @llvm.experimental.vector.extract.v4i32.v8i32(<8 x i32> %vec, i64 1)
-  ret <4 x i32> %1
-}
-
-; If the extraction overruns the vector, the result is undefined.
-define <10 x i32> @extract_overrun(<8 x i32> %vec) {
-; CHECK-LABEL: @extract_overrun(
-; CHECK-NEXT:    ret <10 x i32> undef
-;
-  %1 = call <10 x i32> @llvm.experimental.vector.extract.v10i32.v8i32(<8 x i32> %vec, i64 0)
-  ret <10 x i32> %1
-}
-
 ; ============================================================================ ;
 ; Scalable cases
 ; ============================================================================ ;

@@ -108,29 +108,6 @@ define <8 x i32> @valid_insertion_h(<8 x i32> %vec, <3 x i32> %subvec) {
   ret <8 x i32> %1
 }

-; ============================================================================ ;
-; Invalid canonicalizations
-; ============================================================================ ;
-
-; Idx must be the be a constant multiple of the subvector's minimum vector
-; length, otherwise the result is undefined.
-define <8 x i32> @idx_not_constant_multiple(<8 x i32> %vec, <4 x i32> %subvec) {
-; CHECK-LABEL: @idx_not_constant_multiple(
-; CHECK-NEXT:    ret <8 x i32> undef
-;
-  %1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v4i32(<8 x i32> %vec, <4 x i32> %subvec, i64 2)
-  ret <8 x i32> %1
-}
-
-; If the insertion overruns the vector, the result is undefined.
-define <8 x i32> @insert_overrun(<8 x i32> %vec, <8 x i32> %subvec) {
-; CHECK-LABEL: @insert_overrun(
-; CHECK-NEXT:    ret <8 x i32> undef
-;
-  %1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v8i32(<8 x i32> %vec, <8 x i32> %subvec, i64 4)
-  ret <8 x i32> %1
-}
-
 ; ============================================================================ ;
 ; Scalable cases
 ; ============================================================================ ;

@@ -0,0 +1,72 @@
+; RUN: not opt -verify -S < %s 2>&1 >/dev/null | FileCheck %s
+
+;
+; Test that extractions/insertion indices are validated.
+;
+
+; CHECK: experimental_vector_extract index must be a constant multiple of the result type's known minimum vector length.
+define <4 x i32> @extract_idx_not_constant_multiple(<8 x i32> %vec) {
+  %1 = call <4 x i32> @llvm.experimental.vector.extract.v4i32.v8i32(<8 x i32> %vec, i64 1)
+  ret <4 x i32> %1
+}
+
+; CHECK: experimental_vector_insert index must be a constant multiple of the subvector's known minimum vector length.
+define <8 x i32> @insert_idx_not_constant_multiple(<8 x i32> %vec, <4 x i32> %subvec) {
+  %1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v4i32(<8 x i32> %vec, <4 x i32> %subvec, i64 2)
+  ret <8 x i32> %1
+}
+
+;
+; Test that extractions/insertions which 'overrun' are captured.
+;
+
+; CHECK: experimental_vector_extract would overrun.
+define <3 x i32> @extract_overrun_fixed_fixed(<8 x i32> %vec) {
+  %1 = call <3 x i32> @llvm.experimental.vector.extract.v8i32.v3i32(<8 x i32> %vec, i64 6)
+  ret <3 x i32> %1
+}
+
+; CHECK: experimental_vector_extract would overrun.
+define <vscale x 3 x i32> @extract_overrun_scalable_scalable(<vscale x 8 x i32> %vec) {
+  %1 = call <vscale x 3 x i32> @llvm.experimental.vector.extract.nxv8i32.nxv3i32(<vscale x 8 x i32> %vec, i64 6)
+  ret <vscale x 3 x i32> %1
+}
+
+; We cannot statically check whether or not an extraction of a fixed vector
+; from a scalable vector would overrun, because we can't compare the sizes of
+; the two. Therefore, this function should not raise verifier errors.
+; CHECK-NOT: experimental_vector_extract
+define <3 x i32> @extract_overrun_scalable_fixed(<vscale x 8 x i32> %vec) {
+  %1 = call <3 x i32> @llvm.experimental.vector.extract.nxv8i32.v3i32(<vscale x 8 x i32> %vec, i64 6)
+  ret <3 x i32> %1
+}
+
+; CHECK: subvector operand of experimental_vector_insert would overrun the vector being inserted into.
+define <8 x i32> @insert_overrun_fixed_fixed(<8 x i32> %vec, <3 x i32> %subvec) {
+  %1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v3i32(<8 x i32> %vec, <3 x i32> %subvec, i64 6)
+  ret <8 x i32> %1
+}
+
+; CHECK: subvector operand of experimental_vector_insert would overrun the vector being inserted into.
+define <vscale x 8 x i32> @insert_overrun_scalable_scalable(<vscale x 8 x i32> %vec, <vscale x 3 x i32> %subvec) {
+  %1 = call <vscale x 8 x i32> @llvm.experimental.vector.insert.nxv8i32.nxv3i32(<vscale x 8 x i32> %vec, <vscale x 3 x i32> %subvec, i64 6)
+  ret <vscale x 8 x i32> %1
+}
+
+; We cannot statically check whether or not an insertion of a fixed vector into
+; a scalable vector would overrun, because we can't compare the sizes of the
+; two. Therefore, this function should not raise verifier errors.
+; CHECK-NOT: experimental_vector_insert
+define <vscale x 8 x i32> @insert_overrun_scalable_fixed(<vscale x 8 x i32> %vec, <3 x i32> %subvec) {
+  %1 = call <vscale x 8 x i32> @llvm.experimental.vector.insert.nxv8i32.v3i32(<vscale x 8 x i32> %vec, <3 x i32> %subvec, i64 6)
+  ret <vscale x 8 x i32> %1
+}
+
+declare <vscale x 3 x i32> @llvm.experimental.vector.extract.nxv8i32.nxv3i32(<vscale x 8 x i32>, i64)
+declare <vscale x 8 x i32> @llvm.experimental.vector.insert.nxv8i32.nxv3i32(<vscale x 8 x i32>, <vscale x 3 x i32>, i64)
+declare <vscale x 8 x i32> @llvm.experimental.vector.insert.nxv8i32.v3i32(<vscale x 8 x i32>, <3 x i32>, i64)
+declare <3 x i32> @llvm.experimental.vector.extract.nxv8i32.v3i32(<vscale x 8 x i32>, i64)
+declare <3 x i32> @llvm.experimental.vector.extract.v8i32.v3i32(<8 x i32>, i64)
+declare <4 x i32> @llvm.experimental.vector.extract.v4i32.v8i32(<8 x i32>, i64)
+declare <8 x i32> @llvm.experimental.vector.insert.v8i32.v3i32(<8 x i32>, <3 x i32>, i64)
+declare <8 x i32> @llvm.experimental.vector.insert.v8i32.v4i32(<8 x i32>, <4 x i32>, i64)