[Verifier] Fail on overrunning and invalid indices for {insert,extract} vector intrinsics

With regards to overrunning, the langref (llvm/docs/LangRef.rst) specifies: (llvm.experimental.vector.insert) Elements ``idx`` through (``idx`` + num_elements(``subvec``) - 1) must be valid ``vec`` indices. If this condition cannot be determined statically but is false at runtime, then the result vector is undefined. (llvm.experimental.vector.extract) Elements ``idx`` through (``idx`` + num_elements(result_type) - 1) must be valid vector indices. If this condition cannot be determined statically but is false at runtime, then the result vector is undefined. For the non-mixed cases (e.g. inserting/extracting a scalable into/from another scalable, or inserting/extracting a fixed into/from another fixed), it is possible to statically check whether or not the above conditions are met. This was previously missing from the verifier, and if the conditions were found to be false, the result of the insertion/extraction would be replaced with an undef. With regards to invalid indices, the langref (llvm/docs/LangRef.rst) specifies: (llvm.experimental.vector.insert) ``idx`` represents the starting element number at which ``subvec`` will be inserted. ``idx`` must be a constant multiple of ``subvec``'s known minimum vector length. (llvm.experimental.vector.extract) The ``idx`` specifies the starting element number within ``vec`` from which a subvector is extracted. ``idx`` must be a constant multiple of the known-minimum vector length of the result type. Similarly, these conditions were not previously enforced in the verifier. In some circumstances, invalid indices were permitted silently, and in other circumstances, an undef was spawned where a verifier error would have been preferred. This commit adds verifier checks to enforce the constraints above. Differential Revision: https://reviews.llvm.org/D104468
2025-01-31 12:41:49 +01:00 · 2021-06-18 14:53:53 +00:00 · 2021-06-18 14:53:53 +00:00 · f07815e348
commit f07815e348
parent 3d23594a81
10 changed files with 202 additions and 161 deletions
--- a/lib/IR/Verifier.cpp
+++ b/lib/IR/Verifier.cpp
@ -5316,23 +5316,63 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
    break;
  }
  case Intrinsic::experimental_vector_insert: {
-    VectorType *VecTy = cast<VectorType>(Call.getArgOperand(0)->getType());
-    VectorType *SubVecTy = cast<VectorType>(Call.getArgOperand(1)->getType());
+    Value *Vec = Call.getArgOperand(0);
+    Value *SubVec = Call.getArgOperand(1);
+    Value *Idx = Call.getArgOperand(2);
+    unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue();

+    VectorType *VecTy = cast<VectorType>(Vec->getType());
+    VectorType *SubVecTy = cast<VectorType>(SubVec->getType());
+
+    ElementCount VecEC = VecTy->getElementCount();
+    ElementCount SubVecEC = SubVecTy->getElementCount();
    Assert(VecTy->getElementType() == SubVecTy->getElementType(),
           "experimental_vector_insert parameters must have the same element "
           "type.",
           &Call);
+    Assert(IdxN % SubVecEC.getKnownMinValue() == 0,
+           "experimental_vector_insert index must be a constant multiple of "
+           "the subvector's known minimum vector length.");
+
+    // If this insertion is not the 'mixed' case where a fixed vector is
+    // inserted into a scalable vector, ensure that the insertion of the
+    // subvector does not overrun the parent vector.
+    if (VecEC.isScalable() == SubVecEC.isScalable()) {
+      Assert(
+          IdxN < VecEC.getKnownMinValue() &&
+              IdxN + SubVecEC.getKnownMinValue() <= VecEC.getKnownMinValue(),
+          "subvector operand of experimental_vector_insert would overrun the "
+          "vector being inserted into.");
+    }
    break;
  }
  case Intrinsic::experimental_vector_extract: {
+    Value *Vec = Call.getArgOperand(0);
+    Value *Idx = Call.getArgOperand(1);
+    unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue();
+
    VectorType *ResultTy = cast<VectorType>(Call.getType());
-    VectorType *VecTy = cast<VectorType>(Call.getArgOperand(0)->getType());
+    VectorType *VecTy = cast<VectorType>(Vec->getType());
+
+    ElementCount VecEC = VecTy->getElementCount();
+    ElementCount ResultEC = ResultTy->getElementCount();

    Assert(ResultTy->getElementType() == VecTy->getElementType(),
           "experimental_vector_extract result must have the same element "
           "type as the input vector.",
           &Call);
+    Assert(IdxN % ResultEC.getKnownMinValue() == 0,
+           "experimental_vector_extract index must be a constant multiple of "
+           "the result type's known minimum vector length.");
+
+    // If this extraction is not the 'mixed' case where a fixed vector is is
+    // extracted from a scalable vector, ensure that the extraction does not
+    // overrun the parent vector.
+    if (VecEC.isScalable() == ResultEC.isScalable()) {
+      Assert(IdxN < VecEC.getKnownMinValue() &&
+                 IdxN + ResultEC.getKnownMinValue() <= VecEC.getKnownMinValue(),
+             "experimental_vector_extract would overrun.");
+    }
    break;
  }
  case Intrinsic::experimental_noalias_scope_decl: {
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@ -1876,13 +1876,6 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
      unsigned SubVecNumElts = SubVecTy->getNumElements();
      unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue();

-      // The result of this call is undefined if IdxN is not a constant multiple
-      // of the SubVec's minimum vector length OR the insertion overruns Vec.
-      if (IdxN % SubVecNumElts != 0 || IdxN + SubVecNumElts > VecNumElts) {
-        replaceInstUsesWith(CI, UndefValue::get(CI.getType()));
-        return eraseInstFromFunction(CI);
-      }
-
      // An insert that entirely overwrites Vec with SubVec is a nop.
      if (VecNumElts == SubVecNumElts) {
        replaceInstUsesWith(CI, SubVec);
@ -1930,14 +1923,6 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
      unsigned VecNumElts = VecTy->getNumElements();
      unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue();

-      // The result of this call is undefined if IdxN is not a constant multiple
-      // of the result type's minimum vector length OR the extraction overruns
-      // Vec.
-      if (IdxN % DstNumElts != 0 || IdxN + DstNumElts > VecNumElts) {
-        replaceInstUsesWith(CI, UndefValue::get(CI.getType()));
-        return eraseInstFromFunction(CI);
-      }
-
      // Extracting the entirety of Vec is a nop.
      if (VecNumElts == DstNumElts) {
        replaceInstUsesWith(CI, Vec);
--- a/test/CodeGen/AArch64/sve-extract-vector.ll
+++ b/test/CodeGen/AArch64/sve-extract-vector.ll
@ -12,18 +12,25 @@ define <2 x i64> @extract_v2i64_nxv2i64(<vscale x 2 x i64> %vec) nounwind {
 }

 ; Goes through memory currently; idx != 0.
-define <2 x i64> @extract_v2i64_nxv2i64_idx1(<vscale x 2 x i64> %vec) nounwind {
-; CHECK-LABEL: extract_v2i64_nxv2i64_idx1:
+define <2 x i64> @extract_v2i64_nxv2i64_idx2(<vscale x 2 x i64> %vec) nounwind {
+; CHECK-LABEL: extract_v2i64_nxv2i64_idx2:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    cntd x9
+; CHECK-NEXT:    sub x9, x9, #1 // =1
+; CHECK-NEXT:    mov w8, #2
+; CHECK-NEXT:    cmp x9, #2 // =2
 ; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    csel x8, x9, x8, lo
 ; CHECK-NEXT:    st1d { z0.d }, p0, [sp]
-; CHECK-NEXT:    ldur q0, [sp, #8]
+; CHECK-NEXT:    lsl x8, x8, #3
+; CHECK-NEXT:    mov x9, sp
+; CHECK-NEXT:    ldr q0, [x9, x8]
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
-%retval = call <2 x i64> @llvm.experimental.vector.extract.v2i64.nxv2i64(<vscale x 2 x i64> %vec, i64 1)
+%retval = call <2 x i64> @llvm.experimental.vector.extract.v2i64.nxv2i64(<vscale x 2 x i64> %vec, i64 2)
 ret <2 x i64> %retval
 }

@ -38,18 +45,25 @@ ret <4 x i32> %retval
 }

 ; Goes through memory currently; idx != 0.
-define <4 x i32> @extract_v4i32_nxv4i32_idx1(<vscale x 4 x i32> %vec) nounwind {
-; CHECK-LABEL: extract_v4i32_nxv4i32_idx1:
+define <4 x i32> @extract_v4i32_nxv4i32_idx4(<vscale x 4 x i32> %vec) nounwind {
+; CHECK-LABEL: extract_v4i32_nxv4i32_idx4:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    cntw x9
+; CHECK-NEXT:    sub x9, x9, #1 // =1
+; CHECK-NEXT:    mov w8, #4
+; CHECK-NEXT:    cmp x9, #4 // =4
 ; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    csel x8, x9, x8, lo
 ; CHECK-NEXT:    st1w { z0.s }, p0, [sp]
-; CHECK-NEXT:    ldur q0, [sp, #4]
+; CHECK-NEXT:    lsl x8, x8, #2
+; CHECK-NEXT:    mov x9, sp
+; CHECK-NEXT:    ldr q0, [x9, x8]
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
-  %retval = call <4 x i32> @llvm.experimental.vector.extract.v4i32.nxv4i32(<vscale x 4 x i32> %vec, i64 1)
+  %retval = call <4 x i32> @llvm.experimental.vector.extract.v4i32.nxv4i32(<vscale x 4 x i32> %vec, i64 4)
  ret <4 x i32> %retval
 }

@ -64,18 +78,25 @@ define <8 x i16> @extract_v8i16_nxv8i16(<vscale x 8 x i16> %vec) nounwind {
 }

 ; Goes through memory currently; idx != 0.
-define <8 x i16> @extract_v8i16_nxv8i16_idx1(<vscale x 8 x i16> %vec) nounwind {
-; CHECK-LABEL: extract_v8i16_nxv8i16_idx1:
+define <8 x i16> @extract_v8i16_nxv8i16_idx8(<vscale x 8 x i16> %vec) nounwind {
+; CHECK-LABEL: extract_v8i16_nxv8i16_idx8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    cnth x9
+; CHECK-NEXT:    sub x9, x9, #1 // =1
+; CHECK-NEXT:    mov w8, #8
+; CHECK-NEXT:    cmp x9, #8 // =8
 ; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    csel x8, x9, x8, lo
 ; CHECK-NEXT:    st1h { z0.h }, p0, [sp]
-; CHECK-NEXT:    ldur q0, [sp, #2]
+; CHECK-NEXT:    lsl x8, x8, #1
+; CHECK-NEXT:    mov x9, sp
+; CHECK-NEXT:    ldr q0, [x9, x8]
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
-  %retval = call <8 x i16> @llvm.experimental.vector.extract.v8i16.nxv8i16(<vscale x 8 x i16> %vec, i64 1)
+  %retval = call <8 x i16> @llvm.experimental.vector.extract.v8i16.nxv8i16(<vscale x 8 x i16> %vec, i64 8)
  ret <8 x i16> %retval
 }

@ -90,18 +111,24 @@ define <16 x i8> @extract_v16i8_nxv16i8(<vscale x 16 x i8> %vec) nounwind {
 }

 ; Goes through memory currently; idx != 0.
-define <16 x i8> @extract_v16i8_nxv16i8_idx1(<vscale x 16 x i8> %vec) nounwind {
-; CHECK-LABEL: extract_v16i8_nxv16i8_idx1:
+define <16 x i8> @extract_v16i8_nxv16i8_idx16(<vscale x 16 x i8> %vec) nounwind {
+; CHECK-LABEL: extract_v16i8_nxv16i8_idx16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    rdvl x9, #1
+; CHECK-NEXT:    sub x9, x9, #1 // =1
 ; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    mov w8, #16
+; CHECK-NEXT:    cmp x9, #16 // =16
 ; CHECK-NEXT:    st1b { z0.b }, p0, [sp]
-; CHECK-NEXT:    ldur q0, [sp, #1]
+; CHECK-NEXT:    csel x8, x9, x8, lo
+; CHECK-NEXT:    mov x9, sp
+; CHECK-NEXT:    ldr q0, [x9, x8]
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
-  %retval = call <16 x i8> @llvm.experimental.vector.extract.v16i8.nxv16i8(<vscale x 16 x i8> %vec, i64 1)
+  %retval = call <16 x i8> @llvm.experimental.vector.extract.v16i8.nxv16i8(<vscale x 16 x i8> %vec, i64 16)
  ret <16 x i8> %retval
 }

--- a/test/CodeGen/AArch64/sve-insert-vector.ll
+++ b/test/CodeGen/AArch64/sve-insert-vector.ll
@ -17,19 +17,26 @@ define <vscale x 2 x i64> @insert_v2i64_nxv2i64(<vscale x 2 x i64> %vec, <2 x i6
  ret <vscale x 2 x i64> %retval
 }

-define <vscale x 2 x i64> @insert_v2i64_nxv2i64_idx1(<vscale x 2 x i64> %vec, <2 x i64> %subvec) nounwind {
-; CHECK-LABEL: insert_v2i64_nxv2i64_idx1:
+define <vscale x 2 x i64> @insert_v2i64_nxv2i64_idx2(<vscale x 2 x i64> %vec, <2 x i64> %subvec) nounwind {
+; CHECK-LABEL: insert_v2i64_nxv2i64_idx2:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    cntd x9
+; CHECK-NEXT:    sub x9, x9, #1 // =1
+; CHECK-NEXT:    mov w8, #2
+; CHECK-NEXT:    cmp x9, #2 // =2
+; CHECK-NEXT:    csel x8, x9, x8, lo
 ; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    lsl x8, x8, #3
+; CHECK-NEXT:    mov x9, sp
 ; CHECK-NEXT:    st1d { z0.d }, p0, [sp]
-; CHECK-NEXT:    stur q1, [sp, #8]
+; CHECK-NEXT:    str q1, [x9, x8]
 ; CHECK-NEXT:    ld1d { z0.d }, p0/z, [sp]
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
-  %retval = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> %vec, <2 x i64> %subvec, i64 1)
+  %retval = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> %vec, <2 x i64> %subvec, i64 2)
  ret <vscale x 2 x i64> %retval
 }

@ -49,19 +56,26 @@ define <vscale x 4 x i32> @insert_v4i32_nxv4i32(<vscale x 4 x i32> %vec, <4 x i3
  ret <vscale x 4 x i32> %retval
 }

-define <vscale x 4 x i32> @insert_v4i32_nxv4i32_idx1(<vscale x 4 x i32> %vec, <4 x i32> %subvec) nounwind {
-; CHECK-LABEL: insert_v4i32_nxv4i32_idx1:
+define <vscale x 4 x i32> @insert_v4i32_nxv4i32_idx4(<vscale x 4 x i32> %vec, <4 x i32> %subvec) nounwind {
+; CHECK-LABEL: insert_v4i32_nxv4i32_idx4:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    cntw x9
+; CHECK-NEXT:    sub x9, x9, #1 // =1
+; CHECK-NEXT:    mov w8, #4
+; CHECK-NEXT:    cmp x9, #4 // =4
+; CHECK-NEXT:    csel x8, x9, x8, lo
 ; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    lsl x8, x8, #2
+; CHECK-NEXT:    mov x9, sp
 ; CHECK-NEXT:    st1w { z0.s }, p0, [sp]
-; CHECK-NEXT:    stur q1, [sp, #4]
+; CHECK-NEXT:    str q1, [x9, x8]
 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [sp]
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
-  %retval = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> %vec, <4 x i32> %subvec, i64 1)
+  %retval = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> %vec, <4 x i32> %subvec, i64 4)
  ret <vscale x 4 x i32> %retval
 }

@ -81,19 +95,26 @@ define <vscale x 8 x i16> @insert_v8i16_nxv8i16(<vscale x 8 x i16> %vec, <8 x i1
  ret <vscale x 8 x i16> %retval
 }

-define <vscale x 8 x i16> @insert_v8i16_nxv8i16_idx1(<vscale x 8 x i16> %vec, <8 x i16> %subvec) nounwind {
-; CHECK-LABEL: insert_v8i16_nxv8i16_idx1:
+define <vscale x 8 x i16> @insert_v8i16_nxv8i16_idx8(<vscale x 8 x i16> %vec, <8 x i16> %subvec) nounwind {
+; CHECK-LABEL: insert_v8i16_nxv8i16_idx8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    cnth x9
+; CHECK-NEXT:    sub x9, x9, #1 // =1
+; CHECK-NEXT:    mov w8, #8
+; CHECK-NEXT:    cmp x9, #8 // =8
+; CHECK-NEXT:    csel x8, x9, x8, lo
 ; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    lsl x8, x8, #1
+; CHECK-NEXT:    mov x9, sp
 ; CHECK-NEXT:    st1h { z0.h }, p0, [sp]
-; CHECK-NEXT:    stur q1, [sp, #2]
+; CHECK-NEXT:    str q1, [x9, x8]
 ; CHECK-NEXT:    ld1h { z0.h }, p0/z, [sp]
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
-  %retval = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> %vec, <8 x i16> %subvec, i64 1)
+  %retval = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> %vec, <8 x i16> %subvec, i64 8)
  ret <vscale x 8 x i16> %retval
 }

@ -113,19 +134,25 @@ define <vscale x 16 x i8> @insert_v16i8_nxv16i8(<vscale x 16 x i8> %vec, <16 x i
  ret <vscale x 16 x i8> %retval
 }

-define <vscale x 16 x i8> @insert_v16i8_nxv16i8_idx1(<vscale x 16 x i8> %vec, <16 x i8> %subvec) nounwind {
-; CHECK-LABEL: insert_v16i8_nxv16i8_idx1:
+define <vscale x 16 x i8> @insert_v16i8_nxv16i8_idx16(<vscale x 16 x i8> %vec, <16 x i8> %subvec) nounwind {
+; CHECK-LABEL: insert_v16i8_nxv16i8_idx16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    rdvl x9, #1
+; CHECK-NEXT:    sub x9, x9, #1 // =1
+; CHECK-NEXT:    mov w8, #16
+; CHECK-NEXT:    cmp x9, #16 // =16
 ; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    csel x8, x9, x8, lo
+; CHECK-NEXT:    mov x9, sp
 ; CHECK-NEXT:    st1b { z0.b }, p0, [sp]
-; CHECK-NEXT:    stur q1, [sp, #1]
+; CHECK-NEXT:    str q1, [x9, x8]
 ; CHECK-NEXT:    ld1b { z0.b }, p0/z, [sp]
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
-  %retval = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> %vec, <16 x i8> %subvec, i64 1)
+  %retval = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> %vec, <16 x i8> %subvec, i64 16)
  ret <vscale x 16 x i8> %retval
 }

--- a/test/CodeGen/RISCV/rvv/fixed-vectors-extract-subvector.ll
+++ b/test/CodeGen/RISCV/rvv/fixed-vectors-extract-subvector.ll
@ -297,23 +297,6 @@ define void @extract_v8i1_nxv2i1_0(<vscale x 2 x i1> %x, <8 x i1>* %y) {
  ret void
 }

-define void @extract_v8i1_nxv2i1_2(<vscale x 2 x i1> %x, <8 x i1>* %y) {
-; CHECK-LABEL: extract_v8i1_nxv2i1_2:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a1, zero, e8, mf4, ta, mu
-; CHECK-NEXT:    vmv.v.i v25, 0
-; CHECK-NEXT:    vmerge.vim v25, v25, 1, v0
-; CHECK-NEXT:    vsetivli zero, 8, e8, mf4, ta, mu
-; CHECK-NEXT:    vslidedown.vi v25, v25, 2
-; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
-; CHECK-NEXT:    vmsne.vi v25, v25, 0
-; CHECK-NEXT:    vse1.v v25, (a0)
-; CHECK-NEXT:    ret
-  %c = call <8 x i1> @llvm.experimental.vector.extract.v8i1.nxv2i1(<vscale x 2 x i1> %x, i64 2)
-  store <8 x i1> %c, <8 x i1>* %y
-  ret void
-}
-
 define void @extract_v8i1_nxv64i1_0(<vscale x 64 x i1> %x, <8 x i1>* %y) {
 ; CHECK-LABEL: extract_v8i1_nxv64i1_0:
 ; CHECK:       # %bb.0:
--- a/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll
+++ b/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll
@ -68,31 +68,6 @@ define <vscale x 8 x i32> @insert_nxv8i32_v8i32_0(<vscale x 8 x i32> %vec, <8 x
  ret <vscale x 8 x i32> %v
 }

-define <vscale x 8 x i32> @insert_nxv8i32_v8i32_4(<vscale x 8 x i32> %vec, <8 x i32>* %svp) {
-; LMULMAX2-LABEL: insert_nxv8i32_v8i32_4:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
-; LMULMAX2-NEXT:    vle32.v v28, (a0)
-; LMULMAX2-NEXT:    vsetivli zero, 12, e32, m4, tu, mu
-; LMULMAX2-NEXT:    vslideup.vi v8, v28, 4
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-LABEL: insert_nxv8i32_v8i32_4:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
-; LMULMAX1-NEXT:    vle32.v v28, (a0)
-; LMULMAX1-NEXT:    addi a0, a0, 16
-; LMULMAX1-NEXT:    vle32.v v12, (a0)
-; LMULMAX1-NEXT:    vsetivli zero, 8, e32, m4, tu, mu
-; LMULMAX1-NEXT:    vslideup.vi v8, v28, 4
-; LMULMAX1-NEXT:    vsetivli zero, 12, e32, m4, tu, mu
-; LMULMAX1-NEXT:    vslideup.vi v8, v12, 8
-; LMULMAX1-NEXT:    ret
-  %sv = load <8 x i32>, <8 x i32>* %svp
-  %v = call <vscale x 8 x i32> @llvm.experimental.vector.insert.v8i32.nxv8i32(<vscale x 8 x i32> %vec, <8 x i32> %sv, i64 4)
-  ret <vscale x 8 x i32> %v
-}
-
 define <vscale x 8 x i32> @insert_nxv8i32_v8i32_8(<vscale x 8 x i32> %vec, <8 x i32>* %svp) {
 ; LMULMAX2-LABEL: insert_nxv8i32_v8i32_8:
 ; LMULMAX2:       # %bb.0:
@ -509,28 +484,6 @@ define <vscale x 2 x i1> @insert_nxv2i1_v4i1_0(<vscale x 2 x i1> %v, <4 x i1>* %
  ret <vscale x 2 x i1> %c
 }

-define <vscale x 2 x i1> @insert_nxv2i1_v4i1_6(<vscale x 2 x i1> %v, <4 x i1>* %svp) {
-; CHECK-LABEL: insert_nxv2i1_v4i1_6:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, mu
-; CHECK-NEXT:    vle1.v v27, (a0)
-; CHECK-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
-; CHECK-NEXT:    vmv.v.i v25, 0
-; CHECK-NEXT:    vmerge.vim v25, v25, 1, v0
-; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, mu
-; CHECK-NEXT:    vmv.v.i v26, 0
-; CHECK-NEXT:    vmv1r.v v0, v27
-; CHECK-NEXT:    vmerge.vim v26, v26, 1, v0
-; CHECK-NEXT:    vsetivli zero, 10, e8, mf4, tu, mu
-; CHECK-NEXT:    vslideup.vi v25, v26, 6
-; CHECK-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
-; CHECK-NEXT:    vmsne.vi v0, v25, 0
-; CHECK-NEXT:    ret
-  %sv = load <4 x i1>, <4 x i1>* %svp
-  %c = call <vscale x 2 x i1> @llvm.experimental.vector.insert.v4i1.nxv2i1(<vscale x 2 x i1> %v, <4 x i1> %sv, i64 6)
-  ret <vscale x 2 x i1> %c
-}
-
 define <vscale x 8 x i1> @insert_nxv8i1_v4i1_0(<vscale x 8 x i1> %v, <8 x i1>* %svp) {
 ; CHECK-LABEL: insert_nxv8i1_v4i1_0:
 ; CHECK:       # %bb.0:
--- a/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-cmpne.ll
+++ b/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-cmpne.ll
@ -289,7 +289,7 @@ define <vscale x 2 x i1> @dupq_neg7() #0 {
 ; CHECK-NEXT: ret
  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %2 = tail call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> undef,
-    <2 x i64> <i64 1, i64 1>, i64 1)
+    <2 x i64> <i64 1, i64 1>, i64 2)
  %3 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64> %2 , i64 0)
  %4 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64 0)
  %5 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.cmpne.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %3, <vscale x 2 x i64> %4)
--- a/test/Transforms/InstCombine/canonicalize-vector-extract.ll
+++ b/test/Transforms/InstCombine/canonicalize-vector-extract.ll
@ -100,29 +100,6 @@ define <3 x i32> @valid_extraction_h(<8 x i32> %vec) {
  ret <3 x i32> %1
 }

-; ============================================================================ ;
-; Invalid canonicalizations
-; ============================================================================ ;
-
-; Idx must be the be a constant multiple of the destination vector's length,
-; otherwise the result is undefined.
-define <4 x i32> @idx_not_constant_multiple(<8 x i32> %vec) {
-; CHECK-LABEL: @idx_not_constant_multiple(
-; CHECK-NEXT:    ret <4 x i32> undef
-;
-  %1 = call <4 x i32> @llvm.experimental.vector.extract.v4i32.v8i32(<8 x i32> %vec, i64 1)
-  ret <4 x i32> %1
-}
-
-; If the extraction overruns the vector, the result is undefined.
-define <10 x i32> @extract_overrun(<8 x i32> %vec) {
-; CHECK-LABEL: @extract_overrun(
-; CHECK-NEXT:    ret <10 x i32> undef
-;
-  %1 = call <10 x i32> @llvm.experimental.vector.extract.v10i32.v8i32(<8 x i32> %vec, i64 0)
-  ret <10 x i32> %1
-}
-
 ; ============================================================================ ;
 ; Scalable cases
 ; ============================================================================ ;
--- a/test/Transforms/InstCombine/canonicalize-vector-insert.ll
+++ b/test/Transforms/InstCombine/canonicalize-vector-insert.ll
@ -108,29 +108,6 @@ define <8 x i32> @valid_insertion_h(<8 x i32> %vec, <3 x i32> %subvec) {
  ret <8 x i32> %1
 }

-; ============================================================================ ;
-; Invalid canonicalizations
-; ============================================================================ ;
-
-; Idx must be the be a constant multiple of the subvector's minimum vector
-; length, otherwise the result is undefined.
-define <8 x i32> @idx_not_constant_multiple(<8 x i32> %vec, <4 x i32> %subvec) {
-; CHECK-LABEL: @idx_not_constant_multiple(
-; CHECK-NEXT:    ret <8 x i32> undef
-;
-  %1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v4i32(<8 x i32> %vec, <4 x i32> %subvec, i64 2)
-  ret <8 x i32> %1
-}
-
-; If the insertion overruns the vector, the result is undefined.
-define <8 x i32> @insert_overrun(<8 x i32> %vec, <8 x i32> %subvec) {
-; CHECK-LABEL: @insert_overrun(
-; CHECK-NEXT:    ret <8 x i32> undef
-;
-  %1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v8i32(<8 x i32> %vec, <8 x i32> %subvec, i64 4)
-  ret <8 x i32> %1
-}
-
 ; ============================================================================ ;
 ; Scalable cases
 ; ============================================================================ ;
--- a/test/Verifier/insert-extract-intrinsics-invalid.ll
+++ b/test/Verifier/insert-extract-intrinsics-invalid.ll
@ -0,0 +1,72 @@
+; RUN: not opt -verify -S < %s 2>&1 >/dev/null | FileCheck %s
+
+;
+; Test that extractions/insertion indices are validated.
+;
+
+; CHECK: experimental_vector_extract index must be a constant multiple of the result type's known minimum vector length.
+define <4 x i32> @extract_idx_not_constant_multiple(<8 x i32> %vec) {
+  %1 = call <4 x i32> @llvm.experimental.vector.extract.v4i32.v8i32(<8 x i32> %vec, i64 1)
+  ret <4 x i32> %1
+}
+
+; CHECK: experimental_vector_insert index must be a constant multiple of the subvector's known minimum vector length.
+define <8 x i32> @insert_idx_not_constant_multiple(<8 x i32> %vec, <4 x i32> %subvec) {
+  %1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v4i32(<8 x i32> %vec, <4 x i32> %subvec, i64 2)
+  ret <8 x i32> %1
+}
+
+;
+; Test that extractions/insertions which 'overrun' are captured.
+;
+
+; CHECK: experimental_vector_extract would overrun.
+define <3 x i32> @extract_overrun_fixed_fixed(<8 x i32> %vec) {
+  %1 = call <3 x i32> @llvm.experimental.vector.extract.v8i32.v3i32(<8 x i32> %vec, i64 6)
+  ret <3 x i32> %1
+}
+
+; CHECK: experimental_vector_extract would overrun.
+define <vscale x 3 x i32> @extract_overrun_scalable_scalable(<vscale x 8 x i32> %vec) {
+  %1 = call <vscale x 3 x i32> @llvm.experimental.vector.extract.nxv8i32.nxv3i32(<vscale x 8 x i32> %vec, i64 6)
+  ret <vscale x 3 x i32> %1
+}
+
+; We cannot statically check whether or not an extraction of a fixed vector
+; from a scalable vector would overrun, because we can't compare the sizes of
+; the two. Therefore, this function should not raise verifier errors.
+; CHECK-NOT: experimental_vector_extract
+define <3 x i32> @extract_overrun_scalable_fixed(<vscale x 8 x i32> %vec) {
+  %1 = call <3 x i32> @llvm.experimental.vector.extract.nxv8i32.v3i32(<vscale x 8 x i32> %vec, i64 6)
+  ret <3 x i32> %1
+}
+
+; CHECK: subvector operand of experimental_vector_insert would overrun the vector being inserted into.
+define <8 x i32> @insert_overrun_fixed_fixed(<8 x i32> %vec, <3 x i32> %subvec) {
+  %1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v3i32(<8 x i32> %vec, <3 x i32> %subvec, i64 6)
+  ret <8 x i32> %1
+}
+
+; CHECK: subvector operand of experimental_vector_insert would overrun the vector being inserted into.
+define <vscale x 8 x i32> @insert_overrun_scalable_scalable(<vscale x 8 x i32> %vec, <vscale x 3 x i32> %subvec) {
+  %1 = call <vscale x 8 x i32> @llvm.experimental.vector.insert.nxv8i32.nxv3i32(<vscale x 8 x i32> %vec, <vscale x 3 x i32> %subvec, i64 6)
+  ret <vscale x 8 x i32> %1
+}
+
+; We cannot statically check whether or not an insertion of a fixed vector into
+; a scalable vector would overrun, because we can't compare the sizes of the
+; two. Therefore, this function should not raise verifier errors.
+; CHECK-NOT: experimental_vector_insert
+define <vscale x 8 x i32> @insert_overrun_scalable_fixed(<vscale x 8 x i32> %vec, <3 x i32> %subvec) {
+  %1 = call <vscale x 8 x i32> @llvm.experimental.vector.insert.nxv8i32.v3i32(<vscale x 8 x i32> %vec, <3 x i32> %subvec, i64 6)
+  ret <vscale x 8 x i32> %1
+}
+
+declare <vscale x 3 x i32> @llvm.experimental.vector.extract.nxv8i32.nxv3i32(<vscale x 8 x i32>, i64)
+declare <vscale x 8 x i32> @llvm.experimental.vector.insert.nxv8i32.nxv3i32(<vscale x 8 x i32>, <vscale x 3 x i32>, i64)
+declare <vscale x 8 x i32> @llvm.experimental.vector.insert.nxv8i32.v3i32(<vscale x 8 x i32>, <3 x i32>, i64)
+declare <3 x i32> @llvm.experimental.vector.extract.nxv8i32.v3i32(<vscale x 8 x i32>, i64)
+declare <3 x i32> @llvm.experimental.vector.extract.v8i32.v3i32(<8 x i32>, i64)
+declare <4 x i32> @llvm.experimental.vector.extract.v4i32.v8i32(<8 x i32>, i64)
+declare <8 x i32> @llvm.experimental.vector.insert.v8i32.v3i32(<8 x i32>, <3 x i32>, i64)
+declare <8 x i32> @llvm.experimental.vector.insert.v8i32.v4i32(<8 x i32>, <4 x i32>, i64)