[mlir][Tensor] Check for out-of-bounds slice in `insert/extract_slice` verifier (#130487)

Also fix test cases that had invalid ops.
diff --git a/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp b/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp
index e3c8899..e5a32c0 100644
--- a/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp
+++ b/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp
@@ -2352,13 +2352,52 @@
   }
 }
 
+/// Verify that the offsets/sizes/strides-style access into the given tensor
+/// is in-bounds. Only static information is verified.
+static LogicalResult verifyInBoundsSlice(Operation *op,
+                                         RankedTensorType tensorType,
+                                         ArrayRef<int64_t> staticOffsets,
+                                         ArrayRef<int64_t> staticSizes,
+                                         ArrayRef<int64_t> staticStrides) {
+  for (int64_t i = 0, e = tensorType.getRank(); i < e; ++i) {
+    // Nothing to verify for dynamic source dims.
+    if (tensorType.isDynamicDim(i))
+      continue;
+    // Nothing to verify if the offset is dynamic.
+    if (ShapedType::isDynamic(staticOffsets[i]))
+      continue;
+    if (staticOffsets[i] >= tensorType.getDimSize(i))
+      return op->emitOpError("offset ")
+             << i << " is out-of-bounds: " << staticOffsets[i]
+             << " >= " << tensorType.getDimSize(i);
+    if (ShapedType::isDynamic(staticSizes[i]) ||
+        ShapedType::isDynamic(staticStrides[i]))
+      continue;
+    int64_t lastPos =
+        staticOffsets[i] + (staticSizes[i] - 1) * staticStrides[i];
+    if (lastPos >= tensorType.getDimSize(i))
+      return op->emitOpError("slice along dimension ")
+             << i << " runs out-of-bounds: " << lastPos
+             << " >= " << tensorType.getDimSize(i);
+  }
+  return success();
+}
+
 /// Verifier for ExtractSliceOp.
 LogicalResult ExtractSliceOp::verify() {
+  RankedTensorType sourceType = getSourceType();
+
   // Verify result type against inferred type.
   RankedTensorType expectedType = ExtractSliceOp::inferResultType(
-      getSourceType(), getMixedOffsets(), getMixedSizes(), getMixedStrides());
+      sourceType, getMixedOffsets(), getMixedSizes(), getMixedStrides());
   SliceVerificationResult result = isRankReducedType(expectedType, getType());
-  return produceSliceErrorMsg(result, *this, expectedType);
+  if (result != SliceVerificationResult::Success)
+    return produceSliceErrorMsg(result, *this, expectedType);
+
+  // Verify that offsets, sizes, strides do not run out-of-bounds with respect
+  // to the source tensor.
+  return verifyInBoundsSlice(getOperation(), sourceType, getStaticOffsets(),
+                             getStaticSizes(), getStaticStrides());
 }
 
 llvm::SmallBitVector ExtractSliceOp::getDroppedDims() {
@@ -2729,11 +2768,18 @@
 
 /// Verifier for InsertSliceOp.
 LogicalResult InsertSliceOp::verify() {
+  // Verify result type against inferred type.
   RankedTensorType expectedType;
   SliceVerificationResult result =
       verifyInsertSliceOp(getSourceType(), getType(), getStaticOffsets(),
                           getStaticSizes(), getStaticStrides(), &expectedType);
-  return produceSliceErrorMsg(result, *this, expectedType);
+  if (result != SliceVerificationResult::Success)
+    return produceSliceErrorMsg(result, *this, expectedType);
+
+  // Verify that offsets, sizes, strides do not run out-of-bounds with respect
+  // to the source tensor.
+  return verifyInBoundsSlice(getOperation(), getDestType(), getStaticOffsets(),
+                             getStaticSizes(), getStaticStrides());
 }
 
 /// If we have two consecutive InsertSliceOp writing to the same slice, we
@@ -3747,11 +3793,18 @@
     return this->emitError("expected ParallelCombiningOpInterface parent, got:")
            << *(getOperation()->getParentOp());
 
+  // Verify result type against inferred type.
   RankedTensorType expectedType;
   SliceVerificationResult result =
       verifyInsertSliceOp(getSourceType(), getDestType(), getStaticOffsets(),
                           getStaticSizes(), getStaticStrides(), &expectedType);
-  return produceSliceErrorMsg(result, *this, expectedType);
+  if (result != SliceVerificationResult::Success)
+    return produceSliceErrorMsg(result, *this, expectedType);
+
+  // Verify that offsets, sizes, strides do not run out-of-bounds with respect
+  // to the source tensor.
+  return verifyInBoundsSlice(getOperation(), getDestType(), getStaticOffsets(),
+                             getStaticSizes(), getStaticStrides());
 }
 
 void ParallelInsertSliceOp::getCanonicalizationPatterns(
diff --git a/mlir/test/Dialect/Tensor/bubble-up-extract-slice-op.mlir b/mlir/test/Dialect/Tensor/bubble-up-extract-slice-op.mlir
index 252e749..3900bc5 100644
--- a/mlir/test/Dialect/Tensor/bubble-up-extract-slice-op.mlir
+++ b/mlir/test/Dialect/Tensor/bubble-up-extract-slice-op.mlir
@@ -31,7 +31,7 @@
 
 func.func @no_bubble_up_extract_slice_on_stride(%src: tensor<60xf32>) -> tensor<1x1x5xf32> {
   %expand = tensor.expand_shape %src [[0, 1, 2]] output_shape [2, 3, 10] : tensor<60xf32> into tensor<2x3x10xf32>
-  %extract = tensor.extract_slice %expand[0, 0, 5][1, 1, 5][1, 1, 2] : tensor<2x3x10xf32> to tensor<1x1x5xf32>
+  %extract = tensor.extract_slice %expand[0, 0, 0][1, 1, 5][1, 1, 2] : tensor<2x3x10xf32> to tensor<1x1x5xf32>
   return %extract : tensor<1x1x5xf32>
 }
 
diff --git a/mlir/test/Dialect/Tensor/drop-redundant-insert-slice-rank-expansion.mlir b/mlir/test/Dialect/Tensor/drop-redundant-insert-slice-rank-expansion.mlir
index 88e5506..0496b93 100644
--- a/mlir/test/Dialect/Tensor/drop-redundant-insert-slice-rank-expansion.mlir
+++ b/mlir/test/Dialect/Tensor/drop-redundant-insert-slice-rank-expansion.mlir
@@ -51,16 +51,16 @@
 
 // -----
 
-func.func @no_fold_strided_insert_slice_of_extract_slice(%in : tensor<?x8x2x8xf32>, %dest : tensor<1x4x4xf32>) -> tensor<1x4x4xf32> {
+func.func @no_fold_strided_insert_slice_of_extract_slice(%in : tensor<?x8x2x8xf32>, %dest : tensor<1x15x15xf32>) -> tensor<1x15x15xf32> {
   %extracted_slice = tensor.extract_slice %in[0, 0, 0, 0] [1, 8, 1, 8] [1, 1, 1, 1] : tensor<?x8x2x8xf32> to tensor<8x8xf32>
-  %inserted_slice = tensor.insert_slice %extracted_slice into %dest[0, 0, 0] [1, 8, 8] [1, 2, 2] : tensor<8x8xf32> into tensor<1x4x4xf32>
-  return %inserted_slice : tensor<1x4x4xf32>
+  %inserted_slice = tensor.insert_slice %extracted_slice into %dest[0, 0, 0] [1, 8, 8] [1, 2, 2] : tensor<8x8xf32> into tensor<1x15x15xf32>
+  return %inserted_slice : tensor<1x15x15xf32>
 }
 // CHECK-LABEL: func.func @no_fold_strided_insert_slice_of_extract_slice(
 // CHECK-SAME:      %[[ARG0:.*]]: tensor<?x8x2x8xf32>
 // CHECK:         %[[EXTRACTED_SLICE:.*]] = tensor.extract_slice %[[ARG0]]
 // CHECK:         %[[INSERTED_SLICE:.*]] = tensor.insert_slice %[[EXTRACTED_SLICE]]
-// CHECK:         return %[[INSERTED_SLICE]] : tensor<1x4x4xf32>
+// CHECK:         return %[[INSERTED_SLICE]] : tensor<1x15x15xf32>
 
 // -----
 
diff --git a/mlir/test/Dialect/Tensor/fold-tensor-subset-ops.mlir b/mlir/test/Dialect/Tensor/fold-tensor-subset-ops.mlir
index 988b5d8..cf8711e 100644
--- a/mlir/test/Dialect/Tensor/fold-tensor-subset-ops.mlir
+++ b/mlir/test/Dialect/Tensor/fold-tensor-subset-ops.mlir
@@ -271,38 +271,34 @@
 
 // -----
 
-//       CHECK: #[[$map:.*]] = affine_map<()[s0] -> (s0 + 2)>
 // CHECK-LABEL: func @insert_slice_of_insert_slice(
 //  CHECK-SAME:     %[[t:[0-9a-z]*]]: tensor<f32>
 //  CHECK-SAME:     %[[r1:[0-9a-z]*]]: tensor<1x14xf32>
 //  CHECK-SAME:     %[[pos:[0-9a-z]*]]: index
-//       CHECK:   %[[add:.*]] = affine.apply #[[$map]]()[%[[pos]]]
-//       CHECK:   tensor.insert_slice %[[t]] into %[[r1]][4, %[[add]]] [1, 1] [1, 1] : tensor<f32> into tensor<1x14xf32>
+//       CHECK:   tensor.insert_slice %[[t]] into %[[r1]][0, %[[pos]]] [1, 1] [1, 1] : tensor<f32> into tensor<1x14xf32>
 func.func @insert_slice_of_insert_slice(%t: tensor<f32>, %r0: tensor<1x1xf32>, %r1: tensor<1x14xf32>, %pos: index)
     -> tensor<1x14xf32> 
 {
-  %0 = tensor.insert_slice %t into %r0[1, 2] [1, 1] [1, 1] 
+  %0 = tensor.insert_slice %t into %r0[0, 0] [1, 1] [1, 1] 
     : tensor<f32> into tensor<1x1xf32>
-  %1 = tensor.insert_slice %0 into %r1[3, %pos] [1, 1] [1, 1] 
+  %1 = tensor.insert_slice %0 into %r1[0, %pos] [1, 1] [1, 1] 
     : tensor<1x1xf32> into tensor<1x14xf32>
   return %1 : tensor<1x14xf32>
 }
 
 // -----
 
-//   CHECK-DAG: #[[$map:.*]] = affine_map<()[s0] -> (s0 + 2)>
 // CHECK-LABEL: func @insert_slice_of_insert_slice(
 //  CHECK-SAME:     %[[t:[0-9a-z]*]]: tensor<f32>
 //  CHECK-SAME:     %[[r1:[0-9a-z]*]]: tensor<1x14xf32>
 //  CHECK-SAME:     %[[pos:[0-9a-z]*]]: index
-//       CHECK:   %[[composed_pos:.+]] = affine.apply #[[$map]]()[%[[pos]]]
-//       CHECK:   tensor.insert_slice %[[t]] into %[[r1]][3, %[[composed_pos]]] [1, 1] [1, 1] : tensor<f32> into tensor<1x14xf32>
+//       CHECK:   tensor.insert_slice %[[t]] into %[[r1]][0, %[[pos]]] [1, 1] [1, 1] : tensor<f32> into tensor<1x14xf32>
 func.func @insert_slice_of_insert_slice(%t: tensor<f32>, %r0: tensor<1xf32>, %r1: tensor<1x14xf32>, %pos: index)
     -> tensor<1x14xf32> 
 {
-  %0 = tensor.insert_slice %t into %r0[2] [1] [1] 
+  %0 = tensor.insert_slice %t into %r0[0] [1] [1] 
     : tensor<f32> into tensor<1xf32>
-  %1 = tensor.insert_slice %0 into %r1[3, %pos] [1, 1] [1, 1] 
+  %1 = tensor.insert_slice %0 into %r1[0, %pos] [1, 1] [1, 1] 
     : tensor<1xf32> into tensor<1x14xf32>
   return %1 : tensor<1x14xf32>
 }
diff --git a/mlir/test/Dialect/Tensor/invalid.mlir b/mlir/test/Dialect/Tensor/invalid.mlir
index 6541698..eeb3967 100644
--- a/mlir/test/Dialect/Tensor/invalid.mlir
+++ b/mlir/test/Dialect/Tensor/invalid.mlir
@@ -258,6 +258,22 @@
 
 // -----
 
+func.func @extract_slice_offset_out_of_bounds(%arg0: tensor<10xf32>) {
+  // expected-error@+1 {{offset 0 is out-of-bounds: 10 >= 10}}
+  %0 = tensor.extract_slice %arg0 [10][1][1] : tensor<10xf32> to tensor<1xf32>
+  return
+}
+
+// -----
+
+func.func @extract_slice_runs_out_of_bounds(%arg0: tensor<9xf32>) {
+  // expected-error@+1 {{slice along dimension 0 runs out-of-bounds: 9 >= 9}}
+  %0 = tensor.extract_slice %arg0 [3][4][2] : tensor<9xf32> to tensor<4xf32>
+  return
+}
+
+// -----
+
 func.func @insert_slice_wrong_result_rank(%t1: tensor<?xf32>, %t2: tensor<?x?xf32>, %idx : index) {
   // expected-error @+1 {{expected rank to be smaller or equal to the other rank.}}
   %0 = tensor.insert_slice %t2 into %t1[0][4][1] : tensor<?x?xf32> into tensor<?xf32>
@@ -296,6 +312,22 @@
 
 // -----
 
+func.func @insert_slice_offset_out_of_bounds(%arg0: tensor<1xf32>, %arg1: tensor<10xf32>) {
+  // expected-error@+1 {{offset 0 is out-of-bounds: 10 >= 10}}
+  %0 = tensor.insert_slice %arg0 into %arg1 [10][1][1] : tensor<1xf32> into tensor<10xf32>
+  return
+}
+
+// -----
+
+func.func @insert_slice_runs_out_of_bounds(%arg0: tensor<4xf32>, %arg1: tensor<9xf32>) {
+  // expected-error@+1 {{slice along dimension 0 runs out-of-bounds: 9 >= 9}}
+  %0 = tensor.insert_slice %arg0 into %arg1 [3][4][2] : tensor<4xf32> into tensor<9xf32>
+  return
+}
+
+// -----
+
 func.func @illegal_expanding_reshape_static_tensor
     (%arg0: tensor<2x3x20xf32>) -> tensor<2x3x2x4x5xf32> {
   // expected-error @+1 {{expected dimension 2 of collapsed type to be static value of 40}}