[mlir][memref] Update tests to use memref.assume_alignment properly. (#142358)

Since
https://github.com/llvm/llvm-project/commit/ffb9bbfd0745dc22e1fd6edd7b62f72b91f4f6de,
the memref.assume_alignment op returns a result value. This revision
updates the tests to reflect the change:

- Update all the lit tests to use the result of memref.assume_alignment
  where it is present, as sketched below.
- Capture the op's result in the CHECK patterns of the lit tests.
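
A minimal before/after sketch of the pattern applied throughout the diff
(the value names %buf, %aligned, %i, %j, and %v are illustrative, not
taken from the tests):

  // Before: the op had no result, so later ops used the original memref.
  memref.assume_alignment %buf, 16 : memref<4x4xf16>
  %v = memref.load %buf[%i, %j] : memref<4x4xf16>

  // After: the op returns an aligned view; downstream ops should use it
  // so the alignment assumption actually propagates.
  %aligned = memref.assume_alignment %buf, 16 : memref<4x4xf16>
  %v = memref.load %aligned[%i, %j] : memref<4x4xf16>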

---------

Signed-off-by: hanhanW <hanhan0912@gmail.com>
diff --git a/mlir/test/Conversion/MemRefToLLVM/memref-to-llvm.mlir b/mlir/test/Conversion/MemRefToLLVM/memref-to-llvm.mlir
index 8c863bb..acfc188 100644
--- a/mlir/test/Conversion/MemRefToLLVM/memref-to-llvm.mlir
+++ b/mlir/test/Conversion/MemRefToLLVM/memref-to-llvm.mlir
@@ -189,7 +189,7 @@
   // CHECK-NEXT: %[[ALIGN:.*]] = llvm.mlir.constant(16 : index) : i64
   // CHECK-NEXT: llvm.intr.assume %[[TRUE]] ["align"(%[[PTR]], %[[ALIGN]] : !llvm.ptr, i64)] : i1
   // CHECK-INTERFACE: llvm.intr.assume
-  memref.assume_alignment %0, 16 : memref<4x4xf16>
+  %1 = memref.assume_alignment %0, 16 : memref<4x4xf16>
   return
 }
 
@@ -205,7 +205,7 @@
   // CHECK-DAG: %[[ALIGN:.*]] = llvm.mlir.constant(16 : index) : i64
   // CHECK-NEXT: llvm.intr.assume %[[TRUE]] ["align"(%[[BUFF_ADDR]], %[[ALIGN]] : !llvm.ptr, i64)] : i1
   // CHECK-INTERFACE: llvm.intr.assume
-  memref.assume_alignment %0, 16 : memref<4x4xf16, strided<[?, ?], offset: ?>>
+  %1 = memref.assume_alignment %0, 16 : memref<4x4xf16, strided<[?, ?], offset: ?>>
   return
 }
 // -----
diff --git a/mlir/test/Dialect/MemRef/emulate-narrow-type.mlir b/mlir/test/Dialect/MemRef/emulate-narrow-type.mlir
index 111a02a..3378d32 100644
--- a/mlir/test/Dialect/MemRef/emulate-narrow-type.mlir
+++ b/mlir/test/Dialect/MemRef/emulate-narrow-type.mlir
@@ -63,7 +63,7 @@
 
 func.func @memref_load_i4_rank2(%arg0: index, %arg1: index) -> i4 {
     %0 = memref.alloc() : memref<3x125xi4>
-    %align0 =memref.assume_alignment %0, 64 : memref<3x125xi4>
+    %align0 = memref.assume_alignment %0, 64 : memref<3x125xi4>
     %1 = memref.load %align0[%arg0,%arg1] : memref<3x125xi4>
     return %1 : i4
 }
diff --git a/mlir/test/Dialect/MemRef/ops.mlir b/mlir/test/Dialect/MemRef/ops.mlir
index 38ee363..13fdf3c 100644
--- a/mlir/test/Dialect/MemRef/ops.mlir
+++ b/mlir/test/Dialect/MemRef/ops.mlir
@@ -283,7 +283,7 @@
 // CHECK-LABEL: func @assume_alignment
 // CHECK-SAME: %[[MEMREF:.*]]: memref<4x4xf16>
 func.func @assume_alignment(%0: memref<4x4xf16>) {
-  // CHECK: memref.assume_alignment %[[MEMREF]], 16 : memref<4x4xf16>
+  // CHECK: %{{.*}} = memref.assume_alignment %[[MEMREF]], 16 : memref<4x4xf16>
   %1 = memref.assume_alignment %0, 16 : memref<4x4xf16>
   return
 }
diff --git a/mlir/test/Integration/GPU/CUDA/sm90/gemm_f32_f16_f16_128x128x128.mlir b/mlir/test/Integration/GPU/CUDA/sm90/gemm_f32_f16_f16_128x128x128.mlir
index aaa3aff..a5653f3 100644
--- a/mlir/test/Integration/GPU/CUDA/sm90/gemm_f32_f16_f16_128x128x128.mlir
+++ b/mlir/test/Integration/GPU/CUDA/sm90/gemm_f32_f16_f16_128x128x128.mlir
@@ -120,7 +120,7 @@
             threads(%arg3, %arg4, %arg5) in (%arg9 = %hc128, %arg10 = %hc1, %arg11 = %hc1) 
             dynamic_shared_memory_size %shmemSize 
   {  
-    memref.assume_alignment %matrixD, 16 : memref<128x128xf32>
+    %align_matrixD = memref.assume_alignment %matrixD, 16 : memref<128x128xf32>
 
     %c256 = arith.constant 256 : index
     %c10000000 = arith.constant 10000000 : index
@@ -226,7 +226,7 @@
     scf.for %arg12 = %17 to %c128 step %c4 {
       %19 = arith.muli %18, %c4 : index
       %20 = vector.load %accShmemPtr[%arg12, %19] : memref<128x128xf32, 3>, vector<4xf32>
-      vector.store %20, %matrixD[%arg12, %19] : memref<128x128xf32>, vector<4xf32>
+      vector.store %20, %align_matrixD[%arg12, %19] : memref<128x128xf32>, vector<4xf32>
     }
     gpu.terminator
   }
diff --git a/mlir/test/Integration/GPU/CUDA/sm90/gemm_pred_f32_f16_f16_128x128x128.mlir b/mlir/test/Integration/GPU/CUDA/sm90/gemm_pred_f32_f16_f16_128x128x128.mlir
index b257d2b..197351f 100644
--- a/mlir/test/Integration/GPU/CUDA/sm90/gemm_pred_f32_f16_f16_128x128x128.mlir
+++ b/mlir/test/Integration/GPU/CUDA/sm90/gemm_pred_f32_f16_f16_128x128x128.mlir
@@ -120,7 +120,7 @@
             threads(%arg3, %arg4, %arg5) in (%arg9 = %hc128, %arg10 = %hc1, %arg11 = %hc1) 
             dynamic_shared_memory_size %shmemSize 
   {  
-    memref.assume_alignment %matrixD, 16 : memref<128x128xf32>
+    %align_matrixD = memref.assume_alignment %matrixD, 16 : memref<128x128xf32>
 
     %c256 = arith.constant 256 : index
     %c10000000 = arith.constant 10000000 : index
@@ -234,7 +234,7 @@
     scf.for %arg12 = %17 to %c128 step %c4 {
       %19 = arith.muli %18, %c4 : index
       %20 = vector.load %accShmemPtr[%arg12, %19] : memref<128x128xf32, 3>, vector<4xf32>
-      vector.store %20, %matrixD[%arg12, %19] : memref<128x128xf32>, vector<4xf32>
+      vector.store %20, %align_matrixD[%arg12, %19] : memref<128x128xf32>, vector<4xf32>
     }
     gpu.terminator
   }