// RUN: mlir-opt %s -convert-linalg-tiled-loops-to-scf --split-input-file | FileCheck %s

#map0 = affine_map<(d0) -> (24, -d0 + 192)>
#map1 = affine_map<(d0, d1)[s0] -> (d0 * 192 + s0 + d1)>
#map2 = affine_map<(d0) -> (16, -d0 + 192)>

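// Both induction variables of this tiled_loop use the default (parallel)
// iterator type, so the op is expected to lower to a single scf.parallel over
// both dimensions, keeping the subviews and the fill/matmul in the body.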
func @tiled_loop(%A: memref<192x192xf32>,
                 %B: memref<192x192xf32>,
                 %C: memref<192x192xf32>) {
  %cst = arith.constant 0.000000e+00 : f32
  %c24 = arith.constant 24 : index
  %c16 = arith.constant 16 : index
  %c0 = arith.constant 0 : index
  %c192 = arith.constant 192 : index

  linalg.tiled_loop (%i, %j) = (%c0, %c0) to (%c192, %c192) step (%c24, %c16)
      ins (%A_ = %A: memref<192x192xf32>, %B_ = %B: memref<192x192xf32>)
      outs (%C_ = %C: memref<192x192xf32>) {
    %0 = affine.min #map0(%i)
    %1 = memref.subview %A_[%i, 0] [%0, 192] [1, 1]
      : memref<192x192xf32> to memref<?x192xf32, #map1>
    %2 = affine.min #map2(%j)
    %3 = memref.subview %B_[0, %j] [192, %2] [1, 1]
      : memref<192x192xf32> to memref<192x?xf32, #map1>
    %4 = memref.subview %C_[%i, %j] [%0, %2] [1, 1]
      : memref<192x192xf32> to memref<?x?xf32, #map1>
    linalg.fill(%cst, %4) : f32, memref<?x?xf32, #map1>
    linalg.matmul ins(%1, %3 : memref<?x192xf32, #map1>,
                               memref<192x?xf32, #map1>)
                  outs(%4 : memref<?x?xf32, #map1>)
    linalg.yield
  }
  return
}

// CHECK-LABEL: @tiled_loop
// CHECK-SAME: %[[A:.*]]: memref<192x192xf32>, %[[B:.*]]: memref<192x192xf32>,
// CHECK-SAME: %[[C:.*]]: memref<192x192xf32>) {
// CHECK: %[[C24:.*]] = arith.constant 24 : index
// CHECK: %[[C16:.*]] = arith.constant 16 : index
// CHECK: %[[C0:.*]] = arith.constant 0 : index
// CHECK: %[[C192:.*]] = arith.constant 192 : index
// CHECK: scf.parallel (%[[I:.*]], %[[J:.*]]) = (%[[C0]], %[[C0]])
// CHECK-SAME: to (%[[C192]], %[[C192]]) step (%[[C24]], %[[C16]]) {
// CHECK: %[[A_sub:.*]] = memref.subview %[[A]][%[[I]]
// CHECK: %[[B_sub:.*]] = memref.subview %[[B]][0, %[[J]]]
// CHECK: %[[C_sub:.*]] = memref.subview %[[C]][%[[I]]
// CHECK: linalg.fill
// CHECK: linalg.matmul

// -----

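// Both iterators are reductions, so the tiled_loop is expected to lower to a
// nest of sequential scf.for loops.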
func @tiled_loop_reduction(%A: memref<192x192xf32>,
                           %B: memref<192x192xf32>,
                           %C: memref<f32>) {
  %c24 = arith.constant 24 : index
  %c16 = arith.constant 16 : index
  %c0 = arith.constant 0 : index
  %c192 = arith.constant 192 : index
  %cst = arith.constant 0.000000e+00 : f32

  linalg.tiled_loop (%i, %j) = (%c0, %c0) to (%c192, %c192) step (%c24, %c16)
      ins (%A_ = %A: memref<192x192xf32>, %B_ = %B: memref<192x192xf32>)
      outs (%C_ = %C: memref<f32>)
      iterators["reduction", "reduction"] {
    linalg.fill(%cst, %A_) : f32, memref<192x192xf32>
    linalg.yield
  }
  return
}

// CHECK-LABEL: @tiled_loop_reduction
// CHECK: %[[C24:.*]] = arith.constant 24 : index
// CHECK: %[[C16:.*]] = arith.constant 16 : index
// CHECK: %[[C0:.*]] = arith.constant 0 : index
// CHECK: %[[C192:.*]] = arith.constant 192 : index
// CHECK: scf.for %{{.*}} = %[[C0]] to %[[C192]] step %[[C24]]
// CHECK: scf.for %{{.*}} = %[[C0]] to %[[C192]] step %[[C16]]
// CHECK: linalg.fill

// -----

#strided_1d = affine_map<(d0)[s0] -> (d0 + s0)>
#strided_2d = affine_map<(d0, d1)[s0] -> (d0 * 8 + s0 + d1)>

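// Mixed iterators: the row dimension is a reduction and the column dimension
// is parallel, so the parallel dimension is expected to become the outer
// scf.parallel and the reduction dimension an inner scf.for.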
func @tiled_loop_row_reduction(%A: memref<10x8xf32>,
                               %B: memref<8xf32>) {
  %c0 = arith.constant 0 : index
  %c2 = arith.constant 2 : index
  %c4 = arith.constant 4 : index
  %c8 = arith.constant 8 : index
  %c10 = arith.constant 10 : index
  %cst = arith.constant 0.000000e+00 : f32

  linalg.tiled_loop (%i, %j) = (%c0, %c0) to (%c10, %c8) step (%c2, %c4)
      ins (%A_ = %A: memref<10x8xf32>)
      outs (%B_ = %B: memref<8xf32>)
      iterators["reduction", "parallel"] {
    %A_sub = memref.subview %A_[%i, %j][2, 4][1, 1]
      : memref<10x8xf32> to memref<2x4xf32, #strided_2d>
    %B_sub = memref.subview %B_[%j][4][1]
      : memref<8xf32> to memref<4xf32, #strided_1d>
    linalg.generic {
        indexing_maps = [affine_map<(i, j) -> (i, j)>,
                         affine_map<(i, j) -> (j)>],
        iterator_types = ["reduction", "parallel"]}
        ins(%A_sub : memref<2x4xf32, #strided_2d>)
        outs(%B_sub : memref<4xf32, #strided_1d>) {
      ^bb(%a: f32, %b: f32):
        %0 = arith.addf %a, %b : f32
        linalg.yield %0 : f32
    }
    linalg.yield
  }
  return
}

// CHECK-LABEL: @tiled_loop_row_reduction

// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index
// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index
// CHECK-DAG: %[[C8:.*]] = arith.constant 8 : index
// CHECK-DAG: %[[C10:.*]] = arith.constant 10 : index

// CHECK: scf.parallel (%[[J:.*]]) = (%[[C0]]) to (%[[C8]]) step (%[[C4]])
// CHECK-NEXT: scf.for %[[I:.*]] = %[[C0]] to %[[C10]] step %[[C2]]
// CHECK-NEXT: memref.subview %arg{{[0-9]+}}[%[[I]], %[[J]]] [2, 4] [1, 1]
// CHECK-SAME: : memref<10x8xf32> to memref<2x4xf32, #map{{[0-9]+}}>
// CHECK-NEXT: memref.subview %arg{{[0-9]+}}[%[[J]]] [4] [1]
// CHECK-SAME: : memref<8xf32> to memref<4xf32, #map{{[0-9]+}}>

// -----

#strided_1d = affine_map<(d0)[s0] -> (d0 + s0)>
#strided_2d = affine_map<(d0, d1)[s0] -> (d0 * 8 + s0 + d1)>

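// Same as above with the iterator types swapped: the row dimension is parallel
// and the column dimension is a reduction, so the lowering is expected to
// produce scf.parallel over rows with an inner scf.for over columns.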
func @tiled_loop_col_reduction(%A: memref<10x8xf32>,
                               %B: memref<10xf32>) {
  %c0 = arith.constant 0 : index
  %c2 = arith.constant 2 : index
  %c4 = arith.constant 4 : index
  %c8 = arith.constant 8 : index
  %c10 = arith.constant 10 : index
  %cst = arith.constant 0.000000e+00 : f32

  linalg.tiled_loop (%i, %j) = (%c0, %c0) to (%c10, %c8) step (%c2, %c4)
      ins (%A_ = %A: memref<10x8xf32>)
      outs (%B_ = %B: memref<10xf32>)
      iterators["parallel", "reduction"] {
    %A_sub = memref.subview %A_[%i, %j][2, 4][1, 1]
      : memref<10x8xf32> to memref<2x4xf32, #strided_2d>
    %B_sub = memref.subview %B_[%i][2][1]
      : memref<10xf32> to memref<2xf32, #strided_1d>
    linalg.generic {
        indexing_maps = [affine_map<(i, j) -> (i, j)>,
                         affine_map<(i, j) -> (i)>],
        iterator_types = ["parallel", "reduction"]}
        ins(%A_sub : memref<2x4xf32, #strided_2d>)
        outs(%B_sub : memref<2xf32, #strided_1d>) {
      ^bb(%a: f32, %b: f32):
        %0 = arith.addf %a, %b : f32
        linalg.yield %0 : f32
    }
    linalg.yield
  }
  return
}

// CHECK-LABEL: @tiled_loop_col_reduction

// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index
// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index
// CHECK-DAG: %[[C8:.*]] = arith.constant 8 : index
// CHECK-DAG: %[[C10:.*]] = arith.constant 10 : index

// CHECK: scf.parallel (%[[I:.*]]) = (%[[C0]]) to (%[[C10]]) step (%[[C2]])
// CHECK-NEXT: scf.for %[[J:.*]] = %[[C0]] to %[[C8]] step %[[C4]]
// CHECK-NEXT: memref.subview %arg{{[0-9]+}}[%[[I]], %[[J]]] [2, 4] [1, 1]
// CHECK-SAME: : memref<10x8xf32> to memref<2x4xf32, #map{{[0-9]+}}>
// CHECK-NEXT: memref.subview %arg{{[0-9]+}}[%[[I]]] [2] [1]
// CHECK-SAME: : memref<10xf32> to memref<2xf32, #map{{[0-9]+}}>