// mlir/test/Analysis/test-match-reduction.mlir - llvm-project - Git at Google

 // RUN: mlir-opt %s -test-match-reduction -verify-diagnostics -split-input-file

 // Verify that the generic reduction detection utility works on different
 // dialects.

 // Positive linalg case: a linalg.generic with a single "reduction" iterator
 // whose body adds the input element (%in0) to the output block argument
 // (%out0) and yields the sum. The remarks expect the block argument at
 // index 0 as the reduced value and the arith.addf as the combiner.
 // The expected text names values %arg2/%arg3/%1 rather than %in0/%out0/%add;
 // presumably these are the printer's generated names -- verify against the
 // pass output before editing them.
 // expected-remark@below {{Testing function}}
 func @linalg_red_add(%in0t : tensor<?xf32>, %out0t : tensor<1xf32>) {
   // expected-remark@below {{Reduction found in output #0!}}
   // expected-remark@below {{Reduced Value: <block argument> of type 'f32' at index: 0}}
   // expected-remark@below {{Combiner Op: %1 = arith.addf %arg2, %arg3 : f32}}
   %red = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>,
                                           affine_map<(d0) -> (0)>],
                                           iterator_types = ["reduction"]}
    ins(%in0t : tensor<?xf32>)
    outs(%out0t : tensor<1xf32>) {
     ^bb0(%in0: f32, %out0: f32):
       // Combiner: the only operation between the block arguments and the yield.
       %add = arith.addf %in0, %out0 : f32
       linalg.yield %add : f32
     } -> tensor<1xf32>
   return
 }

 // -----

 // Positive affine case: for each %i, the inner affine.for carries %red_iter
 // (initialized from %cst) through iter_args, adds the element loaded from
 // %in[%i, %j] to it and yields the sum. The remarks expect the affine.load
 // as the reduced value and the arith.addf as the combiner.
 // expected-remark@below {{Testing function}}
 func @affine_red_add(%in: memref<256x512xf32>, %out: memref<256xf32>) {
  %cst = arith.constant 0.000000e+00 : f32
  // The outer loop has no iter_args; the remarks below are attached to the
  // inner loop only.
  affine.for %i = 0 to 256 {
    // expected-remark@below {{Reduction found in output #0!}}
    // expected-remark@below {{Reduced Value: %1 = affine.load %arg0[%arg2, %arg3] : memref<256x512xf32>}}
    // expected-remark@below {{Combiner Op: %2 = arith.addf %arg4, %1 : f32}}
    %final_red = affine.for %j = 0 to 512 iter_args(%red_iter = %cst) -> (f32) {
      %ld = affine.load %in[%i, %j] : memref<256x512xf32>
      // Combiner: loop-carried value plus the freshly loaded element.
      %add = arith.addf %red_iter, %ld : f32
      affine.yield %add : f32
    }
    // Store the inner loop's result for this %i into %out[%i].
    affine.store %final_red, %out[%i] : memref<256xf32>
  }
  return
 }

 // -----

 // Negative case: a max-style reduction written as a compare followed by a
 // select.
 // TODO: Iteration-carried values with multiple uses are not supported yet.
 // Here the output block argument %out0 is used twice (by arith.cmpf and by
 // select), so the remark expects no reduction to be reported for output #0.
 // expected-remark@below {{Testing function}}
 func @linalg_red_max(%in0t: tensor<4x4xf32>, %out0t: tensor<4xf32>) {
   // expected-remark@below {{Reduction NOT found in output #0!}}
   %red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
                                           affine_map<(d0, d1) -> (d0)>],
    iterator_types = ["parallel", "reduction"]}
    ins(%in0t : tensor<4x4xf32>)
    outs(%out0t : tensor<4xf32>) {
     ^bb0(%in0: f32, %out0: f32):
       // First use of %out0.
       %cmp = arith.cmpf ogt, %in0, %out0 : f32
       // Second use of %out0.
       %sel = select %cmp, %in0, %out0 : f32
       linalg.yield %sel : f32
     } -> tensor<4xf32>
   return
 }

 // -----

 // Positive case with extra computation feeding the combiner: the body
 // computes %in0 * %in0 - %in0 and then adds %out0 to that value. The remarks
 // expect the arith.subf result as the reduced value and the arith.addf as
 // the combiner.
 // expected-remark@below {{Testing function}}
 func @linalg_fused_red_add(%in0t: tensor<4x4xf32>, %out0t: tensor<4xf32>) {
   // expected-remark@below {{Reduction found in output #0!}}
   // expected-remark@below {{Reduced Value: %2 = arith.subf %1, %arg2 : f32}}
   // expected-remark@below {{Combiner Op: %3 = arith.addf %2, %arg3 : f32}}
   %red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
                                           affine_map<(d0, d1) -> (d0)>],
    iterator_types = ["parallel", "reduction"]}
    ins(%in0t : tensor<4x4xf32>)
    outs(%out0t : tensor<4xf32>) {
     ^bb0(%in0: f32, %out0: f32):
       // These two ops depend only on the input element, not on %out0.
       %mul = arith.mulf %in0, %in0 : f32
       %sub = arith.subf %mul, %in0 : f32
       // Combiner: the only op that uses the output block argument %out0.
       %add = arith.addf %sub, %out0 : f32
       linalg.yield %add : f32
     } -> tensor<4xf32>
   return
 }

 // -----

 // Negative case: the loop carries a value through iter_args, but what it
 // yields is the freshly loaded element (%ld), not a combination of %rec with
 // new data. The remark expects no reduction in output #0.
 // expected-remark@below {{Testing function}}
 func @affine_no_red_rec(%in: memref<512xf32>) {
  %cst = arith.constant 0.000000e+00 : f32
  // %rec is the value loaded in the previous iteration.
  // expected-remark@below {{Reduction NOT found in output #0!}}
  %final_val = affine.for %j = 0 to 512 iter_args(%rec = %cst) -> (f32) {
    %ld = affine.load %in[%j] : memref<512xf32>
    // %add uses %rec, but it is neither yielded nor used by any other op.
    %add = arith.addf %ld, %rec : f32
    affine.yield %ld : f32
  }
  return
 }

 // -----

 // Negative case with two loop-carried values, %red and %dep. The arith.addf
 // combines both of them and is yielded as the first result; the loaded
 // element %ld is yielded as the second result. The remarks expect neither
 // output to be reported as a reduction.
 // expected-remark@below {{Testing function}}
 func @affine_output_dep(%in: memref<512xf32>) {
  %cst = arith.constant 0.000000e+00 : f32
  // Reduction %red is not supported because it depends on another
  // loop-carried dependence.
  // expected-remark@below {{Reduction NOT found in output #0!}}
  // expected-remark@below {{Reduction NOT found in output #1!}}
  %final_red, %final_dep = affine.for %j = 0 to 512
   iter_args(%red = %cst, %dep = %cst) -> (f32, f32) {
    %ld = affine.load %in[%j] : memref<512xf32>
    // Both operands are loop-carried values (%dep and %red).
    %add = arith.addf %dep, %red : f32
    // Yield order matches iter_args order: %add -> %red, %ld -> %dep.
    affine.yield %add, %ld : f32, f32
  }
  return
 }
	// RUN: mlir-opt %s -test-match-reduction -verify-diagnostics -split-input-file

	// Verify that the generic reduction detection utility works on different
	// dialects.

	// NOTE(review): a function named @linalg_red_add with the same body also
	// appears earlier in this file -- confirm the duplication is intentional.
	// Positive linalg case: a linalg.generic with a single "reduction" iterator
	// whose body adds the input element (%in0) to the output block argument
	// (%out0) and yields the sum. The remarks expect the block argument at
	// index 0 as the reduced value and the arith.addf as the combiner.
	// expected-remark@below {{Testing function}}
	func @linalg_red_add(%in0t : tensor<?xf32>, %out0t : tensor<1xf32>) {
	// expected-remark@below {{Reduction found in output #0!}}
	// expected-remark@below {{Reduced Value: <block argument> of type 'f32' at index: 0}}
	// expected-remark@below {{Combiner Op: %1 = arith.addf %arg2, %arg3 : f32}}
	%red = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>,
	affine_map<(d0) -> (0)>],
	iterator_types = ["reduction"]}
	ins(%in0t : tensor<?xf32>)
	outs(%out0t : tensor<1xf32>) {
	^bb0(%in0: f32, %out0: f32):
	// Combiner: the only operation between the block arguments and the yield.
	%add = arith.addf %in0, %out0 : f32
	linalg.yield %add : f32
	} -> tensor<1xf32>
	return
	}

	// -----

	// NOTE(review): a function named @affine_red_add with the same body also
	// appears earlier in this file -- confirm the duplication is intentional.
	// Positive affine case: for each %i, the inner affine.for carries %red_iter
	// (initialized from %cst) through iter_args, adds the element loaded from
	// %in[%i, %j] to it and yields the sum. The remarks expect the affine.load
	// as the reduced value and the arith.addf as the combiner.
	// expected-remark@below {{Testing function}}
	func @affine_red_add(%in: memref<256x512xf32>, %out: memref<256xf32>) {
	%cst = arith.constant 0.000000e+00 : f32
	// The outer loop has no iter_args; the remarks below are attached to the
	// inner loop only.
	affine.for %i = 0 to 256 {
	// expected-remark@below {{Reduction found in output #0!}}
	// expected-remark@below {{Reduced Value: %1 = affine.load %arg0[%arg2, %arg3] : memref<256x512xf32>}}
	// expected-remark@below {{Combiner Op: %2 = arith.addf %arg4, %1 : f32}}
	%final_red = affine.for %j = 0 to 512 iter_args(%red_iter = %cst) -> (f32) {
	%ld = affine.load %in[%i, %j] : memref<256x512xf32>
	// Combiner: loop-carried value plus the freshly loaded element.
	%add = arith.addf %red_iter, %ld : f32
	affine.yield %add : f32
	}
	// Store the inner loop's result for this %i into %out[%i].
	affine.store %final_red, %out[%i] : memref<256xf32>
	}
	return
	}

	// -----

	// NOTE(review): a function named @linalg_red_max with the same body also
	// appears earlier in this file -- confirm the duplication is intentional.
	// Negative case: a max-style reduction written as a compare followed by a
	// select.
	// TODO: Iteration-carried values with multiple uses are not supported yet.
	// Here the output block argument %out0 is used twice (by arith.cmpf and by
	// select), so the remark expects no reduction to be reported for output #0.
	// expected-remark@below {{Testing function}}
	func @linalg_red_max(%in0t: tensor<4x4xf32>, %out0t: tensor<4xf32>) {
	// expected-remark@below {{Reduction NOT found in output #0!}}
	%red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
	affine_map<(d0, d1) -> (d0)>],
	iterator_types = ["parallel", "reduction"]}
	ins(%in0t : tensor<4x4xf32>)
	outs(%out0t : tensor<4xf32>) {
	^bb0(%in0: f32, %out0: f32):
	// First use of %out0.
	%cmp = arith.cmpf ogt, %in0, %out0 : f32
	// Second use of %out0.
	%sel = select %cmp, %in0, %out0 : f32
	linalg.yield %sel : f32
	} -> tensor<4xf32>
	return
	}

	// -----

	// NOTE(review): a function named @linalg_fused_red_add with the same body
	// also appears earlier in this file -- confirm the duplication is
	// intentional.
	// Positive case with extra computation feeding the combiner: the body
	// computes %in0 * %in0 - %in0 and then adds %out0 to that value. The remarks
	// expect the arith.subf result as the reduced value and the arith.addf as
	// the combiner.
	// expected-remark@below {{Testing function}}
	func @linalg_fused_red_add(%in0t: tensor<4x4xf32>, %out0t: tensor<4xf32>) {
	// expected-remark@below {{Reduction found in output #0!}}
	// expected-remark@below {{Reduced Value: %2 = arith.subf %1, %arg2 : f32}}
	// expected-remark@below {{Combiner Op: %3 = arith.addf %2, %arg3 : f32}}
	%red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
	affine_map<(d0, d1) -> (d0)>],
	iterator_types = ["parallel", "reduction"]}
	ins(%in0t : tensor<4x4xf32>)
	outs(%out0t : tensor<4xf32>) {
	^bb0(%in0: f32, %out0: f32):
	// These two ops depend only on the input element, not on %out0.
	%mul = arith.mulf %in0, %in0 : f32
	%sub = arith.subf %mul, %in0 : f32
	// Combiner: the only op that uses the output block argument %out0.
	%add = arith.addf %sub, %out0 : f32
	linalg.yield %add : f32
	} -> tensor<4xf32>
	return
	}

	// -----

	// NOTE(review): a function named @affine_no_red_rec with the same body also
	// appears earlier in this file -- confirm the duplication is intentional.
	// Negative case: the loop carries a value through iter_args, but what it
	// yields is the freshly loaded element (%ld), not a combination of %rec with
	// new data. The remark expects no reduction in output #0.
	// expected-remark@below {{Testing function}}
	func @affine_no_red_rec(%in: memref<512xf32>) {
	%cst = arith.constant 0.000000e+00 : f32
	// %rec is the value loaded in the previous iteration.
	// expected-remark@below {{Reduction NOT found in output #0!}}
	%final_val = affine.for %j = 0 to 512 iter_args(%rec = %cst) -> (f32) {
	%ld = affine.load %in[%j] : memref<512xf32>
	// %add uses %rec, but it is neither yielded nor used by any other op.
	%add = arith.addf %ld, %rec : f32
	affine.yield %ld : f32
	}
	return
	}

	// -----

	// NOTE(review): a function named @affine_output_dep with the same body also
	// appears earlier in this file -- confirm the duplication is intentional.
	// Negative case with two loop-carried values, %red and %dep. The arith.addf
	// combines both of them and is yielded as the first result; the loaded
	// element %ld is yielded as the second result. The remarks expect neither
	// output to be reported as a reduction.
	// expected-remark@below {{Testing function}}
	func @affine_output_dep(%in: memref<512xf32>) {
	%cst = arith.constant 0.000000e+00 : f32
	// Reduction %red is not supported because it depends on another
	// loop-carried dependence.
	// expected-remark@below {{Reduction NOT found in output #0!}}
	// expected-remark@below {{Reduction NOT found in output #1!}}
	%final_red, %final_dep = affine.for %j = 0 to 512
	iter_args(%red = %cst, %dep = %cst) -> (f32, f32) {
	%ld = affine.load %in[%j] : memref<512xf32>
	// Both operands are loop-carried values (%dep and %red).
	%add = arith.addf %dep, %red : f32
	// Yield order matches iter_args order: %add -> %red, %ld -> %dep.
	affine.yield %add, %ld : f32, f32
	}
	return
	}