| // RUN: mlir-opt %s -test-match-reduction -verify-diagnostics -split-input-file |
| |
| // Verify that the generic reduction detection utility works on different |
| // dialects. |
| |
| // expected-remark@below {{Testing function}} |
| func @linalg_red_add(%in0t : tensor<?xf32>, %out0t : tensor<1xf32>) { |
| // expected-remark@below {{Reduction found in output #0!}} |
| // expected-remark@below {{Reduced Value: <block argument> of type 'f32' at index: 0}} |
| // expected-remark@below {{Combiner Op: %1 = arith.addf %arg2, %arg3 : f32}} |
| %red = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, |
| affine_map<(d0) -> (0)>], |
| iterator_types = ["reduction"]} |
| ins(%in0t : tensor<?xf32>) |
| outs(%out0t : tensor<1xf32>) { |
| ^bb0(%in0: f32, %out0: f32): |
| %add = arith.addf %in0, %out0 : f32 |
| linalg.yield %add : f32 |
| } -> tensor<1xf32> |
| return |
| } |
| |
| // ----- |
| |
| // expected-remark@below {{Testing function}} |
| func @affine_red_add(%in: memref<256x512xf32>, %out: memref<256xf32>) { |
| %cst = arith.constant 0.000000e+00 : f32 |
| affine.for %i = 0 to 256 { |
| // expected-remark@below {{Reduction found in output #0!}} |
| // expected-remark@below {{Reduced Value: %1 = affine.load %arg0[%arg2, %arg3] : memref<256x512xf32>}} |
| // expected-remark@below {{Combiner Op: %2 = arith.addf %arg4, %1 : f32}} |
| %final_red = affine.for %j = 0 to 512 iter_args(%red_iter = %cst) -> (f32) { |
| %ld = affine.load %in[%i, %j] : memref<256x512xf32> |
| %add = arith.addf %red_iter, %ld : f32 |
| affine.yield %add : f32 |
| } |
| affine.store %final_red, %out[%i] : memref<256xf32> |
| } |
| return |
| } |
| |
| // ----- |
| |
| // TODO: Iteration-carried values with multiple uses are not supported yet. |
| // expected-remark@below {{Testing function}} |
| func @linalg_red_max(%in0t: tensor<4x4xf32>, %out0t: tensor<4xf32>) { |
| // expected-remark@below {{Reduction NOT found in output #0!}} |
| %red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, |
| affine_map<(d0, d1) -> (d0)>], |
| iterator_types = ["parallel", "reduction"]} |
| ins(%in0t : tensor<4x4xf32>) |
| outs(%out0t : tensor<4xf32>) { |
| ^bb0(%in0: f32, %out0: f32): |
| %cmp = arith.cmpf ogt, %in0, %out0 : f32 |
| %sel = select %cmp, %in0, %out0 : f32 |
| linalg.yield %sel : f32 |
| } -> tensor<4xf32> |
| return |
| } |
| |
| // ----- |
| |
| // expected-remark@below {{Testing function}} |
| func @linalg_fused_red_add(%in0t: tensor<4x4xf32>, %out0t: tensor<4xf32>) { |
| // expected-remark@below {{Reduction found in output #0!}} |
| // expected-remark@below {{Reduced Value: %2 = arith.subf %1, %arg2 : f32}} |
| // expected-remark@below {{Combiner Op: %3 = arith.addf %2, %arg3 : f32}} |
| %red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, |
| affine_map<(d0, d1) -> (d0)>], |
| iterator_types = ["parallel", "reduction"]} |
| ins(%in0t : tensor<4x4xf32>) |
| outs(%out0t : tensor<4xf32>) { |
| ^bb0(%in0: f32, %out0: f32): |
| %mul = arith.mulf %in0, %in0 : f32 |
| %sub = arith.subf %mul, %in0 : f32 |
| %add = arith.addf %sub, %out0 : f32 |
| linalg.yield %add : f32 |
| } -> tensor<4xf32> |
| return |
| } |
| |
| // ----- |
| |
| // expected-remark@below {{Testing function}} |
| func @affine_no_red_rec(%in: memref<512xf32>) { |
| %cst = arith.constant 0.000000e+00 : f32 |
| // %rec is the value loaded in the previous iteration. |
| // expected-remark@below {{Reduction NOT found in output #0!}} |
| %final_val = affine.for %j = 0 to 512 iter_args(%rec = %cst) -> (f32) { |
| %ld = affine.load %in[%j] : memref<512xf32> |
| %add = arith.addf %ld, %rec : f32 |
| affine.yield %ld : f32 |
| } |
| return |
| } |
| |
| // ----- |
| |
| // expected-remark@below {{Testing function}} |
| func @affine_output_dep(%in: memref<512xf32>) { |
| %cst = arith.constant 0.000000e+00 : f32 |
| // Reduction %red is not supported because it depends on another |
| // loop-carried dependence. |
| // expected-remark@below {{Reduction NOT found in output #0!}} |
| // expected-remark@below {{Reduction NOT found in output #1!}} |
| %final_red, %final_dep = affine.for %j = 0 to 512 |
| iter_args(%red = %cst, %dep = %cst) -> (f32, f32) { |
| %ld = affine.load %in[%j] : memref<512xf32> |
| %add = arith.addf %dep, %red : f32 |
| affine.yield %add, %ld : f32, f32 |
| } |
| return |
| } |
| |