| // RUN: mlir-opt %s -test-linalg-transform-patterns=test-hoist-padding-2-level -canonicalize | FileCheck %s |
| |
| #map0 = affine_map<(d0)[s0] -> (2, -d0 + s0)> |
| #map1 = affine_map<(d0)[s0] -> (4, -d0 + s0)> |
| #map2 = affine_map<(d0)[s0] -> (3, -d0 + s0)> |
| #map3 = affine_map<(d0, d1) -> (2, d0 - d1)> |
| #map4 = affine_map<(d0, d1) -> (3, d0 - d1)> |
| |
| // CHECK-LABEL: func @matmul_tensors |
| func @matmul_tensors( |
| %arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32>, %arg2: tensor<?x?xf32>) |
| -> tensor<?x?xf32> |
| { |
| %c2 = constant 2 : index |
| %c3 = constant 3 : index |
| %c4 = constant 4 : index |
| %cst = constant 0.000000e+00 : f32 |
| %c0 = constant 0 : index |
| %c1 = constant 1 : index |
| %0 = dim %arg0, %c0 : tensor<?x?xf32> |
| %1 = dim %arg0, %c1 : tensor<?x?xf32> |
| %2 = dim %arg1, %c1 : tensor<?x?xf32> |
| |
| // CHECK: scf.for |
| // CHECK: linalg.init_tensor [%{{.*}}, 2, 4] : tensor<?x2x4xf32> |
| // 1-D loop |
| // CHECK: %[[A:.*]] = scf.for |
| // CHECK-NOT: scf.for |
| // CHECK: subtensor %{{.*}} [1, 1] : tensor<?x?xf32> to tensor<?x?xf32> |
| // CHECK: linalg.simple_pad %{{.*}} : tensor<?x?xf32> to tensor<2x4xf32> pad f32 |
| // CHECK: subtensor_insert %{{.*}} into %{{.*}}[%{{.*}}, 0, 0] |
| // CHECK-SAME: [1, 2, 4] [1, 1, 1] : tensor<2x4xf32> into tensor<?x2x4xf32> |
| // 2-D loop |
| // CHECK: linalg.init_tensor [%{{.*}}, %{{.*}}, 4, 3] : tensor<?x?x4x3xf32> |
| // CHECK: %[[B:.*]] = scf.for |
| // CHECK: scf.for |
| // CHECK-NOT: scf.for |
| // CHECK: subtensor %{{.*}} [1, 1] : tensor<?x?xf32> to tensor<?x?xf32> |
| // CHECK: linalg.simple_pad %{{.*}} : tensor<?x?xf32> to tensor<4x3xf32> pad f32 |
| // CHECK: subtensor_insert %{{.*}} into %{{.*}}[%{{.*}}, %{{.*}}, 0, 0] |
| // CHECK-SAME: [1, 1, 4, 3] [1, 1, 1, 1] : tensor<4x3xf32> into tensor<?x?x4x3xf32> |
| // 2-D loop |
| // CHECK: scf.for %[[J:[0-9a-zA-Z]+]] |
| // CHECK: scf.for %[[K:[0-9a-zA-Z]+]] |
| // CHECK-NOT: scf.for |
| // CHECK: %[[stA:.*]] = subtensor %[[A]][%[[K]], 0, 0] [1, 2, 4] [1, 1, 1] : |
| // CHECK-SAME: tensor<?x2x4xf32> to tensor<2x4xf32> |
| // CHECK: %[[stB:.*]] = subtensor %[[B]][%[[K]], %[[J]], 0, 0] [1, 1, 4, 3] [1, 1, 1, 1] : |
| // CHECK-SAME: tensor<?x?x4x3xf32> to tensor<4x3xf32> |
| // CHECK: %[[stC:.*]] = linalg.simple_pad %{{.*}} pad %{{.*}} : |
| // CHECK-SAME: tensor<?x?xf32> to tensor<2x3xf32> pad f32 |
| // CHECK: linalg.matmul ins(%[[stA]], %[[stB]] : tensor<2x4xf32>, tensor<4x3xf32>) |
| // CHECK-SAME: outs(%[[stC]] : tensor<2x3xf32>) -> tensor<2x3xf32> |
| %3 = scf.for %arg3 = %c0 to %0 step %c2 iter_args(%arg4 = %arg2) -> (tensor<?x?xf32>) { |
| %4 = scf.for %arg5 = %c0 to %2 step %c3 iter_args(%arg6 = %arg4) -> (tensor<?x?xf32>) { |
| %5 = scf.for %arg7 = %c0 to %1 step %c4 iter_args(%arg8 = %arg6) -> (tensor<?x?xf32>) { |
| %6 = dim %arg0, %c0 : tensor<?x?xf32> |
| %7 = affine.min #map0(%arg3)[%6] |
| %8 = dim %arg0, %c1 : tensor<?x?xf32> |
| %9 = affine.min #map1(%arg7)[%8] |
| %10 = subtensor %arg0[%arg3, %arg7] [%7, %9] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32> |
| %11 = dim %arg1, %c0 : tensor<?x?xf32> |
| %12 = affine.min #map1(%arg7)[%11] |
| %13 = dim %arg1, %c1 : tensor<?x?xf32> |
| %14 = affine.min #map2(%arg5)[%13] |
| %15 = subtensor %arg1[%arg7, %arg5] [%12, %14] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32> |
| %16 = dim %arg8, %c0 : tensor<?x?xf32> |
| %17 = affine.min #map3(%16, %arg3) |
| %18 = dim %arg8, %c1 : tensor<?x?xf32> |
| %19 = affine.min #map4(%18, %arg5) |
| %20 = subtensor %arg8[%arg3, %arg5] [%17, %19] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32> |
| %21 = linalg.simple_pad %10 pad %cst : tensor<?x?xf32> to tensor<2x4xf32> pad f32 |
| %22 = linalg.simple_pad %15 pad %cst : tensor<?x?xf32> to tensor<4x3xf32> pad f32 |
| %23 = linalg.simple_pad %20 pad %cst : tensor<?x?xf32> to tensor<2x3xf32> pad f32 |
| %24 = linalg.matmul ins(%21, %22 : tensor<2x4xf32>, tensor<4x3xf32>) outs(%23 : tensor<2x3xf32>) -> tensor<2x3xf32> |
| %25 = subtensor %24[0, 0] [%7, %14] [1, 1] : tensor<2x3xf32> to tensor<?x?xf32> |
| %26 = subtensor_insert %25 into %arg8[%arg3, %arg5] [%17, %19] [%c1, %c1] : tensor<?x?xf32> into tensor<?x?xf32> |
| scf.yield %26 : tensor<?x?xf32> |
| } |
| scf.yield %5 : tensor<?x?xf32> |
| } |
| scf.yield %4 : tensor<?x?xf32> |
| } |
| return %3 : tensor<?x?xf32> |
| } |