// blob: 02ee2a8b1740c2a1b11fb9595a7f97be9abc199b [file] [log] [blame]
// NOTE: Assertions have been autogenerated by utils/generate-test-checks.py
// RUN: mlir-opt %s -test-sparsification | FileCheck %s
// Attribute bundle consumed by the *_ddd kernels below. All three operands
// use the identity access (i,j,k); the per-dimension "sparse" markers are
// "D" everywhere (presumably "dense" -- confirm against the sparsification
// pass).
#trait_ddd = {
  doc = "X(i,j,k) = A(i,j,k) OP B(i,j,k)",
  iterator_types = ["parallel", "parallel", "parallel"],
  indexing_maps = [
    affine_map<(i, j, k) -> (i, j, k)>,  // operand A
    affine_map<(i, j, k) -> (i, j, k)>,  // operand B
    affine_map<(i, j, k) -> (i, j, k)>   // result X (out)
  ],
  sparse = [
    ["D", "D", "D"],  // operand A
    ["D", "D", "D"],  // operand B
    ["D", "D", "D"]   // result X
  ]
}
// CHECK-LABEL: func @add_ddd(
// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16x8xf32>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
// CHECK: %[[VAL_2:.*]] = constant 32 : index
// CHECK: %[[VAL_3:.*]] = constant 16 : index
// CHECK: %[[VAL_4:.*]] = constant 8 : index
// CHECK: %[[VAL_5:.*]] = constant 0 : index
// CHECK: %[[VAL_6:.*]] = constant 1 : index
// CHECK: %[[VAL_7:.*]] = alloca() : memref<32x16x8xf32>
// CHECK: %[[VAL_8:.*]] = alloca() : memref<32x16x8xf32>
// CHECK: %[[VAL_9:.*]] = alloca() : memref<32x16x8xf32>
// CHECK: scf.for %[[VAL_10:.*]] = %[[VAL_5]] to %[[VAL_2]] step %[[VAL_6]] {
// CHECK: scf.for %[[VAL_11:.*]] = %[[VAL_5]] to %[[VAL_3]] step %[[VAL_6]] {
// CHECK: scf.for %[[VAL_12:.*]] = %[[VAL_5]] to %[[VAL_4]] step %[[VAL_6]] {
// CHECK: %[[VAL_13:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_10]], %[[VAL_11]], %[[VAL_12]]] : memref<32x16x8xf32>
// CHECK: %[[VAL_14:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_10]], %[[VAL_11]], %[[VAL_12]]] : memref<32x16x8xf32>
// CHECK: %[[VAL_15:.*]] = addf %[[VAL_13]], %[[VAL_14]] : f32
// CHECK: store %[[VAL_15]], %[[VAL_9]]{{\[}}%[[VAL_10]], %[[VAL_11]], %[[VAL_12]]] : memref<32x16x8xf32>
// CHECK: }
// CHECK: }
// CHECK: }
// CHECK: %[[VAL_16:.*]] = tensor_load %[[VAL_9]] : memref<32x16x8xf32>
// CHECK: return %[[VAL_16]] : tensor<32x16x8xf32>
// CHECK: }
// Input kernel: element-wise addf of %arga and %argb expressed as a
// linalg.generic carrying #trait_ddd. %arga is also passed as the outs
// operand.
func @add_ddd(%arga: tensor<32x16x8xf32>,
              %argb: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
  %result = linalg.generic #trait_ddd
      ins(%arga, %argb : tensor<32x16x8xf32>, tensor<32x16x8xf32>)
      outs(%arga : tensor<32x16x8xf32>) {
    // %out (the incoming output element) is not read by the body.
    ^bb0(%lhs: f32, %rhs: f32, %out: f32):
      %sum = addf %lhs, %rhs : f32
      linalg.yield %sum : f32
  } -> tensor<32x16x8xf32>
  return %result : tensor<32x16x8xf32>
}
// CHECK-LABEL: func @mul_ddd(
// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16x8xf32>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
// CHECK: %[[VAL_2:.*]] = constant 32 : index
// CHECK: %[[VAL_3:.*]] = constant 16 : index
// CHECK: %[[VAL_4:.*]] = constant 8 : index
// CHECK: %[[VAL_5:.*]] = constant 0 : index
// CHECK: %[[VAL_6:.*]] = constant 1 : index
// CHECK: %[[VAL_7:.*]] = alloca() : memref<32x16x8xf32>
// CHECK: %[[VAL_8:.*]] = alloca() : memref<32x16x8xf32>
// CHECK: %[[VAL_9:.*]] = alloca() : memref<32x16x8xf32>
// CHECK: scf.for %[[VAL_10:.*]] = %[[VAL_5]] to %[[VAL_2]] step %[[VAL_6]] {
// CHECK: scf.for %[[VAL_11:.*]] = %[[VAL_5]] to %[[VAL_3]] step %[[VAL_6]] {
// CHECK: scf.for %[[VAL_12:.*]] = %[[VAL_5]] to %[[VAL_4]] step %[[VAL_6]] {
// CHECK: %[[VAL_13:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_10]], %[[VAL_11]], %[[VAL_12]]] : memref<32x16x8xf32>
// CHECK: %[[VAL_14:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_10]], %[[VAL_11]], %[[VAL_12]]] : memref<32x16x8xf32>
// CHECK: %[[VAL_15:.*]] = mulf %[[VAL_13]], %[[VAL_14]] : f32
// CHECK: store %[[VAL_15]], %[[VAL_9]]{{\[}}%[[VAL_10]], %[[VAL_11]], %[[VAL_12]]] : memref<32x16x8xf32>
// CHECK: }
// CHECK: }
// CHECK: }
// CHECK: %[[VAL_16:.*]] = tensor_load %[[VAL_9]] : memref<32x16x8xf32>
// CHECK: return %[[VAL_16]] : tensor<32x16x8xf32>
// CHECK: }
// Input kernel: element-wise mulf of %arga and %argb expressed as a
// linalg.generic carrying #trait_ddd. %arga is also passed as the outs
// operand.
func @mul_ddd(%arga: tensor<32x16x8xf32>,
              %argb: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
  %result = linalg.generic #trait_ddd
      ins(%arga, %argb : tensor<32x16x8xf32>, tensor<32x16x8xf32>)
      outs(%arga : tensor<32x16x8xf32>) {
    // %out (the incoming output element) is not read by the body.
    ^bb0(%lhs: f32, %rhs: f32, %out: f32):
      %prod = mulf %lhs, %rhs : f32
      linalg.yield %prod : f32
  } -> tensor<32x16x8xf32>
  return %result : tensor<32x16x8xf32>
}
// Attribute bundle consumed by the *_dds kernels below. All three operands
// use the identity access (i,j,k); operand A is marked "D","D","S" while B
// and X are "D" in every dimension ("D"/"S" presumably mean dense/sparse --
// confirm against the sparsification pass).
#trait_dds = {
  doc = "X(i,j,k) = A(i,j,k) OP B(i,j,k)",
  iterator_types = ["parallel", "parallel", "parallel"],
  indexing_maps = [
    affine_map<(i, j, k) -> (i, j, k)>,  // operand A
    affine_map<(i, j, k) -> (i, j, k)>,  // operand B
    affine_map<(i, j, k) -> (i, j, k)>   // result X (out)
  ],
  sparse = [
    ["D", "D", "S"],  // operand A
    ["D", "D", "D"],  // operand B
    ["D", "D", "D"]   // result X
  ]
}
// CHECK-LABEL: func @add_dds(
// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16x8xf32>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
// CHECK: %[[VAL_2:.*]] = constant 999 : index
// CHECK: %[[VAL_3:.*]] = constant 32 : index
// CHECK: %[[VAL_4:.*]] = constant 16 : index
// CHECK: %[[VAL_5:.*]] = constant 8 : index
// CHECK: %[[VAL_6:.*]] = constant 0 : index
// CHECK: %[[VAL_7:.*]] = constant true
// CHECK: %[[VAL_8:.*]] = constant 1 : index
// CHECK: %[[VAL_9:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
// CHECK: %[[VAL_10:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
// CHECK: %[[VAL_11:.*]] = alloca(%[[VAL_2]]) : memref<?xf32>
// CHECK: %[[VAL_12:.*]] = alloca() : memref<32x16x8xf32>
// CHECK: %[[VAL_13:.*]] = alloca() : memref<32x16x8xf32>
// CHECK: scf.for %[[VAL_14:.*]] = %[[VAL_6]] to %[[VAL_3]] step %[[VAL_8]] {
// CHECK: scf.for %[[VAL_15:.*]] = %[[VAL_6]] to %[[VAL_4]] step %[[VAL_8]] {
// CHECK: %[[VAL_16:.*]] = muli %[[VAL_14]], %[[VAL_4]] : index
// CHECK: %[[VAL_17:.*]] = addi %[[VAL_16]], %[[VAL_15]] : index
// CHECK: %[[VAL_18:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_17]]] : memref<?xindex>
// CHECK: %[[VAL_19:.*]] = addi %[[VAL_17]], %[[VAL_8]] : index
// CHECK: %[[VAL_20:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_19]]] : memref<?xindex>
// CHECK: %[[VAL_21:.*]]:2 = scf.while (%[[VAL_22:.*]] = %[[VAL_18]], %[[VAL_23:.*]] = %[[VAL_6]]) : (index, index) -> (index, index) {
// CHECK: %[[VAL_24:.*]] = cmpi ult, %[[VAL_22]], %[[VAL_20]] : index
// CHECK: scf.condition(%[[VAL_24]]) %[[VAL_22]], %[[VAL_23]] : index, index
// CHECK: } do {
// CHECK: ^bb0(%[[VAL_25:.*]]: index, %[[VAL_26:.*]]: index):
// CHECK: %[[VAL_27:.*]] = load %[[VAL_10]]{{\[}}%[[VAL_25]]] : memref<?xindex>
// CHECK: %[[VAL_28:.*]] = cmpi eq, %[[VAL_27]], %[[VAL_26]] : index
// CHECK: scf.if %[[VAL_28]] {
// CHECK: %[[VAL_29:.*]] = load %[[VAL_11]]{{\[}}%[[VAL_25]]] : memref<?xf32>
// CHECK: %[[VAL_30:.*]] = load %[[VAL_12]]{{\[}}%[[VAL_14]], %[[VAL_15]], %[[VAL_26]]] : memref<32x16x8xf32>
// CHECK: %[[VAL_31:.*]] = addf %[[VAL_29]], %[[VAL_30]] : f32
// CHECK: store %[[VAL_31]], %[[VAL_13]]{{\[}}%[[VAL_14]], %[[VAL_15]], %[[VAL_26]]] : memref<32x16x8xf32>
// CHECK: } else {
// CHECK: scf.if %[[VAL_7]] {
// CHECK: %[[VAL_32:.*]] = load %[[VAL_12]]{{\[}}%[[VAL_14]], %[[VAL_15]], %[[VAL_26]]] : memref<32x16x8xf32>
// CHECK: store %[[VAL_32]], %[[VAL_13]]{{\[}}%[[VAL_14]], %[[VAL_15]], %[[VAL_26]]] : memref<32x16x8xf32>
// CHECK: } else {
// CHECK: }
// CHECK: }
// CHECK: %[[VAL_33:.*]] = cmpi eq, %[[VAL_27]], %[[VAL_26]] : index
// CHECK: %[[VAL_34:.*]] = addi %[[VAL_25]], %[[VAL_8]] : index
// CHECK: %[[VAL_35:.*]] = select %[[VAL_33]], %[[VAL_34]], %[[VAL_25]] : index
// CHECK: %[[VAL_36:.*]] = addi %[[VAL_26]], %[[VAL_8]] : index
// CHECK: scf.yield %[[VAL_35]], %[[VAL_36]] : index, index
// CHECK: }
// CHECK: scf.for %[[VAL_37:.*]] = %[[VAL_38:.*]]#1 to %[[VAL_5]] step %[[VAL_8]] {
// CHECK: %[[VAL_39:.*]] = load %[[VAL_12]]{{\[}}%[[VAL_14]], %[[VAL_15]], %[[VAL_37]]] : memref<32x16x8xf32>
// CHECK: store %[[VAL_39]], %[[VAL_13]]{{\[}}%[[VAL_14]], %[[VAL_15]], %[[VAL_37]]] : memref<32x16x8xf32>
// CHECK: }
// CHECK: }
// CHECK: }
// CHECK: %[[VAL_40:.*]] = tensor_load %[[VAL_13]] : memref<32x16x8xf32>
// CHECK: return %[[VAL_40]] : tensor<32x16x8xf32>
// CHECK: }
// Input kernel: element-wise addf of %arga and %argb expressed as a
// linalg.generic carrying #trait_dds. %arga is also passed as the outs
// operand.
func @add_dds(%arga: tensor<32x16x8xf32>,
              %argb: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
  %result = linalg.generic #trait_dds
      ins(%arga, %argb : tensor<32x16x8xf32>, tensor<32x16x8xf32>)
      outs(%arga : tensor<32x16x8xf32>) {
    // %out (the incoming output element) is not read by the body.
    ^bb0(%lhs: f32, %rhs: f32, %out: f32):
      %sum = addf %lhs, %rhs : f32
      linalg.yield %sum : f32
  } -> tensor<32x16x8xf32>
  return %result : tensor<32x16x8xf32>
}
// CHECK-LABEL: func @mul_dds(
// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16x8xf32>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
// CHECK: %[[VAL_2:.*]] = constant 999 : index
// CHECK: %[[VAL_3:.*]] = constant 32 : index
// CHECK: %[[VAL_4:.*]] = constant 16 : index
// CHECK: %[[VAL_5:.*]] = constant 0 : index
// CHECK: %[[VAL_6:.*]] = constant 1 : index
// CHECK: %[[VAL_7:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
// CHECK: %[[VAL_8:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
// CHECK: %[[VAL_9:.*]] = alloca(%[[VAL_2]]) : memref<?xf32>
// CHECK: %[[VAL_10:.*]] = alloca() : memref<32x16x8xf32>
// CHECK: %[[VAL_11:.*]] = alloca() : memref<32x16x8xf32>
// CHECK: scf.for %[[VAL_12:.*]] = %[[VAL_5]] to %[[VAL_3]] step %[[VAL_6]] {
// CHECK: scf.for %[[VAL_13:.*]] = %[[VAL_5]] to %[[VAL_4]] step %[[VAL_6]] {
// CHECK: %[[VAL_14:.*]] = muli %[[VAL_12]], %[[VAL_4]] : index
// CHECK: %[[VAL_15:.*]] = addi %[[VAL_14]], %[[VAL_13]] : index
// CHECK: %[[VAL_16:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_15]]] : memref<?xindex>
// CHECK: %[[VAL_17:.*]] = addi %[[VAL_15]], %[[VAL_6]] : index
// CHECK: %[[VAL_18:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_17]]] : memref<?xindex>
// CHECK: scf.for %[[VAL_19:.*]] = %[[VAL_16]] to %[[VAL_18]] step %[[VAL_6]] {
// CHECK: %[[VAL_20:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_19]]] : memref<?xindex>
// CHECK: %[[VAL_21:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_19]]] : memref<?xf32>
// CHECK: %[[VAL_22:.*]] = load %[[VAL_10]]{{\[}}%[[VAL_12]], %[[VAL_13]], %[[VAL_20]]] : memref<32x16x8xf32>
// CHECK: %[[VAL_23:.*]] = mulf %[[VAL_21]], %[[VAL_22]] : f32
// CHECK: store %[[VAL_23]], %[[VAL_11]]{{\[}}%[[VAL_12]], %[[VAL_13]], %[[VAL_20]]] : memref<32x16x8xf32>
// CHECK: }
// CHECK: }
// CHECK: }
// CHECK: %[[VAL_24:.*]] = tensor_load %[[VAL_11]] : memref<32x16x8xf32>
// CHECK: return %[[VAL_24]] : tensor<32x16x8xf32>
// CHECK: }
// Input kernel: element-wise mulf of %arga and %argb expressed as a
// linalg.generic carrying #trait_dds. %arga is also passed as the outs
// operand.
func @mul_dds(%arga: tensor<32x16x8xf32>,
              %argb: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
  %result = linalg.generic #trait_dds
      ins(%arga, %argb : tensor<32x16x8xf32>, tensor<32x16x8xf32>)
      outs(%arga : tensor<32x16x8xf32>) {
    // %out (the incoming output element) is not read by the body.
    ^bb0(%lhs: f32, %rhs: f32, %out: f32):
      %prod = mulf %lhs, %rhs : f32
      linalg.yield %prod : f32
  } -> tensor<32x16x8xf32>
  return %result : tensor<32x16x8xf32>
}
// Attribute bundle consumed by the *_dsd kernels below. All three operands
// use the identity access (i,j,k); operand A is marked "D","S","D" while B
// and X are "D" in every dimension ("D"/"S" presumably mean dense/sparse --
// confirm against the sparsification pass).
#trait_dsd = {
  doc = "X(i,j,k) = A(i,j,k) OP B(i,j,k)",
  iterator_types = ["parallel", "parallel", "parallel"],
  indexing_maps = [
    affine_map<(i, j, k) -> (i, j, k)>,  // operand A
    affine_map<(i, j, k) -> (i, j, k)>,  // operand B
    affine_map<(i, j, k) -> (i, j, k)>   // result X (out)
  ],
  sparse = [
    ["D", "S", "D"],  // operand A
    ["D", "D", "D"],  // operand B
    ["D", "D", "D"]   // result X
  ]
}
// CHECK-LABEL: func @add_dsd(
// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16x8xf32>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
// CHECK: %[[VAL_2:.*]] = constant 999 : index
// CHECK: %[[VAL_3:.*]] = constant 32 : index
// CHECK: %[[VAL_4:.*]] = constant 16 : index
// CHECK: %[[VAL_5:.*]] = constant 8 : index
// CHECK: %[[VAL_6:.*]] = constant true
// CHECK: %[[VAL_7:.*]] = constant 0 : index
// CHECK: %[[VAL_8:.*]] = constant 1 : index
// CHECK: %[[VAL_9:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
// CHECK: %[[VAL_10:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
// CHECK: %[[VAL_11:.*]] = alloca(%[[VAL_2]]) : memref<?xf32>
// CHECK: %[[VAL_12:.*]] = alloca() : memref<32x16x8xf32>
// CHECK: %[[VAL_13:.*]] = alloca() : memref<32x16x8xf32>
// CHECK: scf.for %[[VAL_14:.*]] = %[[VAL_7]] to %[[VAL_3]] step %[[VAL_8]] {
// CHECK: %[[VAL_15:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_14]]] : memref<?xindex>
// CHECK: %[[VAL_16:.*]] = addi %[[VAL_14]], %[[VAL_8]] : index
// CHECK: %[[VAL_17:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_16]]] : memref<?xindex>
// CHECK: %[[VAL_18:.*]]:2 = scf.while (%[[VAL_19:.*]] = %[[VAL_15]], %[[VAL_20:.*]] = %[[VAL_7]]) : (index, index) -> (index, index) {
// CHECK: %[[VAL_21:.*]] = cmpi ult, %[[VAL_19]], %[[VAL_17]] : index
// CHECK: scf.condition(%[[VAL_21]]) %[[VAL_19]], %[[VAL_20]] : index, index
// CHECK: } do {
// CHECK: ^bb0(%[[VAL_22:.*]]: index, %[[VAL_23:.*]]: index):
// CHECK: %[[VAL_24:.*]] = load %[[VAL_10]]{{\[}}%[[VAL_22]]] : memref<?xindex>
// CHECK: %[[VAL_25:.*]] = cmpi eq, %[[VAL_24]], %[[VAL_23]] : index
// CHECK: scf.if %[[VAL_25]] {
// CHECK: scf.for %[[VAL_26:.*]] = %[[VAL_7]] to %[[VAL_5]] step %[[VAL_8]] {
// CHECK: %[[VAL_27:.*]] = muli %[[VAL_22]], %[[VAL_5]] : index
// CHECK: %[[VAL_28:.*]] = addi %[[VAL_27]], %[[VAL_26]] : index
// CHECK: %[[VAL_29:.*]] = load %[[VAL_11]]{{\[}}%[[VAL_28]]] : memref<?xf32>
// CHECK: %[[VAL_30:.*]] = load %[[VAL_12]]{{\[}}%[[VAL_14]], %[[VAL_23]], %[[VAL_26]]] : memref<32x16x8xf32>
// CHECK: %[[VAL_31:.*]] = addf %[[VAL_29]], %[[VAL_30]] : f32
// CHECK: store %[[VAL_31]], %[[VAL_13]]{{\[}}%[[VAL_14]], %[[VAL_23]], %[[VAL_26]]] : memref<32x16x8xf32>
// CHECK: }
// CHECK: } else {
// CHECK: scf.if %[[VAL_6]] {
// CHECK: scf.for %[[VAL_32:.*]] = %[[VAL_7]] to %[[VAL_5]] step %[[VAL_8]] {
// CHECK: %[[VAL_33:.*]] = load %[[VAL_12]]{{\[}}%[[VAL_14]], %[[VAL_23]], %[[VAL_32]]] : memref<32x16x8xf32>
// CHECK: store %[[VAL_33]], %[[VAL_13]]{{\[}}%[[VAL_14]], %[[VAL_23]], %[[VAL_32]]] : memref<32x16x8xf32>
// CHECK: }
// CHECK: } else {
// CHECK: }
// CHECK: }
// CHECK: %[[VAL_34:.*]] = cmpi eq, %[[VAL_24]], %[[VAL_23]] : index
// CHECK: %[[VAL_35:.*]] = addi %[[VAL_22]], %[[VAL_8]] : index
// CHECK: %[[VAL_36:.*]] = select %[[VAL_34]], %[[VAL_35]], %[[VAL_22]] : index
// CHECK: %[[VAL_37:.*]] = addi %[[VAL_23]], %[[VAL_8]] : index
// CHECK: scf.yield %[[VAL_36]], %[[VAL_37]] : index, index
// CHECK: }
// CHECK: scf.for %[[VAL_38:.*]] = %[[VAL_39:.*]]#1 to %[[VAL_4]] step %[[VAL_8]] {
// CHECK: scf.for %[[VAL_40:.*]] = %[[VAL_7]] to %[[VAL_5]] step %[[VAL_8]] {
// CHECK: %[[VAL_41:.*]] = load %[[VAL_12]]{{\[}}%[[VAL_14]], %[[VAL_38]], %[[VAL_40]]] : memref<32x16x8xf32>
// CHECK: store %[[VAL_41]], %[[VAL_13]]{{\[}}%[[VAL_14]], %[[VAL_38]], %[[VAL_40]]] : memref<32x16x8xf32>
// CHECK: }
// CHECK: }
// CHECK: }
// CHECK: %[[VAL_42:.*]] = tensor_load %[[VAL_13]] : memref<32x16x8xf32>
// CHECK: return %[[VAL_42]] : tensor<32x16x8xf32>
// CHECK: }
// Input kernel: element-wise addf of %arga and %argb expressed as a
// linalg.generic carrying #trait_dsd. %arga is also passed as the outs
// operand.
func @add_dsd(%arga: tensor<32x16x8xf32>,
              %argb: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
  %result = linalg.generic #trait_dsd
      ins(%arga, %argb : tensor<32x16x8xf32>, tensor<32x16x8xf32>)
      outs(%arga : tensor<32x16x8xf32>) {
    // %out (the incoming output element) is not read by the body.
    ^bb0(%lhs: f32, %rhs: f32, %out: f32):
      %sum = addf %lhs, %rhs : f32
      linalg.yield %sum : f32
  } -> tensor<32x16x8xf32>
  return %result : tensor<32x16x8xf32>
}
// CHECK-LABEL: func @mul_dsd(
// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16x8xf32>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
// CHECK: %[[VAL_2:.*]] = constant 999 : index
// CHECK: %[[VAL_3:.*]] = constant 32 : index
// CHECK: %[[VAL_4:.*]] = constant 8 : index
// CHECK: %[[VAL_5:.*]] = constant 0 : index
// CHECK: %[[VAL_6:.*]] = constant 1 : index
// CHECK: %[[VAL_7:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
// CHECK: %[[VAL_8:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
// CHECK: %[[VAL_9:.*]] = alloca(%[[VAL_2]]) : memref<?xf32>
// CHECK: %[[VAL_10:.*]] = alloca() : memref<32x16x8xf32>
// CHECK: %[[VAL_11:.*]] = alloca() : memref<32x16x8xf32>
// CHECK: scf.for %[[VAL_12:.*]] = %[[VAL_5]] to %[[VAL_3]] step %[[VAL_6]] {
// CHECK: %[[VAL_13:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_12]]] : memref<?xindex>
// CHECK: %[[VAL_14:.*]] = addi %[[VAL_12]], %[[VAL_6]] : index
// CHECK: %[[VAL_15:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_14]]] : memref<?xindex>
// CHECK: scf.for %[[VAL_16:.*]] = %[[VAL_13]] to %[[VAL_15]] step %[[VAL_6]] {
// CHECK: %[[VAL_17:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_16]]] : memref<?xindex>
// CHECK: scf.for %[[VAL_18:.*]] = %[[VAL_5]] to %[[VAL_4]] step %[[VAL_6]] {
// CHECK: %[[VAL_19:.*]] = muli %[[VAL_16]], %[[VAL_4]] : index
// CHECK: %[[VAL_20:.*]] = addi %[[VAL_19]], %[[VAL_18]] : index
// CHECK: %[[VAL_21:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_20]]] : memref<?xf32>
// CHECK: %[[VAL_22:.*]] = load %[[VAL_10]]{{\[}}%[[VAL_12]], %[[VAL_17]], %[[VAL_18]]] : memref<32x16x8xf32>
// CHECK: %[[VAL_23:.*]] = mulf %[[VAL_21]], %[[VAL_22]] : f32
// CHECK: store %[[VAL_23]], %[[VAL_11]]{{\[}}%[[VAL_12]], %[[VAL_17]], %[[VAL_18]]] : memref<32x16x8xf32>
// CHECK: }
// CHECK: }
// CHECK: }
// CHECK: %[[VAL_24:.*]] = tensor_load %[[VAL_11]] : memref<32x16x8xf32>
// CHECK: return %[[VAL_24]] : tensor<32x16x8xf32>
// CHECK: }
// Input kernel: element-wise mulf of %arga and %argb expressed as a
// linalg.generic carrying #trait_dsd. %arga is also passed as the outs
// operand.
func @mul_dsd(%arga: tensor<32x16x8xf32>,
              %argb: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
  %result = linalg.generic #trait_dsd
      ins(%arga, %argb : tensor<32x16x8xf32>, tensor<32x16x8xf32>)
      outs(%arga : tensor<32x16x8xf32>) {
    // %out (the incoming output element) is not read by the body.
    ^bb0(%lhs: f32, %rhs: f32, %out: f32):
      %prod = mulf %lhs, %rhs : f32
      linalg.yield %prod : f32
  } -> tensor<32x16x8xf32>
  return %result : tensor<32x16x8xf32>
}
// Attribute bundle consumed by the *_dss kernels below. All three operands
// use the identity access (i,j,k); operand A is marked "D","S","S" while B
// and X are "D" in every dimension ("D"/"S" presumably mean dense/sparse --
// confirm against the sparsification pass).
#trait_dss = {
  doc = "X(i,j,k) = A(i,j,k) OP B(i,j,k)",
  iterator_types = ["parallel", "parallel", "parallel"],
  indexing_maps = [
    affine_map<(i, j, k) -> (i, j, k)>,  // operand A
    affine_map<(i, j, k) -> (i, j, k)>,  // operand B
    affine_map<(i, j, k) -> (i, j, k)>   // result X (out)
  ],
  sparse = [
    ["D", "S", "S"],  // operand A
    ["D", "D", "D"],  // operand B
    ["D", "D", "D"]   // result X
  ]
}
// CHECK-LABEL: func @add_dss(
// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16x8xf32>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
// CHECK: %[[VAL_2:.*]] = constant 999 : index
// CHECK: %[[VAL_3:.*]] = constant 32 : index
// CHECK: %[[VAL_4:.*]] = constant 16 : index
// CHECK: %[[VAL_5:.*]] = constant 8 : index
// CHECK: %[[VAL_6:.*]] = constant true
// CHECK: %[[VAL_7:.*]] = constant 0 : index
// CHECK: %[[VAL_8:.*]] = constant 1 : index
// CHECK: %[[VAL_9:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
// CHECK: %[[VAL_10:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
// CHECK: %[[VAL_11:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
// CHECK: %[[VAL_12:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
// CHECK: %[[VAL_13:.*]] = alloca(%[[VAL_2]]) : memref<?xf32>
// CHECK: %[[VAL_14:.*]] = alloca() : memref<32x16x8xf32>
// CHECK: %[[VAL_15:.*]] = alloca() : memref<32x16x8xf32>
// CHECK: scf.for %[[VAL_16:.*]] = %[[VAL_7]] to %[[VAL_3]] step %[[VAL_8]] {
// CHECK: %[[VAL_17:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_16]]] : memref<?xindex>
// CHECK: %[[VAL_18:.*]] = addi %[[VAL_16]], %[[VAL_8]] : index
// CHECK: %[[VAL_19:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_18]]] : memref<?xindex>
// CHECK: %[[VAL_20:.*]]:2 = scf.while (%[[VAL_21:.*]] = %[[VAL_17]], %[[VAL_22:.*]] = %[[VAL_7]]) : (index, index) -> (index, index) {
// CHECK: %[[VAL_23:.*]] = cmpi ult, %[[VAL_21]], %[[VAL_19]] : index
// CHECK: scf.condition(%[[VAL_23]]) %[[VAL_21]], %[[VAL_22]] : index, index
// CHECK: } do {
// CHECK: ^bb0(%[[VAL_24:.*]]: index, %[[VAL_25:.*]]: index):
// CHECK: %[[VAL_26:.*]] = load %[[VAL_10]]{{\[}}%[[VAL_24]]] : memref<?xindex>
// CHECK: %[[VAL_27:.*]] = cmpi eq, %[[VAL_26]], %[[VAL_25]] : index
// CHECK: scf.if %[[VAL_27]] {
// CHECK: %[[VAL_28:.*]] = load %[[VAL_11]]{{\[}}%[[VAL_24]]] : memref<?xindex>
// CHECK: %[[VAL_29:.*]] = addi %[[VAL_24]], %[[VAL_8]] : index
// CHECK: %[[VAL_30:.*]] = load %[[VAL_11]]{{\[}}%[[VAL_29]]] : memref<?xindex>
// CHECK: %[[VAL_31:.*]]:2 = scf.while (%[[VAL_32:.*]] = %[[VAL_28]], %[[VAL_33:.*]] = %[[VAL_7]]) : (index, index) -> (index, index) {
// CHECK: %[[VAL_34:.*]] = cmpi ult, %[[VAL_32]], %[[VAL_30]] : index
// CHECK: scf.condition(%[[VAL_34]]) %[[VAL_32]], %[[VAL_33]] : index, index
// CHECK: } do {
// CHECK: ^bb0(%[[VAL_35:.*]]: index, %[[VAL_36:.*]]: index):
// CHECK: %[[VAL_37:.*]] = load %[[VAL_12]]{{\[}}%[[VAL_35]]] : memref<?xindex>
// CHECK: %[[VAL_38:.*]] = cmpi eq, %[[VAL_37]], %[[VAL_36]] : index
// CHECK: scf.if %[[VAL_38]] {
// CHECK: %[[VAL_39:.*]] = load %[[VAL_13]]{{\[}}%[[VAL_35]]] : memref<?xf32>
// CHECK: %[[VAL_40:.*]] = load %[[VAL_14]]{{\[}}%[[VAL_16]], %[[VAL_25]], %[[VAL_36]]] : memref<32x16x8xf32>
// CHECK: %[[VAL_41:.*]] = addf %[[VAL_39]], %[[VAL_40]] : f32
// CHECK: store %[[VAL_41]], %[[VAL_15]]{{\[}}%[[VAL_16]], %[[VAL_25]], %[[VAL_36]]] : memref<32x16x8xf32>
// CHECK: } else {
// CHECK: scf.if %[[VAL_6]] {
// CHECK: %[[VAL_42:.*]] = load %[[VAL_14]]{{\[}}%[[VAL_16]], %[[VAL_25]], %[[VAL_36]]] : memref<32x16x8xf32>
// CHECK: store %[[VAL_42]], %[[VAL_15]]{{\[}}%[[VAL_16]], %[[VAL_25]], %[[VAL_36]]] : memref<32x16x8xf32>
// CHECK: } else {
// CHECK: }
// CHECK: }
// CHECK: %[[VAL_43:.*]] = cmpi eq, %[[VAL_37]], %[[VAL_36]] : index
// CHECK: %[[VAL_44:.*]] = addi %[[VAL_35]], %[[VAL_8]] : index
// CHECK: %[[VAL_45:.*]] = select %[[VAL_43]], %[[VAL_44]], %[[VAL_35]] : index
// CHECK: %[[VAL_46:.*]] = addi %[[VAL_36]], %[[VAL_8]] : index
// CHECK: scf.yield %[[VAL_45]], %[[VAL_46]] : index, index
// CHECK: }
// CHECK: scf.for %[[VAL_47:.*]] = %[[VAL_48:.*]]#1 to %[[VAL_5]] step %[[VAL_8]] {
// CHECK: %[[VAL_49:.*]] = load %[[VAL_14]]{{\[}}%[[VAL_16]], %[[VAL_25]], %[[VAL_47]]] : memref<32x16x8xf32>
// CHECK: store %[[VAL_49]], %[[VAL_15]]{{\[}}%[[VAL_16]], %[[VAL_25]], %[[VAL_47]]] : memref<32x16x8xf32>
// CHECK: }
// CHECK: } else {
// CHECK: scf.if %[[VAL_6]] {
// CHECK: scf.for %[[VAL_50:.*]] = %[[VAL_7]] to %[[VAL_5]] step %[[VAL_8]] {
// CHECK: %[[VAL_51:.*]] = load %[[VAL_14]]{{\[}}%[[VAL_16]], %[[VAL_25]], %[[VAL_50]]] : memref<32x16x8xf32>
// CHECK: store %[[VAL_51]], %[[VAL_15]]{{\[}}%[[VAL_16]], %[[VAL_25]], %[[VAL_50]]] : memref<32x16x8xf32>
// CHECK: }
// CHECK: } else {
// CHECK: }
// CHECK: }
// CHECK: %[[VAL_52:.*]] = cmpi eq, %[[VAL_26]], %[[VAL_25]] : index
// CHECK: %[[VAL_53:.*]] = addi %[[VAL_24]], %[[VAL_8]] : index
// CHECK: %[[VAL_54:.*]] = select %[[VAL_52]], %[[VAL_53]], %[[VAL_24]] : index
// CHECK: %[[VAL_55:.*]] = addi %[[VAL_25]], %[[VAL_8]] : index
// CHECK: scf.yield %[[VAL_54]], %[[VAL_55]] : index, index
// CHECK: }
// CHECK: scf.for %[[VAL_56:.*]] = %[[VAL_57:.*]]#1 to %[[VAL_4]] step %[[VAL_8]] {
// CHECK: scf.for %[[VAL_58:.*]] = %[[VAL_7]] to %[[VAL_5]] step %[[VAL_8]] {
// CHECK: %[[VAL_59:.*]] = load %[[VAL_14]]{{\[}}%[[VAL_16]], %[[VAL_56]], %[[VAL_58]]] : memref<32x16x8xf32>
// CHECK: store %[[VAL_59]], %[[VAL_15]]{{\[}}%[[VAL_16]], %[[VAL_56]], %[[VAL_58]]] : memref<32x16x8xf32>
// CHECK: }
// CHECK: }
// CHECK: }
// CHECK: %[[VAL_60:.*]] = tensor_load %[[VAL_15]] : memref<32x16x8xf32>
// CHECK: return %[[VAL_60]] : tensor<32x16x8xf32>
// CHECK: }
// Input kernel: element-wise addf of %arga and %argb expressed as a
// linalg.generic carrying #trait_dss. %arga is also passed as the outs
// operand.
func @add_dss(%arga: tensor<32x16x8xf32>,
              %argb: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
  %result = linalg.generic #trait_dss
      ins(%arga, %argb : tensor<32x16x8xf32>, tensor<32x16x8xf32>)
      outs(%arga : tensor<32x16x8xf32>) {
    // %out (the incoming output element) is not read by the body.
    ^bb0(%lhs: f32, %rhs: f32, %out: f32):
      %sum = addf %lhs, %rhs : f32
      linalg.yield %sum : f32
  } -> tensor<32x16x8xf32>
  return %result : tensor<32x16x8xf32>
}
// CHECK-LABEL: func @mul_dss(
// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16x8xf32>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
// CHECK: %[[VAL_2:.*]] = constant 999 : index
// CHECK: %[[VAL_3:.*]] = constant 32 : index
// CHECK: %[[VAL_4:.*]] = constant 0 : index
// CHECK: %[[VAL_5:.*]] = constant 1 : index
// CHECK: %[[VAL_6:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
// CHECK: %[[VAL_7:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
// CHECK: %[[VAL_8:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
// CHECK: %[[VAL_9:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
// CHECK: %[[VAL_10:.*]] = alloca(%[[VAL_2]]) : memref<?xf32>
// CHECK: %[[VAL_11:.*]] = alloca() : memref<32x16x8xf32>
// CHECK: %[[VAL_12:.*]] = alloca() : memref<32x16x8xf32>
// CHECK: scf.for %[[VAL_13:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] {
// CHECK: %[[VAL_14:.*]] = load %[[VAL_6]]{{\[}}%[[VAL_13]]] : memref<?xindex>
// CHECK: %[[VAL_15:.*]] = addi %[[VAL_13]], %[[VAL_5]] : index
// CHECK: %[[VAL_16:.*]] = load %[[VAL_6]]{{\[}}%[[VAL_15]]] : memref<?xindex>
// CHECK: scf.for %[[VAL_17:.*]] = %[[VAL_14]] to %[[VAL_16]] step %[[VAL_5]] {
// CHECK: %[[VAL_18:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_17]]] : memref<?xindex>
// CHECK: %[[VAL_19:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_17]]] : memref<?xindex>
// CHECK: %[[VAL_20:.*]] = addi %[[VAL_17]], %[[VAL_5]] : index
// CHECK: %[[VAL_21:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_20]]] : memref<?xindex>
// CHECK: scf.for %[[VAL_22:.*]] = %[[VAL_19]] to %[[VAL_21]] step %[[VAL_5]] {
// CHECK: %[[VAL_23:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_22]]] : memref<?xindex>
// CHECK: %[[VAL_24:.*]] = load %[[VAL_10]]{{\[}}%[[VAL_22]]] : memref<?xf32>
// CHECK: %[[VAL_25:.*]] = load %[[VAL_11]]{{\[}}%[[VAL_13]], %[[VAL_18]], %[[VAL_23]]] : memref<32x16x8xf32>
// CHECK: %[[VAL_26:.*]] = mulf %[[VAL_24]], %[[VAL_25]] : f32
// CHECK: store %[[VAL_26]], %[[VAL_12]]{{\[}}%[[VAL_13]], %[[VAL_18]], %[[VAL_23]]] : memref<32x16x8xf32>
// CHECK: }
// CHECK: }
// CHECK: }
// CHECK: %[[VAL_27:.*]] = tensor_load %[[VAL_12]] : memref<32x16x8xf32>
// CHECK: return %[[VAL_27]] : tensor<32x16x8xf32>
// CHECK: }
// Input kernel: element-wise mulf of %arga and %argb expressed as a
// linalg.generic carrying #trait_dss. %arga is also passed as the outs
// operand.
func @mul_dss(%arga: tensor<32x16x8xf32>,
              %argb: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
  %result = linalg.generic #trait_dss
      ins(%arga, %argb : tensor<32x16x8xf32>, tensor<32x16x8xf32>)
      outs(%arga : tensor<32x16x8xf32>) {
    // %out (the incoming output element) is not read by the body.
    ^bb0(%lhs: f32, %rhs: f32, %out: f32):
      %prod = mulf %lhs, %rhs : f32
      linalg.yield %prod : f32
  } -> tensor<32x16x8xf32>
  return %result : tensor<32x16x8xf32>
}
// Attribute bundle consumed by the *_sdd kernels below. All three operands
// use the identity access (i,j,k); operand A is marked "S","D","D" while B
// and X are "D" in every dimension ("D"/"S" presumably mean dense/sparse --
// confirm against the sparsification pass).
#trait_sdd = {
  doc = "X(i,j,k) = A(i,j,k) OP B(i,j,k)",
  iterator_types = ["parallel", "parallel", "parallel"],
  indexing_maps = [
    affine_map<(i, j, k) -> (i, j, k)>,  // operand A
    affine_map<(i, j, k) -> (i, j, k)>,  // operand B
    affine_map<(i, j, k) -> (i, j, k)>   // result X (out)
  ],
  sparse = [
    ["S", "D", "D"],  // operand A
    ["D", "D", "D"],  // operand B
    ["D", "D", "D"]   // result X
  ]
}
// CHECK-LABEL: func @add_sdd(
// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16x8xf32>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
// CHECK: %[[VAL_2:.*]] = constant 999 : index
// CHECK: %[[VAL_3:.*]] = constant 32 : index
// CHECK: %[[VAL_4:.*]] = constant 16 : index
// CHECK: %[[VAL_5:.*]] = constant 8 : index
// CHECK: %[[VAL_6:.*]] = constant true
// CHECK: %[[VAL_7:.*]] = constant 0 : index
// CHECK: %[[VAL_8:.*]] = constant 1 : index
// CHECK: %[[VAL_9:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
// CHECK: %[[VAL_10:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
// CHECK: %[[VAL_11:.*]] = alloca(%[[VAL_2]]) : memref<?xf32>
// CHECK: %[[VAL_12:.*]] = alloca() : memref<32x16x8xf32>
// CHECK: %[[VAL_13:.*]] = alloca() : memref<32x16x8xf32>
// CHECK: %[[VAL_14:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_7]]] : memref<?xindex>
// CHECK: %[[VAL_15:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_8]]] : memref<?xindex>
// CHECK: %[[VAL_16:.*]]:2 = scf.while (%[[VAL_17:.*]] = %[[VAL_14]], %[[VAL_18:.*]] = %[[VAL_7]]) : (index, index) -> (index, index) {
// CHECK: %[[VAL_19:.*]] = cmpi ult, %[[VAL_17]], %[[VAL_15]] : index
// CHECK: scf.condition(%[[VAL_19]]) %[[VAL_17]], %[[VAL_18]] : index, index
// CHECK: } do {
// CHECK: ^bb0(%[[VAL_20:.*]]: index, %[[VAL_21:.*]]: index):
// CHECK: %[[VAL_22:.*]] = load %[[VAL_10]]{{\[}}%[[VAL_20]]] : memref<?xindex>
// CHECK: %[[VAL_23:.*]] = cmpi eq, %[[VAL_22]], %[[VAL_21]] : index
// CHECK: scf.if %[[VAL_23]] {
// CHECK: scf.for %[[VAL_24:.*]] = %[[VAL_7]] to %[[VAL_4]] step %[[VAL_8]] {
// CHECK: %[[VAL_25:.*]] = muli %[[VAL_20]], %[[VAL_4]] : index
// CHECK: %[[VAL_26:.*]] = addi %[[VAL_25]], %[[VAL_24]] : index
// CHECK: scf.for %[[VAL_27:.*]] = %[[VAL_7]] to %[[VAL_5]] step %[[VAL_8]] {
// CHECK: %[[VAL_28:.*]] = muli %[[VAL_26]], %[[VAL_5]] : index
// CHECK: %[[VAL_29:.*]] = addi %[[VAL_28]], %[[VAL_27]] : index
// CHECK: %[[VAL_30:.*]] = load %[[VAL_11]]{{\[}}%[[VAL_29]]] : memref<?xf32>
// CHECK: %[[VAL_31:.*]] = load %[[VAL_12]]{{\[}}%[[VAL_21]], %[[VAL_24]], %[[VAL_27]]] : memref<32x16x8xf32>
// CHECK: %[[VAL_32:.*]] = addf %[[VAL_30]], %[[VAL_31]] : f32
// CHECK: store %[[VAL_32]], %[[VAL_13]]{{\[}}%[[VAL_21]], %[[VAL_24]], %[[VAL_27]]] : memref<32x16x8xf32>
// CHECK: }
// CHECK: }
// CHECK: } else {
// CHECK: scf.if %[[VAL_6]] {
// CHECK: scf.for %[[VAL_33:.*]] = %[[VAL_7]] to %[[VAL_4]] step %[[VAL_8]] {
// CHECK: scf.for %[[VAL_34:.*]] = %[[VAL_7]] to %[[VAL_5]] step %[[VAL_8]] {
// CHECK: %[[VAL_35:.*]] = load %[[VAL_12]]{{\[}}%[[VAL_21]], %[[VAL_33]], %[[VAL_34]]] : memref<32x16x8xf32>
// CHECK: store %[[VAL_35]], %[[VAL_13]]{{\[}}%[[VAL_21]], %[[VAL_33]], %[[VAL_34]]] : memref<32x16x8xf32>
// CHECK: }
// CHECK: }
// CHECK: } else {
// CHECK: }
// CHECK: }
// CHECK: %[[VAL_36:.*]] = cmpi eq, %[[VAL_22]], %[[VAL_21]] : index
// CHECK: %[[VAL_37:.*]] = addi %[[VAL_20]], %[[VAL_8]] : index
// CHECK: %[[VAL_38:.*]] = select %[[VAL_36]], %[[VAL_37]], %[[VAL_20]] : index
// CHECK: %[[VAL_39:.*]] = addi %[[VAL_21]], %[[VAL_8]] : index
// CHECK: scf.yield %[[VAL_38]], %[[VAL_39]] : index, index
// CHECK: }
// CHECK: scf.for %[[VAL_40:.*]] = %[[VAL_41:.*]]#1 to %[[VAL_3]] step %[[VAL_8]] {
// CHECK: scf.for %[[VAL_42:.*]] = %[[VAL_7]] to %[[VAL_4]] step %[[VAL_8]] {
// CHECK: scf.for %[[VAL_43:.*]] = %[[VAL_7]] to %[[VAL_5]] step %[[VAL_8]] {
// CHECK: %[[VAL_44:.*]] = load %[[VAL_12]]{{\[}}%[[VAL_40]], %[[VAL_42]], %[[VAL_43]]] : memref<32x16x8xf32>
// CHECK: store %[[VAL_44]], %[[VAL_13]]{{\[}}%[[VAL_40]], %[[VAL_42]], %[[VAL_43]]] : memref<32x16x8xf32>
// CHECK: }
// CHECK: }
// CHECK: }
// CHECK: %[[VAL_45:.*]] = tensor_load %[[VAL_13]] : memref<32x16x8xf32>
// CHECK: return %[[VAL_45]] : tensor<32x16x8xf32>
// CHECK: }
// Input kernel: element-wise addf of %arga and %argb expressed as a
// linalg.generic carrying #trait_sdd. %arga is also passed as the outs
// operand.
func @add_sdd(%arga: tensor<32x16x8xf32>,
              %argb: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
  %result = linalg.generic #trait_sdd
      ins(%arga, %argb : tensor<32x16x8xf32>, tensor<32x16x8xf32>)
      outs(%arga : tensor<32x16x8xf32>) {
    // %out (the incoming output element) is not read by the body.
    ^bb0(%lhs: f32, %rhs: f32, %out: f32):
      %sum = addf %lhs, %rhs : f32
      linalg.yield %sum : f32
  } -> tensor<32x16x8xf32>
  return %result : tensor<32x16x8xf32>
}
// CHECK-LABEL: func @mul_sdd(
// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16x8xf32>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
// CHECK: %[[VAL_2:.*]] = constant 999 : index
// CHECK: %[[VAL_3:.*]] = constant 16 : index
// CHECK: %[[VAL_4:.*]] = constant 8 : index
// CHECK: %[[VAL_5:.*]] = constant 0 : index
// CHECK: %[[VAL_6:.*]] = constant 1 : index
// CHECK: %[[VAL_7:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
// CHECK: %[[VAL_8:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
// CHECK: %[[VAL_9:.*]] = alloca(%[[VAL_2]]) : memref<?xf32>
// CHECK: %[[VAL_10:.*]] = alloca() : memref<32x16x8xf32>
// CHECK: %[[VAL_11:.*]] = alloca() : memref<32x16x8xf32>
// CHECK: %[[VAL_12:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_5]]] : memref<?xindex>
// CHECK: %[[VAL_13:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_6]]] : memref<?xindex>
// CHECK: scf.for %[[VAL_14:.*]] = %[[VAL_12]] to %[[VAL_13]] step %[[VAL_6]] {
// CHECK: %[[VAL_15:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_14]]] : memref<?xindex>
// CHECK: scf.for %[[VAL_16:.*]] = %[[VAL_5]] to %[[VAL_3]] step %[[VAL_6]] {
// CHECK: %[[VAL_17:.*]] = muli %[[VAL_14]], %[[VAL_3]] : index
// CHECK: %[[VAL_18:.*]] = addi %[[VAL_17]], %[[VAL_16]] : index
// CHECK: scf.for %[[VAL_19:.*]] = %[[VAL_5]] to %[[VAL_4]] step %[[VAL_6]] {
// CHECK: %[[VAL_20:.*]] = muli %[[VAL_18]], %[[VAL_4]] : index
// CHECK: %[[VAL_21:.*]] = addi %[[VAL_20]], %[[VAL_19]] : index
// CHECK: %[[VAL_22:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_21]]] : memref<?xf32>
// CHECK: %[[VAL_23:.*]] = load %[[VAL_10]]{{\[}}%[[VAL_15]], %[[VAL_16]], %[[VAL_19]]] : memref<32x16x8xf32>
// CHECK: %[[VAL_24:.*]] = mulf %[[VAL_22]], %[[VAL_23]] : f32
// CHECK: store %[[VAL_24]], %[[VAL_11]]{{\[}}%[[VAL_15]], %[[VAL_16]], %[[VAL_19]]] : memref<32x16x8xf32>
// CHECK: }
// CHECK: }
// CHECK: }
// CHECK: %[[VAL_25:.*]] = tensor_load %[[VAL_11]] : memref<32x16x8xf32>
// CHECK: return %[[VAL_25]] : tensor<32x16x8xf32>
// CHECK: }
// Input kernel: element-wise mulf of %arga and %argb expressed as a
// linalg.generic carrying #trait_sdd. %arga is also passed as the outs
// operand.
func @mul_sdd(%arga: tensor<32x16x8xf32>,
              %argb: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
  %result = linalg.generic #trait_sdd
      ins(%arga, %argb : tensor<32x16x8xf32>, tensor<32x16x8xf32>)
      outs(%arga : tensor<32x16x8xf32>) {
    // %out (the incoming output element) is not read by the body.
    ^bb0(%lhs: f32, %rhs: f32, %out: f32):
      %prod = mulf %lhs, %rhs : f32
      linalg.yield %prod : f32
  } -> tensor<32x16x8xf32>
  return %result : tensor<32x16x8xf32>
}
// Attribute bundle for the *_sds kernels. All three operands use the
// identity access (i,j,k); operand A is marked "S","D","S" while B and X are
// "D" in every dimension ("D"/"S" presumably mean dense/sparse -- confirm
// against the sparsification pass).
#trait_sds = {
  doc = "X(i,j,k) = A(i,j,k) OP B(i,j,k)",
  iterator_types = ["parallel", "parallel", "parallel"],
  indexing_maps = [
    affine_map<(i, j, k) -> (i, j, k)>,  // operand A
    affine_map<(i, j, k) -> (i, j, k)>,  // operand B
    affine_map<(i, j, k) -> (i, j, k)>   // result X (out)
  ],
  sparse = [
    ["S", "D", "S"],  // operand A
    ["D", "D", "D"],  // operand B
    ["D", "D", "D"]   // result X
  ]
}
// CHECK-LABEL: func @add_sds(
// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16x8xf32>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
// CHECK: %[[VAL_2:.*]] = constant 999 : index
// CHECK: %[[VAL_3:.*]] = constant 32 : index
// CHECK: %[[VAL_4:.*]] = constant 16 : index
// CHECK: %[[VAL_5:.*]] = constant 8 : index
// CHECK: %[[VAL_6:.*]] = constant true
// CHECK: %[[VAL_7:.*]] = constant 0 : index
// CHECK: %[[VAL_8:.*]] = constant 1 : index
// CHECK: %[[VAL_9:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
// CHECK: %[[VAL_10:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
// CHECK: %[[VAL_11:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
// CHECK: %[[VAL_12:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
// CHECK: %[[VAL_13:.*]] = alloca(%[[VAL_2]]) : memref<?xf32>
// CHECK: %[[VAL_14:.*]] = alloca() : memref<32x16x8xf32>
// CHECK: %[[VAL_15:.*]] = alloca() : memref<32x16x8xf32>
// CHECK: %[[VAL_16:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_7]]] : memref<?xindex>
// CHECK: %[[VAL_17:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_8]]] : memref<?xindex>
// CHECK: %[[VAL_18:.*]]:2 = scf.while (%[[VAL_19:.*]] = %[[VAL_16]], %[[VAL_20:.*]] = %[[VAL_7]]) : (index, index) -> (index, index) {
// CHECK: %[[VAL_21:.*]] = cmpi ult, %[[VAL_19]], %[[VAL_17]] : index
// CHECK: scf.condition(%[[VAL_21]]) %[[VAL_19]], %[[VAL_20]] : index, index
// CHECK: } do {
// CHECK: ^bb0(%[[VAL_22:.*]]: index, %[[VAL_23:.*]]: index):
// CHECK: %[[VAL_24:.*]] = load %[[VAL_10]]{{\[}}%[[VAL_22]]] : memref<?xindex>
// CHECK: %[[VAL_25:.*]] = cmpi eq, %[[VAL_24]], %[[VAL_23]] : index
// CHECK: scf.if %[[VAL_25]] {
// CHECK: scf.for %[[VAL_26:.*]] = %[[VAL_7]] to %[[VAL_4]] step %[[VAL_8]] {
// CHECK: %[[VAL_27:.*]] = muli %[[VAL_22]], %[[VAL_4]] : index
// CHECK: %[[VAL_28:.*]] = addi %[[VAL_27]], %[[VAL_26]] : index
// CHECK: %[[VAL_29:.*]] = load %[[VAL_11]]{{\[}}%[[VAL_28]]] : memref<?xindex>
// CHECK: %[[VAL_30:.*]] = addi %[[VAL_28]], %[[VAL_8]] : index
// CHECK: %[[VAL_31:.*]] = load %[[VAL_11]]{{\[}}%[[VAL_30]]] : memref<?xindex>
// CHECK: %[[VAL_32:.*]]:2 = scf.while (%[[VAL_33:.*]] = %[[VAL_29]], %[[VAL_34:.*]] = %[[VAL_7]]) : (index, index) -> (index, index) {
// CHECK: %[[VAL_35:.*]] = cmpi ult, %[[VAL_33]], %[[VAL_31]] : index
// CHECK: scf.condition(%[[VAL_35]]) %[[VAL_33]], %[[VAL_34]] : index, index
// CHECK: } do {
// CHECK: ^bb0(%[[VAL_36:.*]]: index, %[[VAL_37:.*]]: index):
// CHECK: %[[VAL_38:.*]] = load %[[VAL_12]]{{\[}}%[[VAL_36]]] : memref<?xindex>
// CHECK: %[[VAL_39:.*]] = cmpi eq, %[[VAL_38]], %[[VAL_37]] : index
// CHECK: scf.if %[[VAL_39]] {
// CHECK: %[[VAL_40:.*]] = load %[[VAL_13]]{{\[}}%[[VAL_36]]] : memref<?xf32>
// CHECK: %[[VAL_41:.*]] = load %[[VAL_14]]{{\[}}%[[VAL_23]], %[[VAL_26]], %[[VAL_37]]] : memref<32x16x8xf32>
// CHECK: %[[VAL_42:.*]] = addf %[[VAL_40]], %[[VAL_41]] : f32
// CHECK: store %[[VAL_42]], %[[VAL_15]]{{\[}}%[[VAL_23]], %[[VAL_26]], %[[VAL_37]]] : memref<32x16x8xf32>
// CHECK: } else {
// CHECK: scf.if %[[VAL_6]] {
// CHECK: %[[VAL_43:.*]] = load %[[VAL_14]]{{\[}}%[[VAL_23]], %[[VAL_26]], %[[VAL_37]]] : memref<32x16x8xf32>
// CHECK: store %[[VAL_43]], %[[VAL_15]]{{\[}}%[[VAL_23]], %[[VAL_26]], %[[VAL_37]]] : memref<32x16x8xf32>
// CHECK: } else {
// CHECK: }
// CHECK: }
// CHECK: %[[VAL_44:.*]] = cmpi eq, %[[VAL_38]], %[[VAL_37]] : index
// CHECK: %[[VAL_45:.*]] = addi %[[VAL_36]], %[[VAL_8]] : index
// CHECK: %[[VAL_46:.*]] = select %[[VAL_44]], %[[VAL_45]], %[[VAL_36]] : index
// CHECK: %[[VAL_47:.*]] = addi %[[VAL_37]], %[[VAL_8]] : index
// CHECK: scf.yield %[[VAL_46]], %[[VAL_47]] : index, index
// CHECK: }
// CHECK: scf.for %[[VAL_48:.*]] = %[[VAL_49:.*]]#1 to %[[VAL_5]] step %[[VAL_8]] {
// CHECK: %[[VAL_50:.*]] = load %[[VAL_14]]{{\[}}%[[VAL_23]], %[[VAL_26]], %[[VAL_48]]] : memref<32x16x8xf32>
// CHECK: store %[[VAL_50]], %[[VAL_15]]{{\[}}%[[VAL_23]], %[[VAL_26]], %[[VAL_48]]] : memref<32x16x8xf32>
// CHECK: }
// CHECK: }
// CHECK: } else {
// CHECK: scf.if %[[VAL_6]] {
// CHECK: scf.for %[[VAL_51:.*]] = %[[VAL_7]] to %[[VAL_4]] step %[[VAL_8]] {
// CHECK: scf.for %[[VAL_52:.*]] = %[[VAL_7]] to %[[VAL_5]] step %[[VAL_8]] {
// CHECK: %[[VAL_53:.*]] = load %[[VAL_14]]{{\[}}%[[VAL_23]], %[[VAL_51]], %[[VAL_52]]] : memref<32x16x8xf32>
// CHECK: store %[[VAL_53]], %[[VAL_15]]{{\[}}%[[VAL_23]], %[[VAL_51]], %[[VAL_52]]] : memref<32x16x8xf32>
// CHECK: }
// CHECK: }
// CHECK: } else {
// CHECK: }
// CHECK: }
// CHECK: %[[VAL_54:.*]] = cmpi eq, %[[VAL_24]], %[[VAL_23]] : index
// CHECK: %[[VAL_55:.*]] = addi %[[VAL_22]], %[[VAL_8]] : index
// CHECK: %[[VAL_56:.*]] = select %[[VAL_54]], %[[VAL_55]], %[[VAL_22]] : index
// CHECK: %[[VAL_57:.*]] = addi %[[VAL_23]], %[[VAL_8]] : index
// CHECK: scf.yield %[[VAL_56]], %[[VAL_57]] : index, index
// CHECK: }
// CHECK: scf.for %[[VAL_58:.*]] = %[[VAL_59:.*]]#1 to %[[VAL_3]] step %[[VAL_8]] {
// CHECK: scf.for %[[VAL_60:.*]] = %[[VAL_7]] to %[[VAL_4]] step %[[VAL_8]] {
// CHECK: scf.for %[[VAL_61:.*]] = %[[VAL_7]] to %[[VAL_5]] step %[[VAL_8]] {
// CHECK: %[[VAL_62:.*]] = load %[[VAL_14]]{{\[}}%[[VAL_58]], %[[VAL_60]], %[[VAL_61]]] : memref<32x16x8xf32>
// CHECK: store %[[VAL_62]], %[[VAL_15]]{{\[}}%[[VAL_58]], %[[VAL_60]], %[[VAL_61]]] : memref<32x16x8xf32>
// CHECK: }
// CHECK: }
// CHECK: }
// CHECK: %[[VAL_63:.*]] = tensor_load %[[VAL_15]] : memref<32x16x8xf32>
// CHECK: return %[[VAL_63]] : tensor<32x16x8xf32>
// CHECK: }
// Test input: element-wise f32 sum of %arga and %argb, expressed as a
// linalg.generic configured by #trait_sds. %arga is also passed as the
// `outs` operand; the region argument bound to it is not read by the body.
func @add_sds(%arga: tensor<32x16x8xf32>, %argb: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
  %result = linalg.generic #trait_sds
    ins(%arga, %argb : tensor<32x16x8xf32>, tensor<32x16x8xf32>)
    outs(%arga : tensor<32x16x8xf32>) {
      ^bb(%lhs: f32, %rhs: f32, %out: f32):
        %sum = addf %lhs, %rhs : f32
        linalg.yield %sum : f32
  } -> tensor<32x16x8xf32>
  return %result : tensor<32x16x8xf32>
}
// CHECK-LABEL: func @mul_sds(
// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16x8xf32>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
// CHECK: %[[VAL_2:.*]] = constant 999 : index
// CHECK: %[[VAL_3:.*]] = constant 16 : index
// CHECK: %[[VAL_4:.*]] = constant 0 : index
// CHECK: %[[VAL_5:.*]] = constant 1 : index
// CHECK: %[[VAL_6:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
// CHECK: %[[VAL_7:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
// CHECK: %[[VAL_8:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
// CHECK: %[[VAL_9:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
// CHECK: %[[VAL_10:.*]] = alloca(%[[VAL_2]]) : memref<?xf32>
// CHECK: %[[VAL_11:.*]] = alloca() : memref<32x16x8xf32>
// CHECK: %[[VAL_12:.*]] = alloca() : memref<32x16x8xf32>
// CHECK: %[[VAL_13:.*]] = load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK: %[[VAL_14:.*]] = load %[[VAL_6]]{{\[}}%[[VAL_5]]] : memref<?xindex>
// CHECK: scf.for %[[VAL_15:.*]] = %[[VAL_13]] to %[[VAL_14]] step %[[VAL_5]] {
// CHECK: %[[VAL_16:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_15]]] : memref<?xindex>
// CHECK: scf.for %[[VAL_17:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] {
// CHECK: %[[VAL_18:.*]] = muli %[[VAL_15]], %[[VAL_3]] : index
// CHECK: %[[VAL_19:.*]] = addi %[[VAL_18]], %[[VAL_17]] : index
// CHECK: %[[VAL_20:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_19]]] : memref<?xindex>
// CHECK: %[[VAL_21:.*]] = addi %[[VAL_19]], %[[VAL_5]] : index
// CHECK: %[[VAL_22:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_21]]] : memref<?xindex>
// CHECK: scf.for %[[VAL_23:.*]] = %[[VAL_20]] to %[[VAL_22]] step %[[VAL_5]] {
// CHECK: %[[VAL_24:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_23]]] : memref<?xindex>
// CHECK: %[[VAL_25:.*]] = load %[[VAL_10]]{{\[}}%[[VAL_23]]] : memref<?xf32>
// CHECK: %[[VAL_26:.*]] = load %[[VAL_11]]{{\[}}%[[VAL_16]], %[[VAL_17]], %[[VAL_24]]] : memref<32x16x8xf32>
// CHECK: %[[VAL_27:.*]] = mulf %[[VAL_25]], %[[VAL_26]] : f32
// CHECK: store %[[VAL_27]], %[[VAL_12]]{{\[}}%[[VAL_16]], %[[VAL_17]], %[[VAL_24]]] : memref<32x16x8xf32>
// CHECK: }
// CHECK: }
// CHECK: }
// CHECK: %[[VAL_28:.*]] = tensor_load %[[VAL_12]] : memref<32x16x8xf32>
// CHECK: return %[[VAL_28]] : tensor<32x16x8xf32>
// CHECK: }
// Test input: element-wise f32 product of %arga and %argb, expressed as a
// linalg.generic configured by #trait_sds. %arga is also passed as the
// `outs` operand; the region argument bound to it is not read by the body.
func @mul_sds(%arga: tensor<32x16x8xf32>, %argb: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
  %result = linalg.generic #trait_sds
    ins(%arga, %argb : tensor<32x16x8xf32>, tensor<32x16x8xf32>)
    outs(%arga : tensor<32x16x8xf32>) {
      ^bb(%lhs: f32, %rhs: f32, %out: f32):
        %prod = mulf %lhs, %rhs : f32
        linalg.yield %prod : f32
  } -> tensor<32x16x8xf32>
  return %result : tensor<32x16x8xf32>
}
// Attribute bundle consumed by @add_ssd and @mul_ssd below.
// All three operands use the identity map over (i,j,k) and all three loops
// are parallel. Only operand A carries "S" annotations (on dimensions i and
// j); B and the output X are annotated "D" in every dimension.
// NOTE(review): "S"/"D" presumably denote sparse vs. dense storage of a
// dimension -- confirm against the sparsification pass documentation.
#trait_ssd = {
indexing_maps = [
affine_map<(i,j,k) -> (i,j,k)>, // A
affine_map<(i,j,k) -> (i,j,k)>, // B
affine_map<(i,j,k) -> (i,j,k)> // X (out)
],
sparse = [
[ "S", "S", "D" ], // A
[ "D", "D", "D" ], // B
[ "D", "D", "D" ] // X
],
iterator_types = ["parallel", "parallel", "parallel"],
doc = "X(i,j,k) = A(i,j,k) OP B(i,j,k)"
}
// CHECK-LABEL: func @add_ssd(
// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16x8xf32>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
// CHECK: %[[VAL_2:.*]] = constant 999 : index
// CHECK: %[[VAL_3:.*]] = constant 32 : index
// CHECK: %[[VAL_4:.*]] = constant 16 : index
// CHECK: %[[VAL_5:.*]] = constant 8 : index
// CHECK: %[[VAL_6:.*]] = constant true
// CHECK: %[[VAL_7:.*]] = constant 0 : index
// CHECK: %[[VAL_8:.*]] = constant 1 : index
// CHECK: %[[VAL_9:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
// CHECK: %[[VAL_10:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
// CHECK: %[[VAL_11:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
// CHECK: %[[VAL_12:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
// CHECK: %[[VAL_13:.*]] = alloca(%[[VAL_2]]) : memref<?xf32>
// CHECK: %[[VAL_14:.*]] = alloca() : memref<32x16x8xf32>
// CHECK: %[[VAL_15:.*]] = alloca() : memref<32x16x8xf32>
// CHECK: %[[VAL_16:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_7]]] : memref<?xindex>
// CHECK: %[[VAL_17:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_8]]] : memref<?xindex>
// CHECK: %[[VAL_18:.*]]:2 = scf.while (%[[VAL_19:.*]] = %[[VAL_16]], %[[VAL_20:.*]] = %[[VAL_7]]) : (index, index) -> (index, index) {
// CHECK: %[[VAL_21:.*]] = cmpi ult, %[[VAL_19]], %[[VAL_17]] : index
// CHECK: scf.condition(%[[VAL_21]]) %[[VAL_19]], %[[VAL_20]] : index, index
// CHECK: } do {
// CHECK: ^bb0(%[[VAL_22:.*]]: index, %[[VAL_23:.*]]: index):
// CHECK: %[[VAL_24:.*]] = load %[[VAL_10]]{{\[}}%[[VAL_22]]] : memref<?xindex>
// CHECK: %[[VAL_25:.*]] = cmpi eq, %[[VAL_24]], %[[VAL_23]] : index
// CHECK: scf.if %[[VAL_25]] {
// CHECK: %[[VAL_26:.*]] = load %[[VAL_11]]{{\[}}%[[VAL_22]]] : memref<?xindex>
// CHECK: %[[VAL_27:.*]] = addi %[[VAL_22]], %[[VAL_8]] : index
// CHECK: %[[VAL_28:.*]] = load %[[VAL_11]]{{\[}}%[[VAL_27]]] : memref<?xindex>
// CHECK: %[[VAL_29:.*]]:2 = scf.while (%[[VAL_30:.*]] = %[[VAL_26]], %[[VAL_31:.*]] = %[[VAL_7]]) : (index, index) -> (index, index) {
// CHECK: %[[VAL_32:.*]] = cmpi ult, %[[VAL_30]], %[[VAL_28]] : index
// CHECK: scf.condition(%[[VAL_32]]) %[[VAL_30]], %[[VAL_31]] : index, index
// CHECK: } do {
// CHECK: ^bb0(%[[VAL_33:.*]]: index, %[[VAL_34:.*]]: index):
// CHECK: %[[VAL_35:.*]] = load %[[VAL_12]]{{\[}}%[[VAL_33]]] : memref<?xindex>
// CHECK: %[[VAL_36:.*]] = cmpi eq, %[[VAL_35]], %[[VAL_34]] : index
// CHECK: scf.if %[[VAL_36]] {
// CHECK: scf.for %[[VAL_37:.*]] = %[[VAL_7]] to %[[VAL_5]] step %[[VAL_8]] {
// CHECK: %[[VAL_38:.*]] = muli %[[VAL_33]], %[[VAL_5]] : index
// CHECK: %[[VAL_39:.*]] = addi %[[VAL_38]], %[[VAL_37]] : index
// CHECK: %[[VAL_40:.*]] = load %[[VAL_13]]{{\[}}%[[VAL_39]]] : memref<?xf32>
// CHECK: %[[VAL_41:.*]] = load %[[VAL_14]]{{\[}}%[[VAL_23]], %[[VAL_34]], %[[VAL_37]]] : memref<32x16x8xf32>
// CHECK: %[[VAL_42:.*]] = addf %[[VAL_40]], %[[VAL_41]] : f32
// CHECK: store %[[VAL_42]], %[[VAL_15]]{{\[}}%[[VAL_23]], %[[VAL_34]], %[[VAL_37]]] : memref<32x16x8xf32>
// CHECK: }
// CHECK: } else {
// CHECK: scf.if %[[VAL_6]] {
// CHECK: scf.for %[[VAL_43:.*]] = %[[VAL_7]] to %[[VAL_5]] step %[[VAL_8]] {
// CHECK: %[[VAL_44:.*]] = load %[[VAL_14]]{{\[}}%[[VAL_23]], %[[VAL_34]], %[[VAL_43]]] : memref<32x16x8xf32>
// CHECK: store %[[VAL_44]], %[[VAL_15]]{{\[}}%[[VAL_23]], %[[VAL_34]], %[[VAL_43]]] : memref<32x16x8xf32>
// CHECK: }
// CHECK: } else {
// CHECK: }
// CHECK: }
// CHECK: %[[VAL_45:.*]] = cmpi eq, %[[VAL_35]], %[[VAL_34]] : index
// CHECK: %[[VAL_46:.*]] = addi %[[VAL_33]], %[[VAL_8]] : index
// CHECK: %[[VAL_47:.*]] = select %[[VAL_45]], %[[VAL_46]], %[[VAL_33]] : index
// CHECK: %[[VAL_48:.*]] = addi %[[VAL_34]], %[[VAL_8]] : index
// CHECK: scf.yield %[[VAL_47]], %[[VAL_48]] : index, index
// CHECK: }
// CHECK: scf.for %[[VAL_49:.*]] = %[[VAL_50:.*]]#1 to %[[VAL_4]] step %[[VAL_8]] {
// CHECK: scf.for %[[VAL_51:.*]] = %[[VAL_7]] to %[[VAL_5]] step %[[VAL_8]] {
// CHECK: %[[VAL_52:.*]] = load %[[VAL_14]]{{\[}}%[[VAL_23]], %[[VAL_49]], %[[VAL_51]]] : memref<32x16x8xf32>
// CHECK: store %[[VAL_52]], %[[VAL_15]]{{\[}}%[[VAL_23]], %[[VAL_49]], %[[VAL_51]]] : memref<32x16x8xf32>
// CHECK: }
// CHECK: }
// CHECK: } else {
// CHECK: scf.if %[[VAL_6]] {
// CHECK: scf.for %[[VAL_53:.*]] = %[[VAL_7]] to %[[VAL_4]] step %[[VAL_8]] {
// CHECK: scf.for %[[VAL_54:.*]] = %[[VAL_7]] to %[[VAL_5]] step %[[VAL_8]] {
// CHECK: %[[VAL_55:.*]] = load %[[VAL_14]]{{\[}}%[[VAL_23]], %[[VAL_53]], %[[VAL_54]]] : memref<32x16x8xf32>
// CHECK: store %[[VAL_55]], %[[VAL_15]]{{\[}}%[[VAL_23]], %[[VAL_53]], %[[VAL_54]]] : memref<32x16x8xf32>
// CHECK: }
// CHECK: }
// CHECK: } else {
// CHECK: }
// CHECK: }
// CHECK: %[[VAL_56:.*]] = cmpi eq, %[[VAL_24]], %[[VAL_23]] : index
// CHECK: %[[VAL_57:.*]] = addi %[[VAL_22]], %[[VAL_8]] : index
// CHECK: %[[VAL_58:.*]] = select %[[VAL_56]], %[[VAL_57]], %[[VAL_22]] : index
// CHECK: %[[VAL_59:.*]] = addi %[[VAL_23]], %[[VAL_8]] : index
// CHECK: scf.yield %[[VAL_58]], %[[VAL_59]] : index, index
// CHECK: }
// CHECK: scf.for %[[VAL_60:.*]] = %[[VAL_61:.*]]#1 to %[[VAL_3]] step %[[VAL_8]] {
// CHECK: scf.for %[[VAL_62:.*]] = %[[VAL_7]] to %[[VAL_4]] step %[[VAL_8]] {
// CHECK: scf.for %[[VAL_63:.*]] = %[[VAL_7]] to %[[VAL_5]] step %[[VAL_8]] {
// CHECK: %[[VAL_64:.*]] = load %[[VAL_14]]{{\[}}%[[VAL_60]], %[[VAL_62]], %[[VAL_63]]] : memref<32x16x8xf32>
// CHECK: store %[[VAL_64]], %[[VAL_15]]{{\[}}%[[VAL_60]], %[[VAL_62]], %[[VAL_63]]] : memref<32x16x8xf32>
// CHECK: }
// CHECK: }
// CHECK: }
// CHECK: %[[VAL_65:.*]] = tensor_load %[[VAL_15]] : memref<32x16x8xf32>
// CHECK: return %[[VAL_65]] : tensor<32x16x8xf32>
// CHECK: }
// Test input: element-wise f32 sum of %arga and %argb, expressed as a
// linalg.generic configured by #trait_ssd. %arga is also passed as the
// `outs` operand; the region argument bound to it is not read by the body.
func @add_ssd(%arga: tensor<32x16x8xf32>, %argb: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
  %result = linalg.generic #trait_ssd
    ins(%arga, %argb : tensor<32x16x8xf32>, tensor<32x16x8xf32>)
    outs(%arga : tensor<32x16x8xf32>) {
      ^bb(%lhs: f32, %rhs: f32, %out: f32):
        %sum = addf %lhs, %rhs : f32
        linalg.yield %sum : f32
  } -> tensor<32x16x8xf32>
  return %result : tensor<32x16x8xf32>
}
// CHECK-LABEL: func @mul_ssd(
// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16x8xf32>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
// CHECK: %[[VAL_2:.*]] = constant 999 : index
// CHECK: %[[VAL_3:.*]] = constant 8 : index
// CHECK: %[[VAL_4:.*]] = constant 0 : index
// CHECK: %[[VAL_5:.*]] = constant 1 : index
// CHECK: %[[VAL_6:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
// CHECK: %[[VAL_7:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
// CHECK: %[[VAL_8:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
// CHECK: %[[VAL_9:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
// CHECK: %[[VAL_10:.*]] = alloca(%[[VAL_2]]) : memref<?xf32>
// CHECK: %[[VAL_11:.*]] = alloca() : memref<32x16x8xf32>
// CHECK: %[[VAL_12:.*]] = alloca() : memref<32x16x8xf32>
// CHECK: %[[VAL_13:.*]] = load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK: %[[VAL_14:.*]] = load %[[VAL_6]]{{\[}}%[[VAL_5]]] : memref<?xindex>
// CHECK: scf.for %[[VAL_15:.*]] = %[[VAL_13]] to %[[VAL_14]] step %[[VAL_5]] {
// CHECK: %[[VAL_16:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_15]]] : memref<?xindex>
// CHECK: %[[VAL_17:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_15]]] : memref<?xindex>
// CHECK: %[[VAL_18:.*]] = addi %[[VAL_15]], %[[VAL_5]] : index
// CHECK: %[[VAL_19:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_18]]] : memref<?xindex>
// CHECK: scf.for %[[VAL_20:.*]] = %[[VAL_17]] to %[[VAL_19]] step %[[VAL_5]] {
// CHECK: %[[VAL_21:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_20]]] : memref<?xindex>
// CHECK: scf.for %[[VAL_22:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] {
// CHECK: %[[VAL_23:.*]] = muli %[[VAL_20]], %[[VAL_3]] : index
// CHECK: %[[VAL_24:.*]] = addi %[[VAL_23]], %[[VAL_22]] : index
// CHECK: %[[VAL_25:.*]] = load %[[VAL_10]]{{\[}}%[[VAL_24]]] : memref<?xf32>
// CHECK: %[[VAL_26:.*]] = load %[[VAL_11]]{{\[}}%[[VAL_16]], %[[VAL_21]], %[[VAL_22]]] : memref<32x16x8xf32>
// CHECK: %[[VAL_27:.*]] = mulf %[[VAL_25]], %[[VAL_26]] : f32
// CHECK: store %[[VAL_27]], %[[VAL_12]]{{\[}}%[[VAL_16]], %[[VAL_21]], %[[VAL_22]]] : memref<32x16x8xf32>
// CHECK: }
// CHECK: }
// CHECK: }
// CHECK: %[[VAL_28:.*]] = tensor_load %[[VAL_12]] : memref<32x16x8xf32>
// CHECK: return %[[VAL_28]] : tensor<32x16x8xf32>
// CHECK: }
// Test input: element-wise f32 product of %arga and %argb, expressed as a
// linalg.generic configured by #trait_ssd. %arga is also passed as the
// `outs` operand; the region argument bound to it is not read by the body.
func @mul_ssd(%arga: tensor<32x16x8xf32>, %argb: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
  %result = linalg.generic #trait_ssd
    ins(%arga, %argb : tensor<32x16x8xf32>, tensor<32x16x8xf32>)
    outs(%arga : tensor<32x16x8xf32>) {
      ^bb(%lhs: f32, %rhs: f32, %out: f32):
        %prod = mulf %lhs, %rhs : f32
        linalg.yield %prod : f32
  } -> tensor<32x16x8xf32>
  return %result : tensor<32x16x8xf32>
}
// Attribute bundle consumed by @add_sss and @mul_sss below.
// All three operands use the identity map over (i,j,k) and all three loops
// are parallel. Operand A is annotated "S" in every dimension; B and the
// output X are annotated "D" in every dimension.
// NOTE(review): "S"/"D" presumably denote sparse vs. dense storage of a
// dimension -- confirm against the sparsification pass documentation.
#trait_sss = {
indexing_maps = [
affine_map<(i,j,k) -> (i,j,k)>, // A
affine_map<(i,j,k) -> (i,j,k)>, // B
affine_map<(i,j,k) -> (i,j,k)> // X (out)
],
sparse = [
[ "S", "S", "S" ], // A
[ "D", "D", "D" ], // B
[ "D", "D", "D" ] // X
],
iterator_types = ["parallel", "parallel", "parallel"],
doc = "X(i,j,k) = A(i,j,k) OP B(i,j,k)"
}
// CHECK-LABEL: func @add_sss(
// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16x8xf32>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
// CHECK: %[[VAL_2:.*]] = constant 999 : index
// CHECK: %[[VAL_3:.*]] = constant 32 : index
// CHECK: %[[VAL_4:.*]] = constant 16 : index
// CHECK: %[[VAL_5:.*]] = constant 8 : index
// CHECK: %[[VAL_6:.*]] = constant true
// CHECK: %[[VAL_7:.*]] = constant 0 : index
// CHECK: %[[VAL_8:.*]] = constant 1 : index
// CHECK: %[[VAL_9:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
// CHECK: %[[VAL_10:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
// CHECK: %[[VAL_11:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
// CHECK: %[[VAL_12:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
// CHECK: %[[VAL_13:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
// CHECK: %[[VAL_14:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
// CHECK: %[[VAL_15:.*]] = alloca(%[[VAL_2]]) : memref<?xf32>
// CHECK: %[[VAL_16:.*]] = alloca() : memref<32x16x8xf32>
// CHECK: %[[VAL_17:.*]] = alloca() : memref<32x16x8xf32>
// CHECK: %[[VAL_18:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_7]]] : memref<?xindex>
// CHECK: %[[VAL_19:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_8]]] : memref<?xindex>
// CHECK: %[[VAL_20:.*]]:2 = scf.while (%[[VAL_21:.*]] = %[[VAL_18]], %[[VAL_22:.*]] = %[[VAL_7]]) : (index, index) -> (index, index) {
// CHECK: %[[VAL_23:.*]] = cmpi ult, %[[VAL_21]], %[[VAL_19]] : index
// CHECK: scf.condition(%[[VAL_23]]) %[[VAL_21]], %[[VAL_22]] : index, index
// CHECK: } do {
// CHECK: ^bb0(%[[VAL_24:.*]]: index, %[[VAL_25:.*]]: index):
// CHECK: %[[VAL_26:.*]] = load %[[VAL_10]]{{\[}}%[[VAL_24]]] : memref<?xindex>
// CHECK: %[[VAL_27:.*]] = cmpi eq, %[[VAL_26]], %[[VAL_25]] : index
// CHECK: scf.if %[[VAL_27]] {
// CHECK: %[[VAL_28:.*]] = load %[[VAL_11]]{{\[}}%[[VAL_24]]] : memref<?xindex>
// CHECK: %[[VAL_29:.*]] = addi %[[VAL_24]], %[[VAL_8]] : index
// CHECK: %[[VAL_30:.*]] = load %[[VAL_11]]{{\[}}%[[VAL_29]]] : memref<?xindex>
// CHECK: %[[VAL_31:.*]]:2 = scf.while (%[[VAL_32:.*]] = %[[VAL_28]], %[[VAL_33:.*]] = %[[VAL_7]]) : (index, index) -> (index, index) {
// CHECK: %[[VAL_34:.*]] = cmpi ult, %[[VAL_32]], %[[VAL_30]] : index
// CHECK: scf.condition(%[[VAL_34]]) %[[VAL_32]], %[[VAL_33]] : index, index
// CHECK: } do {
// CHECK: ^bb0(%[[VAL_35:.*]]: index, %[[VAL_36:.*]]: index):
// CHECK: %[[VAL_37:.*]] = load %[[VAL_12]]{{\[}}%[[VAL_35]]] : memref<?xindex>
// CHECK: %[[VAL_38:.*]] = cmpi eq, %[[VAL_37]], %[[VAL_36]] : index
// CHECK: scf.if %[[VAL_38]] {
// CHECK: %[[VAL_39:.*]] = load %[[VAL_13]]{{\[}}%[[VAL_35]]] : memref<?xindex>
// CHECK: %[[VAL_40:.*]] = addi %[[VAL_35]], %[[VAL_8]] : index
// CHECK: %[[VAL_41:.*]] = load %[[VAL_13]]{{\[}}%[[VAL_40]]] : memref<?xindex>
// CHECK: %[[VAL_42:.*]]:2 = scf.while (%[[VAL_43:.*]] = %[[VAL_39]], %[[VAL_44:.*]] = %[[VAL_7]]) : (index, index) -> (index, index) {
// CHECK: %[[VAL_45:.*]] = cmpi ult, %[[VAL_43]], %[[VAL_41]] : index
// CHECK: scf.condition(%[[VAL_45]]) %[[VAL_43]], %[[VAL_44]] : index, index
// CHECK: } do {
// CHECK: ^bb0(%[[VAL_46:.*]]: index, %[[VAL_47:.*]]: index):
// CHECK: %[[VAL_48:.*]] = load %[[VAL_14]]{{\[}}%[[VAL_46]]] : memref<?xindex>
// CHECK: %[[VAL_49:.*]] = cmpi eq, %[[VAL_48]], %[[VAL_47]] : index
// CHECK: scf.if %[[VAL_49]] {
// CHECK: %[[VAL_50:.*]] = load %[[VAL_15]]{{\[}}%[[VAL_46]]] : memref<?xf32>
// CHECK: %[[VAL_51:.*]] = load %[[VAL_16]]{{\[}}%[[VAL_25]], %[[VAL_36]], %[[VAL_47]]] : memref<32x16x8xf32>
// CHECK: %[[VAL_52:.*]] = addf %[[VAL_50]], %[[VAL_51]] : f32
// CHECK: store %[[VAL_52]], %[[VAL_17]]{{\[}}%[[VAL_25]], %[[VAL_36]], %[[VAL_47]]] : memref<32x16x8xf32>
// CHECK: } else {
// CHECK: scf.if %[[VAL_6]] {
// CHECK: %[[VAL_53:.*]] = load %[[VAL_16]]{{\[}}%[[VAL_25]], %[[VAL_36]], %[[VAL_47]]] : memref<32x16x8xf32>
// CHECK: store %[[VAL_53]], %[[VAL_17]]{{\[}}%[[VAL_25]], %[[VAL_36]], %[[VAL_47]]] : memref<32x16x8xf32>
// CHECK: } else {
// CHECK: }
// CHECK: }
// CHECK: %[[VAL_54:.*]] = cmpi eq, %[[VAL_48]], %[[VAL_47]] : index
// CHECK: %[[VAL_55:.*]] = addi %[[VAL_46]], %[[VAL_8]] : index
// CHECK: %[[VAL_56:.*]] = select %[[VAL_54]], %[[VAL_55]], %[[VAL_46]] : index
// CHECK: %[[VAL_57:.*]] = addi %[[VAL_47]], %[[VAL_8]] : index
// CHECK: scf.yield %[[VAL_56]], %[[VAL_57]] : index, index
// CHECK: }
// CHECK: scf.for %[[VAL_58:.*]] = %[[VAL_59:.*]]#1 to %[[VAL_5]] step %[[VAL_8]] {
// CHECK: %[[VAL_60:.*]] = load %[[VAL_16]]{{\[}}%[[VAL_25]], %[[VAL_36]], %[[VAL_58]]] : memref<32x16x8xf32>
// CHECK: store %[[VAL_60]], %[[VAL_17]]{{\[}}%[[VAL_25]], %[[VAL_36]], %[[VAL_58]]] : memref<32x16x8xf32>
// CHECK: }
// CHECK: } else {
// CHECK: scf.if %[[VAL_6]] {
// CHECK: scf.for %[[VAL_61:.*]] = %[[VAL_7]] to %[[VAL_5]] step %[[VAL_8]] {
// CHECK: %[[VAL_62:.*]] = load %[[VAL_16]]{{\[}}%[[VAL_25]], %[[VAL_36]], %[[VAL_61]]] : memref<32x16x8xf32>
// CHECK: store %[[VAL_62]], %[[VAL_17]]{{\[}}%[[VAL_25]], %[[VAL_36]], %[[VAL_61]]] : memref<32x16x8xf32>
// CHECK: }
// CHECK: } else {
// CHECK: }
// CHECK: }
// CHECK: %[[VAL_63:.*]] = cmpi eq, %[[VAL_37]], %[[VAL_36]] : index
// CHECK: %[[VAL_64:.*]] = addi %[[VAL_35]], %[[VAL_8]] : index
// CHECK: %[[VAL_65:.*]] = select %[[VAL_63]], %[[VAL_64]], %[[VAL_35]] : index
// CHECK: %[[VAL_66:.*]] = addi %[[VAL_36]], %[[VAL_8]] : index
// CHECK: scf.yield %[[VAL_65]], %[[VAL_66]] : index, index
// CHECK: }
// CHECK: scf.for %[[VAL_67:.*]] = %[[VAL_68:.*]]#1 to %[[VAL_4]] step %[[VAL_8]] {
// CHECK: scf.for %[[VAL_69:.*]] = %[[VAL_7]] to %[[VAL_5]] step %[[VAL_8]] {
// CHECK: %[[VAL_70:.*]] = load %[[VAL_16]]{{\[}}%[[VAL_25]], %[[VAL_67]], %[[VAL_69]]] : memref<32x16x8xf32>
// CHECK: store %[[VAL_70]], %[[VAL_17]]{{\[}}%[[VAL_25]], %[[VAL_67]], %[[VAL_69]]] : memref<32x16x8xf32>
// CHECK: }
// CHECK: }
// CHECK: } else {
// CHECK: scf.if %[[VAL_6]] {
// CHECK: scf.for %[[VAL_71:.*]] = %[[VAL_7]] to %[[VAL_4]] step %[[VAL_8]] {
// CHECK: scf.for %[[VAL_72:.*]] = %[[VAL_7]] to %[[VAL_5]] step %[[VAL_8]] {
// CHECK: %[[VAL_73:.*]] = load %[[VAL_16]]{{\[}}%[[VAL_25]], %[[VAL_71]], %[[VAL_72]]] : memref<32x16x8xf32>
// CHECK: store %[[VAL_73]], %[[VAL_17]]{{\[}}%[[VAL_25]], %[[VAL_71]], %[[VAL_72]]] : memref<32x16x8xf32>
// CHECK: }
// CHECK: }
// CHECK: } else {
// CHECK: }
// CHECK: }
// CHECK: %[[VAL_74:.*]] = cmpi eq, %[[VAL_26]], %[[VAL_25]] : index
// CHECK: %[[VAL_75:.*]] = addi %[[VAL_24]], %[[VAL_8]] : index
// CHECK: %[[VAL_76:.*]] = select %[[VAL_74]], %[[VAL_75]], %[[VAL_24]] : index
// CHECK: %[[VAL_77:.*]] = addi %[[VAL_25]], %[[VAL_8]] : index
// CHECK: scf.yield %[[VAL_76]], %[[VAL_77]] : index, index
// CHECK: }
// CHECK: scf.for %[[VAL_78:.*]] = %[[VAL_79:.*]]#1 to %[[VAL_3]] step %[[VAL_8]] {
// CHECK: scf.for %[[VAL_80:.*]] = %[[VAL_7]] to %[[VAL_4]] step %[[VAL_8]] {
// CHECK: scf.for %[[VAL_81:.*]] = %[[VAL_7]] to %[[VAL_5]] step %[[VAL_8]] {
// CHECK: %[[VAL_82:.*]] = load %[[VAL_16]]{{\[}}%[[VAL_78]], %[[VAL_80]], %[[VAL_81]]] : memref<32x16x8xf32>
// CHECK: store %[[VAL_82]], %[[VAL_17]]{{\[}}%[[VAL_78]], %[[VAL_80]], %[[VAL_81]]] : memref<32x16x8xf32>
// CHECK: }
// CHECK: }
// CHECK: }
// CHECK: %[[VAL_83:.*]] = tensor_load %[[VAL_17]] : memref<32x16x8xf32>
// CHECK: return %[[VAL_83]] : tensor<32x16x8xf32>
// CHECK: }
// Test input: element-wise f32 sum of %arga and %argb, expressed as a
// linalg.generic configured by #trait_sss. %arga is also passed as the
// `outs` operand; the region argument bound to it is not read by the body.
func @add_sss(%arga: tensor<32x16x8xf32>, %argb: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
  %result = linalg.generic #trait_sss
    ins(%arga, %argb : tensor<32x16x8xf32>, tensor<32x16x8xf32>)
    outs(%arga : tensor<32x16x8xf32>) {
      ^bb(%lhs: f32, %rhs: f32, %out: f32):
        %sum = addf %lhs, %rhs : f32
        linalg.yield %sum : f32
  } -> tensor<32x16x8xf32>
  return %result : tensor<32x16x8xf32>
}
// CHECK-LABEL: func @mul_sss(
// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16x8xf32>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
// CHECK: %[[VAL_2:.*]] = constant 999 : index
// CHECK: %[[VAL_3:.*]] = constant 0 : index
// CHECK: %[[VAL_4:.*]] = constant 1 : index
// CHECK: %[[VAL_5:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
// CHECK: %[[VAL_6:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
// CHECK: %[[VAL_7:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
// CHECK: %[[VAL_8:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
// CHECK: %[[VAL_9:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
// CHECK: %[[VAL_10:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
// CHECK: %[[VAL_11:.*]] = alloca(%[[VAL_2]]) : memref<?xf32>
// CHECK: %[[VAL_12:.*]] = alloca() : memref<32x16x8xf32>
// CHECK: %[[VAL_13:.*]] = alloca() : memref<32x16x8xf32>
// CHECK: %[[VAL_14:.*]] = load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref<?xindex>
// CHECK: %[[VAL_15:.*]] = load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK: scf.for %[[VAL_16:.*]] = %[[VAL_14]] to %[[VAL_15]] step %[[VAL_4]] {
// CHECK: %[[VAL_17:.*]] = load %[[VAL_6]]{{\[}}%[[VAL_16]]] : memref<?xindex>
// CHECK: %[[VAL_18:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_16]]] : memref<?xindex>
// CHECK: %[[VAL_19:.*]] = addi %[[VAL_16]], %[[VAL_4]] : index
// CHECK: %[[VAL_20:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_19]]] : memref<?xindex>
// CHECK: scf.for %[[VAL_21:.*]] = %[[VAL_18]] to %[[VAL_20]] step %[[VAL_4]] {
// CHECK: %[[VAL_22:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_21]]] : memref<?xindex>
// CHECK: %[[VAL_23:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_21]]] : memref<?xindex>
// CHECK: %[[VAL_24:.*]] = addi %[[VAL_21]], %[[VAL_4]] : index
// CHECK: %[[VAL_25:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_24]]] : memref<?xindex>
// CHECK: scf.for %[[VAL_26:.*]] = %[[VAL_23]] to %[[VAL_25]] step %[[VAL_4]] {
// CHECK: %[[VAL_27:.*]] = load %[[VAL_10]]{{\[}}%[[VAL_26]]] : memref<?xindex>
// CHECK: %[[VAL_28:.*]] = load %[[VAL_11]]{{\[}}%[[VAL_26]]] : memref<?xf32>
// CHECK: %[[VAL_29:.*]] = load %[[VAL_12]]{{\[}}%[[VAL_17]], %[[VAL_22]], %[[VAL_27]]] : memref<32x16x8xf32>
// CHECK: %[[VAL_30:.*]] = mulf %[[VAL_28]], %[[VAL_29]] : f32
// CHECK: store %[[VAL_30]], %[[VAL_13]]{{\[}}%[[VAL_17]], %[[VAL_22]], %[[VAL_27]]] : memref<32x16x8xf32>
// CHECK: }
// CHECK: }
// CHECK: }
// CHECK: %[[VAL_31:.*]] = tensor_load %[[VAL_13]] : memref<32x16x8xf32>
// CHECK: return %[[VAL_31]] : tensor<32x16x8xf32>
// CHECK: }
// Test input: element-wise f32 product of %arga and %argb, expressed as a
// linalg.generic configured by #trait_sss. %arga is also passed as the
// `outs` operand; the region argument bound to it is not read by the body.
func @mul_sss(%arga: tensor<32x16x8xf32>, %argb: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
  %result = linalg.generic #trait_sss
    ins(%arga, %argb : tensor<32x16x8xf32>, tensor<32x16x8xf32>)
    outs(%arga : tensor<32x16x8xf32>) {
      ^bb(%lhs: f32, %rhs: f32, %out: f32):
        %prod = mulf %lhs, %rhs : f32
        linalg.yield %prod : f32
  } -> tensor<32x16x8xf32>
  return %result : tensor<32x16x8xf32>
}
// Attribute bundle consumed by @kernel_3d below.
// Four loops (i,j,k,l): i and j are parallel, k and l are reductions.
// B is indexed by (i,k,l), C by (k,j), D by (l,j) and the output A by (i,j).
// Only the last dimension of B is annotated "S"; every other dimension of
// every operand is annotated "D".
// NOTE(review): "S"/"D" presumably denote sparse vs. dense storage of a
// dimension -- confirm against the sparsification pass documentation.
#trait_kernel_3d = {
indexing_maps = [
affine_map<(i,j,k,l) -> (i,k,l)>, // B
affine_map<(i,j,k,l) -> (k,j)>, // C
affine_map<(i,j,k,l) -> (l,j)>, // D
affine_map<(i,j,k,l) -> (i,j)> // A (out)
],
sparse = [
[ "D", "D", "S" ], // B
[ "D", "D" ], // C
[ "D", "D" ], // D
[ "D", "D" ] // A
],
iterator_types = ["parallel", "parallel", "reduction", "reduction"],
doc = "A(i,j) += SUM_k,l B(i,k,l) * C(k,j) * D(l,j)"
}
// CHECK-LABEL: func @kernel_3d(
// CHECK-SAME: %[[VAL_0:.*0]]: tensor<?x?xf32>,
// CHECK-SAME: %[[VAL_1:.*1]]: tensor<?x?x?xf32>,
// CHECK-SAME: %[[VAL_2:.*2]]: tensor<?x?xf32>,
// CHECK-SAME: %[[VAL_3:.*3]]: tensor<?x?xf32>) -> tensor<?x?xf32> {
// CHECK: %[[VAL_4:.*]] = constant 999 : index
// CHECK: %[[VAL_5:.*]] = constant 0 : index
// CHECK: %[[VAL_6:.*]] = constant 1 : index
// CHECK: %[[VAL_7:.*]] = alloca(%[[VAL_4]]) : memref<?xindex>
// CHECK: %[[VAL_8:.*]] = alloca(%[[VAL_4]]) : memref<?xindex>
// CHECK: %[[VAL_9:.*]] = alloca(%[[VAL_4]]) : memref<?xf32>
// CHECK: %[[VAL_10:.*]] = dim %[[VAL_2]], %[[VAL_5]] : tensor<?x?xf32>
// CHECK: %[[VAL_11:.*]] = dim %[[VAL_2]], %[[VAL_6]] : tensor<?x?xf32>
// CHECK: %[[VAL_12:.*]] = alloca(%[[VAL_10]], %[[VAL_11]]) : memref<?x?xf32>
// CHECK: %[[VAL_13:.*]] = dim %[[VAL_3]], %[[VAL_5]] : tensor<?x?xf32>
// CHECK: %[[VAL_14:.*]] = dim %[[VAL_3]], %[[VAL_6]] : tensor<?x?xf32>
// CHECK: %[[VAL_15:.*]] = alloca(%[[VAL_13]], %[[VAL_14]]) : memref<?x?xf32>
// CHECK: %[[VAL_16:.*]] = dim %[[VAL_0]], %[[VAL_5]] : tensor<?x?xf32>
// CHECK: %[[VAL_17:.*]] = dim %[[VAL_0]], %[[VAL_6]] : tensor<?x?xf32>
// CHECK: %[[VAL_18:.*]] = alloca(%[[VAL_16]], %[[VAL_17]]) : memref<?x?xf32>
// CHECK: scf.for %[[VAL_19:.*]] = %[[VAL_5]] to %[[VAL_16]] step %[[VAL_6]] {
// CHECK: scf.for %[[VAL_20:.*]] = %[[VAL_5]] to %[[VAL_10]] step %[[VAL_6]] {
// CHECK: %[[VAL_21:.*]] = muli %[[VAL_10]], %[[VAL_19]] : index
// CHECK: %[[VAL_22:.*]] = addi %[[VAL_21]], %[[VAL_20]] : index
// CHECK: %[[VAL_23:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_22]]] : memref<?xindex>
// CHECK: %[[VAL_24:.*]] = addi %[[VAL_22]], %[[VAL_6]] : index
// CHECK: %[[VAL_25:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_24]]] : memref<?xindex>
// CHECK: scf.for %[[VAL_26:.*]] = %[[VAL_23]] to %[[VAL_25]] step %[[VAL_6]] {
// CHECK: %[[VAL_27:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_26]]] : memref<?xindex>
// CHECK: %[[VAL_28:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_26]]] : memref<?xf32>
// CHECK: scf.for %[[VAL_29:.*]] = %[[VAL_5]] to %[[VAL_17]] step %[[VAL_6]] {
// CHECK: %[[VAL_30:.*]] = load %[[VAL_12]]{{\[}}%[[VAL_20]], %[[VAL_29]]] : memref<?x?xf32>
// CHECK: %[[VAL_31:.*]] = mulf %[[VAL_28]], %[[VAL_30]] : f32
// CHECK: %[[VAL_32:.*]] = load %[[VAL_15]]{{\[}}%[[VAL_27]], %[[VAL_29]]] : memref<?x?xf32>
// CHECK: %[[VAL_33:.*]] = mulf %[[VAL_31]], %[[VAL_32]] : f32
// CHECK: %[[VAL_34:.*]] = load %[[VAL_18]]{{\[}}%[[VAL_19]], %[[VAL_29]]] : memref<?x?xf32>
// CHECK: %[[VAL_35:.*]] = addf %[[VAL_33]], %[[VAL_34]] : f32
// CHECK: store %[[VAL_35]], %[[VAL_18]]{{\[}}%[[VAL_19]], %[[VAL_29]]] : memref<?x?xf32>
// CHECK: }
// CHECK: }
// CHECK: }
// CHECK: }
// CHECK: %[[VAL_36:.*]] = tensor_load %[[VAL_18]] : memref<?x?xf32>
// CHECK: return %[[VAL_36]] : tensor<?x?xf32>
// CHECK: }
// Test input: dynamically shaped kernel configured by #trait_kernel_3d.
// The region multiplies the B, C and D elements together and adds the
// current output element, i.e. A(i,j) += B(i,k,l) * C(k,j) * D(l,j).
// %arga is the accumulated output (`outs`); %argb, %argc, %argd are inputs.
func @kernel_3d(%arga: tensor<?x?xf32>,
                %argb: tensor<?x?x?xf32>,
                %argc: tensor<?x?xf32>,
                %argd: tensor<?x?xf32>) -> tensor<?x?xf32> {
  %result = linalg.generic #trait_kernel_3d
    ins(%argb, %argc, %argd : tensor<?x?x?xf32>, tensor<?x?xf32>, tensor<?x?xf32>)
    outs(%arga : tensor<?x?xf32>) {
      ^bb(%b_elt: f32, %c_elt: f32, %d_elt: f32, %acc: f32):
        %bc = mulf %b_elt, %c_elt : f32
        %bcd = mulf %bc, %d_elt : f32
        %updated = addf %bcd, %acc : f32
        linalg.yield %updated : f32
  } -> tensor<?x?xf32>
  return %result : tensor<?x?xf32>
}
// Attribute bundle consumed by @sum_reduction below.
// Operand A uses the identity map over (i,j,k); the output x maps every
// iteration to the empty index list, i.e. a rank-0 (scalar) tensor.
// All three loops are reductions, and A is annotated "S" in every dimension.
// NOTE(review): "S" presumably denotes sparse storage of a dimension --
// confirm against the sparsification pass documentation.
#trait_sum_reduction = {
indexing_maps = [
affine_map<(i,j,k) -> (i,j,k)>, // A
affine_map<(i,j,k) -> ()> // x (scalar out)
],
sparse = [
[ "S", "S", "S" ], // A
[ ] // x
],
iterator_types = ["reduction", "reduction", "reduction"],
doc = "x += SUM_ijk A(i,j,k)"
}
// CHECK-LABEL: func @sum_reduction(
// CHECK-SAME: %[[VAL_0:.*]]: tensor<10x20x30xf32>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<f32>) -> tensor<f32> {
// CHECK: %[[VAL_2:.*]] = constant 999 : index
// CHECK: %[[VAL_3:.*]] = constant 0 : index
// CHECK: %[[VAL_4:.*]] = constant 1 : index
// CHECK: %[[VAL_5:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
// CHECK: %[[VAL_7:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
// CHECK: %[[VAL_9:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
// CHECK: %[[VAL_11:.*]] = alloca(%[[VAL_2]]) : memref<?xf32>
// CHECK: %[[VAL_12:.*]] = alloca() : memref<f32>
// CHECK: %[[VAL_13:.*]] = load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref<?xindex>
// CHECK: %[[VAL_14:.*]] = load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK: scf.for %[[VAL_15:.*]] = %[[VAL_13]] to %[[VAL_14]] step %[[VAL_4]] {
// CHECK: %[[VAL_16:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_15]]] : memref<?xindex>
// CHECK: %[[VAL_17:.*]] = addi %[[VAL_15]], %[[VAL_4]] : index
// CHECK: %[[VAL_18:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_17]]] : memref<?xindex>
// CHECK: scf.for %[[VAL_19:.*]] = %[[VAL_16]] to %[[VAL_18]] step %[[VAL_4]] {
// CHECK: %[[VAL_20:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_19]]] : memref<?xindex>
// CHECK: %[[VAL_21:.*]] = addi %[[VAL_19]], %[[VAL_4]] : index
// CHECK: %[[VAL_22:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_21]]] : memref<?xindex>
// CHECK: %[[VAL_23:.*]] = load %[[VAL_12]][] : memref<f32>
// CHECK: %[[VAL_24:.*]] = scf.for %[[VAL_25:.*]] = %[[VAL_20]] to %[[VAL_22]] step %[[VAL_4]] iter_args(%[[VAL_26:.*]] = %[[VAL_23]]) -> (f32) {
// CHECK: %[[VAL_27:.*]] = load %[[VAL_11]]{{\[}}%[[VAL_25]]] : memref<?xf32>
// CHECK: %[[VAL_28:.*]] = addf %[[VAL_26]], %[[VAL_27]] : f32
// CHECK: scf.yield %[[VAL_28]] : f32
// CHECK: }
// CHECK: store %[[VAL_24]], %[[VAL_12]][] : memref<f32>
// CHECK: }
// CHECK: }
// CHECK: %[[VAL_30:.*]] = tensor_load %[[VAL_12]] : memref<f32>
// CHECK: return %[[VAL_30]] : tensor<f32>
// CHECK: }
//
// Kernel under test: sums every element of A into the scalar x. The store
// assertion above reuses the innermost loop's result capture (rather than a
// fresh wildcard) so the test verifies that the reduced value is what gets
// written back to the scalar buffer.
func @sum_reduction(%arga: tensor<10x20x30xf32>, %argx: tensor<f32>) -> tensor<f32> {
  %sum = linalg.generic #trait_sum_reduction
    ins(%arga : tensor<10x20x30xf32>)
    outs(%argx : tensor<f32>) {
    ^bb(%a : f32, %x : f32):
      // Accumulate the current element into the running scalar.
      %acc = addf %x, %a : f32
      linalg.yield %acc: f32
  } -> tensor<f32>
  return %sum : tensor<f32>
}
// Trait for a full reduction of A(i,j,k) * b(i) into a scalar. All three
// loops are reductions; b is indexed by i only, so its value does not change
// across the j and k loops.
#trait_sum_reduction_inv = {
  indexing_maps = [
    affine_map<(i,j,k) -> (i,j,k)>,  // A
    affine_map<(i,j,k) -> (i)>,      // b
    affine_map<(i,j,k) -> ()>        // x (scalar out)
  ],
  sparse = [
    [ "D", "D", "D" ],  // A
    [ "D"           ],  // b
    [               ]   // x
  ],
  iterator_types = ["reduction", "reduction", "reduction"],
  doc = "x += SUM_ijk A(i,j,k) * b(i)"
}
// CHECK-LABEL: func @sum_reduction_inv(
// CHECK-SAME: %[[VAL_0:.*]]: tensor<?x?x?xf32>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<?xf32>,
// CHECK-SAME: %[[VAL_2:.*]]: tensor<f32>) -> tensor<f32> {
// CHECK: %[[VAL_3:.*]] = constant 2 : index
// CHECK: %[[VAL_4:.*]] = constant 0 : index
// CHECK: %[[VAL_5:.*]] = constant 1 : index
// CHECK: %[[VAL_6:.*]] = dim %[[VAL_0]], %[[VAL_4]] : tensor<?x?x?xf32>
// CHECK: %[[VAL_7:.*]] = dim %[[VAL_0]], %[[VAL_5]] : tensor<?x?x?xf32>
// CHECK: %[[VAL_8:.*]] = dim %[[VAL_0]], %[[VAL_3]] : tensor<?x?x?xf32>
// CHECK: %[[VAL_9:.*]] = alloca(%[[VAL_6]], %[[VAL_7]], %[[VAL_8]]) : memref<?x?x?xf32>
// CHECK: %[[VAL_10:.*]] = dim %[[VAL_1]], %[[VAL_4]] : tensor<?xf32>
// CHECK: %[[VAL_11:.*]] = alloca(%[[VAL_10]]) : memref<?xf32>
// CHECK: %[[VAL_12:.*]] = alloca() : memref<f32>
// CHECK: scf.for %[[VAL_13:.*]] = %[[VAL_4]] to %[[VAL_10]] step %[[VAL_5]] {
// CHECK: %[[VAL_14:.*]] = load %[[VAL_11]]{{\[}}%[[VAL_13]]] : memref<?xf32>
// CHECK: scf.for %[[VAL_15:.*]] = %[[VAL_4]] to %[[VAL_7]] step %[[VAL_5]] {
// CHECK: %[[VAL_16:.*]] = load %[[VAL_12]][] : memref<f32>
// CHECK: %[[VAL_17:.*]] = scf.for %[[VAL_18:.*]] = %[[VAL_4]] to %[[VAL_8]] step %[[VAL_5]] iter_args(%[[VAL_19:.*]] = %[[VAL_16]]) -> (f32) {
// CHECK: %[[VAL_20:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_13]], %[[VAL_15]], %[[VAL_18]]] : memref<?x?x?xf32>
// CHECK: %[[VAL_21:.*]] = mulf %[[VAL_20]], %[[VAL_14]] : f32
// CHECK: %[[VAL_22:.*]] = addf %[[VAL_19]], %[[VAL_21]] : f32
// CHECK: scf.yield %[[VAL_22]] : f32
// CHECK: }
// CHECK: store %[[VAL_17]], %[[VAL_12]][] : memref<f32>
// CHECK: }
// CHECK: }
// CHECK: %[[VAL_24:.*]] = tensor_load %[[VAL_12]] : memref<f32>
// CHECK: return %[[VAL_24]] : tensor<f32>
// CHECK: }
//
// Kernel under test: accumulates A(i,j,k) * b(i) into the scalar x. The
// store assertion above reuses the innermost loop's result capture (rather
// than a fresh wildcard) so the test verifies that the reduced value is what
// gets written back to the scalar buffer.
func @sum_reduction_inv(%arga: tensor<?x?x?xf32>,
                        %argb: tensor<?xf32>,
                        %argx: tensor<f32>) -> tensor<f32> {
  %sum = linalg.generic #trait_sum_reduction_inv
    ins(%arga, %argb : tensor<?x?x?xf32>, tensor<?xf32>)
    outs(%argx : tensor<f32>) {
    ^bb(%a : f32, %b : f32, %x : f32):
      // Scale the element by b, then fold it into the running scalar.
      %prod = mulf %a, %b : f32
      %acc = addf %x, %prod : f32
      linalg.yield %acc: f32
  } -> tensor<f32>
  return %sum : tensor<f32>
}
// Trait for an outer-product style kernel: each 1-d input is indexed by a
// single loop variable (a by i, b by j, c by k) and the 3-d output by all
// three. All loops are parallel and every dimension is annotated "D".
#trait_invariants = {
  indexing_maps = [
    affine_map<(i,j,k) -> (i)>,      // a
    affine_map<(i,j,k) -> (j)>,      // b
    affine_map<(i,j,k) -> (k)>,      // c
    affine_map<(i,j,k) -> (i,j,k)>   // X (out)
  ],
  sparse = [
    [ "D"           ],  // a
    [ "D"           ],  // b
    [ "D"           ],  // c
    [ "D", "D", "D" ]   // X
  ],
  iterator_types = ["parallel", "parallel", "parallel"],
  doc = "X(i,j,k) = a(i) * b(j) * c(k)"
}
// CHECK-LABEL: func @invariants(
// CHECK-SAME: %[[VAL_0:.*]]: tensor<10xf32>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<20xf32>,
// CHECK-SAME: %[[VAL_2:.*]]: tensor<30xf32>,
// CHECK-SAME: %[[SHAPE:.*]]: tensor<10x20x30xf32>) -> tensor<10x20x30xf32> {
// CHECK: %[[VAL_3:.*]] = constant 10 : index
// CHECK: %[[VAL_4:.*]] = constant 20 : index
// CHECK: %[[VAL_5:.*]] = constant 30 : index
// CHECK: %[[VAL_6:.*]] = constant 0 : index
// CHECK: %[[VAL_7:.*]] = constant 1 : index
// CHECK: %[[VAL_8:.*]] = alloca() : memref<10xf32>
// CHECK: %[[VAL_9:.*]] = alloca() : memref<20xf32>
// CHECK: %[[VAL_10:.*]] = alloca() : memref<30xf32>
// CHECK: %[[VAL_11:.*]] = alloca() : memref<10x20x30xf32>
// CHECK: scf.for %[[VAL_12:.*]] = %[[VAL_6]] to %[[VAL_3]] step %[[VAL_7]] {
// CHECK: %[[VAL_13:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_12]]] : memref<10xf32>
// CHECK: scf.for %[[VAL_14:.*]] = %[[VAL_6]] to %[[VAL_4]] step %[[VAL_7]] {
// CHECK: %[[VAL_15:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_14]]] : memref<20xf32>
// CHECK: scf.for %[[VAL_16:.*]] = %[[VAL_6]] to %[[VAL_5]] step %[[VAL_7]] {
// CHECK: %[[VAL_17:.*]] = mulf %[[VAL_13]], %[[VAL_15]] : f32
// CHECK: %[[VAL_18:.*]] = load %[[VAL_10]]{{\[}}%[[VAL_16]]] : memref<30xf32>
// CHECK: %[[VAL_19:.*]] = mulf %[[VAL_17]], %[[VAL_18]] : f32
// CHECK: store %[[VAL_19]], %[[VAL_11]]{{\[}}%[[VAL_12]], %[[VAL_14]], %[[VAL_16]]] : memref<10x20x30xf32>
// CHECK: }
// CHECK: }
// CHECK: }
// CHECK: %[[VAL_20:.*]] = tensor_load %[[VAL_11]] : memref<10x20x30xf32>
// CHECK: return %[[VAL_20]] : tensor<10x20x30xf32>
// CHECK: }
// Kernel under test: writes a(i) * b(j) * c(k) to every output element.
// The incoming output value (%s) is not read by the region.
func @invariants(%arga: tensor<10xf32>,
                 %argb: tensor<20xf32>,
                 %argc: tensor<30xf32>,
                 %shape : tensor<10x20x30xf32>) -> tensor<10x20x30xf32> {
  %result = linalg.generic #trait_invariants
    ins(%arga, %argb, %argc : tensor<10xf32>, tensor<20xf32>, tensor<30xf32>)
    outs(%shape : tensor<10x20x30xf32>) {
    ^bb(%a : f32, %b : f32, %c : f32, %s : f32):
      %ab = mulf %a, %b : f32
      %abc = mulf %ab, %c : f32
      linalg.yield %abc: f32
  } -> tensor<10x20x30xf32>
  return %result : tensor<10x20x30xf32>
}