| // RUN: mlir-opt --test-emulate-narrow-int="memref-load-bitwidth=8" --cse --split-input-file %s | FileCheck %s |
| // RUN: mlir-opt --test-emulate-narrow-int="memref-load-bitwidth=32" --cse --split-input-file %s | FileCheck %s --check-prefix=CHECK32 |
| |
| // Expect no conversions for the 8-bit load width; with the 32-bit load width the i8 access is emulated on i32. |
| func.func @memref_i8() -> i8 { |
|   // Reads element 3 of a 4 x i8 buffer that lives in memory space 1. |
|   // Keep the constant ahead of the alloc: the 8-bit expectations require |
|   // the load to directly follow the alloc in the printed output. |
|   %last = arith.constant 3 : index |
|   %buf = memref.alloc() : memref<4xi8, 1> |
|   %elem = memref.load %buf[%last] : memref<4xi8, 1> |
|   return %elem : i8 |
| } |
| // CHECK-LABEL: func @memref_i8() |
| // CHECK: %[[M:.+]] = memref.alloc() : memref<4xi8, 1> |
| // CHECK-NEXT: %[[V:.+]] = memref.load %[[M]][%{{.+}}] : memref<4xi8, 1> |
| // CHECK-NEXT: return %[[V]] |
| |
| // CHECK32-LABEL: func @memref_i8() |
| // CHECK32: %[[M:.+]] = memref.alloc() : memref<1xi32, 1> |
| // CHECK32: %[[C0:.+]] = arith.constant 0 : index |
| // CHECK32: %[[V:.+]] = memref.load %[[M]][%[[C0]]] : memref<1xi32, 1> |
| // CHECK32: %[[C24:.+]] = arith.constant 24 : index |
| // CHECK32: %[[CAST:.+]] = arith.index_cast %[[C24]] : index to i32 |
| // CHECK32: %[[SHIFTRT:.+]] = arith.shrsi %[[V]], %[[CAST]] |
| // CHECK32: %[[TRUNC:.+]] = arith.trunci %[[SHIFTRT]] : i32 to i8 |
| // CHECK32-NEXT: return %[[TRUNC]] |
| |
| // ----- |
| |
| // Loads one i4 element from a statically shaped 5-element memref at a |
| // runtime index. |
| func.func @memref_load_i4(%pos: index) -> i4 { |
|   %buf = memref.alloc() : memref<5xi4> |
|   %elem = memref.load %buf[%pos] : memref<5xi4> |
|   return %elem : i4 |
| } |
| // CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 floordiv 2)> |
| // CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0] -> (s0 * 4 - (s0 floordiv 2) * 8)> |
| // CHECK: func @memref_load_i4( |
| // CHECK-SAME: %[[ARG0:.+]]: index |
| // CHECK: %[[ALLOC:.+]] = memref.alloc() : memref<3xi8> |
| // CHECK: %[[INDEX:.+]] = affine.apply #[[MAP0]]()[%[[ARG0]]] |
| // CHECK: %[[LOADVAL:.+]] = memref.load %[[ALLOC]][%[[INDEX]]] |
| // CHECK: %[[BITOFFSET:.+]] = affine.apply #[[MAP1]]()[%[[ARG0]]] |
| // CHECK: %[[CAST:.+]] = arith.index_cast %[[BITOFFSET]] : index to i8 |
| // CHECK: %[[SHIFTRT:.+]] = arith.shrsi %[[LOADVAL]], %[[CAST]] |
| // CHECK: %[[TRUNC:.+]] = arith.trunci %[[SHIFTRT]] : i8 to i4 |
| // CHECK: return %[[TRUNC]] |
| |
| // CHECK32-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 floordiv 8)> |
| // CHECK32-DAG: #[[MAP1:.+]] = affine_map<()[s0] -> (s0 * 4 - (s0 floordiv 8) * 32)> |
| // CHECK32: func @memref_load_i4( |
| // CHECK32-SAME: %[[ARG0:.+]]: index |
| // CHECK32: %[[ALLOC:.+]] = memref.alloc() : memref<1xi32> |
| // CHECK32: %[[INDEX:.+]] = affine.apply #[[MAP0]]()[%[[ARG0]]] |
| // CHECK32: %[[LOADVAL:.+]] = memref.load %[[ALLOC]][%[[INDEX]]] |
| // CHECK32: %[[BITOFFSET:.+]] = affine.apply #[[MAP1]]()[%[[ARG0]]] |
| // CHECK32: %[[CAST:.+]] = arith.index_cast %[[BITOFFSET]] : index to i32 |
| // CHECK32: %[[SHIFTRT:.+]] = arith.shrsi %[[LOADVAL]], %[[CAST]] |
| // CHECK32: %[[TRUNC:.+]] = arith.trunci %[[SHIFTRT]] : i32 to i4 |
| // CHECK32: return %[[TRUNC]] |
| |
| // ----- |
| |
| // Loads one i4 element from a static 3x125 memref using two runtime |
| // indices; the buffer also carries an assume_alignment of 64, which the |
| // expectations require to be preserved on the emulated buffer. |
| func.func @memref_load_i4_rank2(%row: index, %col: index) -> i4 { |
|   %buf = memref.alloc() : memref<3x125xi4> |
|   memref.assume_alignment %buf, 64 : memref<3x125xi4> |
|   %elem = memref.load %buf[%row, %col] : memref<3x125xi4> |
|   return %elem : i4 |
| } |
| // CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0, s1] -> ((s0 * 125 + s1) floordiv 2)> |
| // CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0, s1] -> (s0 * 500 + s1 * 4 - ((s0 * 125 + s1) floordiv 2) * 8)> |
| // CHECK: func @memref_load_i4_rank2( |
| // CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: index |
| // CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: index |
| // CHECK: %[[ALLOC:.+]] = memref.alloc() : memref<188xi8> |
| // CHECK: memref.assume_alignment %[[ALLOC]], 64 : memref<188xi8> |
| // CHECK: %[[INDEX:.+]] = affine.apply #[[MAP0]]()[%[[ARG0]], %[[ARG1]]] |
| // CHECK: %[[LOAD:.+]] = memref.load %[[ALLOC]][%[[INDEX]]] |
| // CHECK: %[[BITOFFSET:.+]] = affine.apply #[[MAP1]]()[%[[ARG0]], %[[ARG1]]] |
| // CHECK: %[[CAST:.+]] = arith.index_cast %[[BITOFFSET]] : index to i8 |
| // CHECK: %[[SHIFTRT:.+]] = arith.shrsi %[[LOAD]], %[[CAST]] |
| // CHECK: %[[TRUNC:.+]] = arith.trunci %[[SHIFTRT]] : i8 to i4 |
| // CHECK: return %[[TRUNC]] |
| |
| // CHECK32-DAG: #[[MAP0:.+]] = affine_map<()[s0, s1] -> ((s0 * 125 + s1) floordiv 8)> |
| // CHECK32-DAG: #[[MAP1:.+]] = affine_map<()[s0, s1] -> (s0 * 500 + s1 * 4 - ((s0 * 125 + s1) floordiv 8) * 32)> |
| // CHECK32: func @memref_load_i4_rank2( |
| // CHECK32-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: index |
| // CHECK32-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: index |
| // CHECK32: %[[ALLOC:.+]] = memref.alloc() : memref<47xi32> |
| // CHECK32: memref.assume_alignment %[[ALLOC]], 64 : memref<47xi32> |
| // CHECK32: %[[INDEX:.+]] = affine.apply #[[MAP0]]()[%[[ARG0]], %[[ARG1]]] |
| // CHECK32: %[[LOAD:.+]] = memref.load %[[ALLOC]][%[[INDEX]]] |
| // CHECK32: %[[BITOFFSET:.+]] = affine.apply #[[MAP1]]()[%[[ARG0]], %[[ARG1]]] |
| // CHECK32: %[[CAST:.+]] = arith.index_cast %[[BITOFFSET]] : index to i32 |
| // CHECK32: %[[SHIFTRT:.+]] = arith.shrsi %[[LOAD]], %[[CAST]] |
| // CHECK32: %[[TRUNC:.+]] = arith.trunci %[[SHIFTRT]] : i32 to i4 |
| // CHECK32: return %[[TRUNC]] |
| |
| // ----- |
| |
| // Loads one i4 element from a 2-D memref whose two sizes are only known |
| // at runtime (first two arguments); the last two arguments are the |
| // load indices. |
| func.func @memref_load_i4_dynamic(%dim0: index, %dim1: index, %row: index, %col: index) -> i4 { |
|   %buf = memref.alloc(%dim0, %dim1) : memref<?x?xi4> |
|   %elem = memref.load %buf[%row, %col] : memref<?x?xi4> |
|   return %elem : i4 |
| } |
| // CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0, s1] -> ((s0 * s1) floordiv 2)> |
| // CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0, s1, s2] -> ((s2 + s0 * s1) floordiv 2)> |
| // CHECK-DAG: #[[MAP2:.+]] = affine_map<()[s0, s1, s2] -> ((s0 * s1) * 4 + s2 * 4 - ((s2 + s0 * s1) floordiv 2) * 8)> |
| // CHECK: func @memref_load_i4_dynamic( |
| // CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: index |
| // CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: index |
| // CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]]: index |
| // CHECK-SAME: %[[ARG3:[a-zA-Z0-9]+]]: index |
| // CHECK: %[[SIZE:.+]] = affine.apply #[[MAP0]]()[%[[ARG0]], %[[ARG1]]] |
| // CHECK: %[[ALLOC:.+]] = memref.alloc(%[[SIZE]]) |
| // CHECK: %[[INDEX:.+]] = affine.apply #[[MAP1]]()[%[[ARG2]], %[[ARG1]], %[[ARG3]]] |
| // CHECK: %[[LOAD:.+]] = memref.load %[[ALLOC]][%[[INDEX]]] |
| // CHECK: %[[BITOFFSET:.+]] = affine.apply #[[MAP2]]()[%[[ARG2]], %[[ARG1]], %[[ARG3]]] |
| // CHECK: %[[CAST:.+]] = arith.index_cast %[[BITOFFSET]] : index to i8 |
| // CHECK: %[[SHIFTRT:.+]] = arith.shrsi %[[LOAD]], %[[CAST]] |
| // CHECK: %[[TRUNC:.+]] = arith.trunci %[[SHIFTRT]] : i8 to i4 |
| // CHECK: return %[[TRUNC]] |
| |
| // CHECK32-DAG: #[[MAP0:.+]] = affine_map<()[s0, s1] -> ((s0 * s1) floordiv 8)> |
| // CHECK32-DAG: #[[MAP1:.+]] = affine_map<()[s0, s1, s2] -> ((s2 + s0 * s1) floordiv 8)> |
| // CHECK32-DAG: #[[MAP2:.+]] = affine_map<()[s0, s1, s2] -> ((s0 * s1) * 4 + s2 * 4 - ((s2 + s0 * s1) floordiv 8) * 32)> |
| // CHECK32: func @memref_load_i4_dynamic( |
| // CHECK32-SAME: %[[ARG0:[a-zA-Z0-9]+]]: index |
| // CHECK32-SAME: %[[ARG1:[a-zA-Z0-9]+]]: index |
| // CHECK32-SAME: %[[ARG2:[a-zA-Z0-9]+]]: index |
| // CHECK32-SAME: %[[ARG3:[a-zA-Z0-9]+]]: index |
| // CHECK32: %[[SIZE:.+]] = affine.apply #[[MAP0]]()[%[[ARG0]], %[[ARG1]]] |
| // CHECK32: %[[ALLOC:.+]] = memref.alloc(%[[SIZE]]) |
| // CHECK32: %[[INDEX:.+]] = affine.apply #[[MAP1]]()[%[[ARG2]], %[[ARG1]], %[[ARG3]]] |
| // CHECK32: %[[LOAD:.+]] = memref.load %[[ALLOC]][%[[INDEX]]] |
| // CHECK32: %[[BITOFFSET:.+]] = affine.apply #[[MAP2]]()[%[[ARG2]], %[[ARG1]], %[[ARG3]]] |
| // CHECK32: %[[CAST:.+]] = arith.index_cast %[[BITOFFSET]] : index to i32 |
| // CHECK32: %[[SHIFTRT:.+]] = arith.shrsi %[[LOAD]], %[[CAST]] |
| // CHECK32: %[[TRUNC:.+]] = arith.trunci %[[SHIFTRT]] : i32 to i4 |
| // CHECK32: return %[[TRUNC]] |
| |
| // ----- |
| |
| // Loads the single i4 element of a rank-0 memref (no indices). |
| func.func @rank_zero_memref() -> i4 { |
|   %scalar = memref.alloc() : memref<i4> |
|   %elem = memref.load %scalar[] : memref<i4> |
|   return %elem : i4 |
| } |
| // CHECK-LABEL: func @rank_zero_memref() |
| // CHECK: %[[ALLOC:.+]] = memref.alloc() : memref<i8> |
| // CHECK: %[[LOAD:.+]] = memref.load %[[ALLOC]][] : memref<i8> |
| // CHECK: %[[TRUNC:.+]] = arith.trunci %[[LOAD]] : i8 to i4 |
| // CHECK: return %[[TRUNC]] |
| |
| // CHECK32-LABEL: func @rank_zero_memref() |
| // CHECK32: %[[ALLOC:.+]] = memref.alloc() : memref<i32> |
| // CHECK32: %[[LOAD:.+]] = memref.load %[[ALLOC]][] : memref<i32> |
| // CHECK32: %[[TRUNC:.+]] = arith.trunci %[[LOAD]] : i32 to i4 |
| // CHECK32: return %[[TRUNC]] |
| |
| // ----- |
| |
| // Loads an i4 element through a subview: 32 contiguous elements taken |
| // at offset 32 from a 128-element buffer. |
| func.func @memref_strided_i4(%pos: index) -> i4 { |
|   %buf = memref.alloc() : memref<128xi4> |
|   %window = memref.subview %buf[32] [32] [1] : memref<128xi4> to memref<32xi4, strided<[1], offset: 32>> |
|   %elem = memref.load %window[%pos] : memref<32xi4, strided<[1], offset: 32>> |
|   return %elem : i4 |
| } |
| |
| // CHECK-LABEL: func @memref_strided_i4 |
| // CHECK: %[[ALLOC:.+]] = memref.alloc() : memref<64xi8> |
| // CHECK: %[[SUBVIEW:.+]] = memref.subview %[[ALLOC]][16] [16] [1] : memref<64xi8> to memref<16xi8, strided<[1], offset: 16>> |
| // CHECK: %[[LOAD:.+]] = memref.load %[[SUBVIEW]] |
| |
| // CHECK32-LABEL: func @memref_strided_i4 |
| // CHECK32: %[[ALLOC:.+]] = memref.alloc() : memref<16xi32> |
| // CHECK32: %[[SUBVIEW:.+]] = memref.subview %[[ALLOC]][4] [4] [1] : memref<16xi32> to memref<4xi32, strided<[1], offset: 4>> |
| // CHECK32: %[[LOAD:.+]] = memref.load %[[SUBVIEW]] |
| |
| // ----- |
| |
| // Reinterprets a 5-element i4 buffer as a rank-0 memref at offset 0 and |
| // loads its single element. |
| func.func @reinterpret_cast_memref_load_0D() -> i4 { |
|   %buf = memref.alloc() : memref<5xi4> |
|   %scalar_view = memref.reinterpret_cast %buf to offset: [0], sizes: [], strides: [] : memref<5xi4> to memref<i4> |
|   %elem = memref.load %scalar_view[] : memref<i4> |
|   return %elem : i4 |
| } |
| // CHECK-LABEL: func @reinterpret_cast_memref_load_0D() |
| // CHECK: %[[ALLOC:.+]] = memref.alloc() : memref<3xi8> |
| // CHECK: %[[RE_CAST:.+]] = memref.reinterpret_cast %[[ALLOC]] to offset: [0], sizes: [], strides: [] : memref<3xi8> to memref<i8> |
| // CHECK: %[[LOAD:.+]] = memref.load %[[RE_CAST]][] : memref<i8> |
| // CHECK: %[[TRUNC:.+]] = arith.trunci %[[LOAD]] : i8 to i4 |
| // CHECK: return %[[TRUNC]] |
| |
| // CHECK32-LABEL: func @reinterpret_cast_memref_load_0D() |
| // CHECK32: %[[ALLOC:.+]] = memref.alloc() : memref<1xi32> |
| // CHECK32: %[[RE_CAST:.+]] = memref.reinterpret_cast %[[ALLOC]] to offset: [0], sizes: [], strides: [] : memref<1xi32> to memref<i32> |
| // CHECK32: %[[LOAD:.+]] = memref.load %[[RE_CAST]][] : memref<i32> |
| // CHECK32: %[[TRUNC:.+]] = arith.trunci %[[LOAD]] : i32 to i4 |
| // CHECK32: return %[[TRUNC]] |
| |
| // ----- |
| |
| // Reinterprets a 5x5 i4 buffer as a 25-element 1-D memref with unit |
| // stride and offset 8, then loads from it at a runtime index. |
| func.func @reinterpret_cast_memref_load_1D(%pos: index) -> i4 { |
|   %buf = memref.alloc() : memref<5x5xi4> |
|   %flat_view = memref.reinterpret_cast %buf to offset: [8], sizes: [25], strides: [1] : memref<5x5xi4> to memref<25xi4, strided<[1], offset: 8>> |
|   %elem = memref.load %flat_view[%pos] : memref<25xi4, strided<[1], offset: 8>> |
|   return %elem : i4 |
| } |
| // CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0] -> (s0 floordiv 2)> |
| // CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0] -> (s0 * 4 - (s0 floordiv 2) * 8)> |
| // CHECK: func @reinterpret_cast_memref_load_1D( |
| // CHECK-SAME: %[[ARG0:.+]]: index |
| // CHECK: %[[ALLOC:.+]] = memref.alloc() : memref<13xi8> |
| // CHECK: %[[RE_CAST:.+]] = memref.reinterpret_cast %[[ALLOC]] to offset: [4], sizes: [13], strides: [1] : memref<13xi8> to memref<13xi8, strided<[1], offset: 4>> |
| // CHECK: %[[INDEX:.+]] = affine.apply #[[MAP]]()[%[[ARG0]]] |
| // CHECK: %[[LOAD:.+]] = memref.load %[[RE_CAST]][%[[INDEX]]] : memref<13xi8, strided<[1], offset: 4>> |
| // CHECK: %[[OFFSET:.+]] = affine.apply #[[MAP1]]()[%[[ARG0]]] |
| // CHECK: %[[CAST:.+]] = arith.index_cast %[[OFFSET]] : index to i8 |
| // CHECK: %[[SHR:.+]] = arith.shrsi %[[LOAD]], %[[CAST]] : i8 |
| // CHECK: %[[TRUNC:.+]] = arith.trunci %[[SHR]] : i8 to i4 |
| // CHECK: return %[[TRUNC]] |
| |
| // CHECK32-DAG: #[[MAP:.+]] = affine_map<()[s0] -> (s0 floordiv 8)> |
| // CHECK32-DAG: #[[MAP1:.+]] = affine_map<()[s0] -> (s0 * 4 - (s0 floordiv 8) * 32)> |
| // CHECK32: func @reinterpret_cast_memref_load_1D( |
| // CHECK32-SAME: %[[ARG0:.+]]: index |
| // CHECK32: %[[ALLOC:.+]] = memref.alloc() : memref<4xi32> |
| // CHECK32: %[[RE_CAST:.+]] = memref.reinterpret_cast %[[ALLOC]] to offset: [1], sizes: [4], strides: [1] : memref<4xi32> to memref<4xi32, strided<[1], offset: 1>> |
| // CHECK32: %[[INDEX:.+]] = affine.apply #[[MAP]]()[%[[ARG0]]] |
| // CHECK32: %[[LOAD:.+]] = memref.load %[[RE_CAST]][%[[INDEX]]] : memref<4xi32, strided<[1], offset: 1>> |
| // CHECK32: %[[OFFSET:.+]] = affine.apply #[[MAP1]]()[%[[ARG0]]] |
| // CHECK32: %[[CAST:.+]] = arith.index_cast %[[OFFSET]] : index to i32 |
| // CHECK32: %[[SHR:.+]] = arith.shrsi %[[LOAD]], %[[CAST]] : i32 |
| // CHECK32: %[[TRUNC:.+]] = arith.trunci %[[SHR]] : i32 to i4 |
| // CHECK32: return %[[TRUNC]] |
| |
| // ----- |
| |
| // Loads one i4 element at a runtime index from a 5-element buffer |
| // created with memref.alloca rather than memref.alloc. |
| func.func @memref_alloca_load_i4(%pos: index) -> i4 { |
|   %buf = memref.alloca() : memref<5xi4> |
|   %elem = memref.load %buf[%pos] : memref<5xi4> |
|   return %elem : i4 |
| } |
| // CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 floordiv 2)> |
| // CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0] -> (s0 * 4 - (s0 floordiv 2) * 8)> |
| // CHECK: func @memref_alloca_load_i4( |
| // CHECK-SAME: %[[ARG0:.+]]: index |
| // CHECK: %[[ALLOCA:.+]] = memref.alloca() : memref<3xi8> |
| // CHECK: %[[INDEX:.+]] = affine.apply #[[MAP0]]()[%[[ARG0]]] |
| // CHECK: %[[LOADVAL:.+]] = memref.load %[[ALLOCA]][%[[INDEX]]] |
| // CHECK: %[[BITOFFSET:.+]] = affine.apply #[[MAP1]]()[%[[ARG0]]] |
| // CHECK: %[[CAST:.+]] = arith.index_cast %[[BITOFFSET]] : index to i8 |
| // CHECK: %[[SHIFTRT:.+]] = arith.shrsi %[[LOADVAL]], %[[CAST]] |
| // CHECK: %[[TRUNC:.+]] = arith.trunci %[[SHIFTRT]] : i8 to i4 |
| // CHECK: return %[[TRUNC]] |
| |
| // CHECK32-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 floordiv 8)> |
| // CHECK32-DAG: #[[MAP1:.+]] = affine_map<()[s0] -> (s0 * 4 - (s0 floordiv 8) * 32)> |
| // CHECK32: func @memref_alloca_load_i4( |
| // CHECK32-SAME: %[[ARG0:.+]]: index |
| // CHECK32: %[[ALLOCA:.+]] = memref.alloca() : memref<1xi32> |
| // CHECK32: %[[INDEX:.+]] = affine.apply #[[MAP0]]()[%[[ARG0]]] |
| // CHECK32: %[[LOADVAL:.+]] = memref.load %[[ALLOCA]][%[[INDEX]]] |
| // CHECK32: %[[BITOFFSET:.+]] = affine.apply #[[MAP1]]()[%[[ARG0]]] |
| // CHECK32: %[[CAST:.+]] = arith.index_cast %[[BITOFFSET]] : index to i32 |
| // CHECK32: %[[SHIFTRT:.+]] = arith.shrsi %[[LOADVAL]], %[[CAST]] |
| // CHECK32: %[[TRUNC:.+]] = arith.trunci %[[SHIFTRT]] : i32 to i4 |
| // CHECK32: return %[[TRUNC]] |
| |
| // ----- |
| |
| // Stores an i4 value into a statically shaped 5-element memref at a |
| // runtime index. |
| func.func @memref_store_i4(%pos: index, %value: i4) -> () { |
|   %buf = memref.alloc() : memref<5xi4> |
|   memref.store %value, %buf[%pos] : memref<5xi4> |
|   return |
| } |
| // CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 floordiv 2)> |
| // CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0] -> (s0 * 4 - (s0 floordiv 2) * 8)> |
| // CHECK: func @memref_store_i4( |
| // CHECK-SAME: %[[ARG0:.+]]: index, %[[ARG1:.+]]: i4 |
| // CHECK-DAG: %[[ALLOC:.+]] = memref.alloc() : memref<3xi8> |
| // CHECK-DAG: %[[EXTUI:.+]] = arith.extui %[[ARG1]] : i4 to i8 |
| // CHECK-DAG: %[[INDEX:.+]] = affine.apply #[[MAP0]]()[%[[ARG0]]] |
| // CHECK-DAG: %[[BITOFFSET:.+]] = affine.apply #[[MAP1]]()[%[[ARG0]]] |
| // CHECK-DAG: %[[BITOFFSET_I8:.+]] = arith.index_cast %[[BITOFFSET]] : index to i8 |
| // CHECK-DAG: %[[MASK_BASE:.+]] = arith.constant 15 : i8 |
| // CHECK-DAG: %[[MASK_SHIFTED:.+]] = arith.shli %[[MASK_BASE]], %[[BITOFFSET_I8]] : i8 |
| // CHECK-DAG: %[[CST_NEG_ONE:.+]] = arith.constant -1 : i8 |
| // CHECK-DAG: %[[MASK:.+]] = arith.xori %[[MASK_SHIFTED]], %[[CST_NEG_ONE]] : i8 |
| // CHECK-DAG: %[[SHIFTED_VAL:.+]] = arith.shli %[[EXTUI]], %[[BITOFFSET_I8]] : i8 |
| // CHECK: %[[CLEAR_RMW:.+]] = memref.atomic_rmw andi %[[MASK]], %[[ALLOC]][%[[INDEX]]] : (i8, memref<3xi8>) -> i8 |
| // CHECK: %[[WRITE_RMW:.+]] = memref.atomic_rmw ori %[[SHIFTED_VAL]], %[[ALLOC]][%[[INDEX]]] : (i8, memref<3xi8>) -> i8 |
| // CHECK: return |
| |
| // CHECK32-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 floordiv 8)> |
| // CHECK32-DAG: #[[MAP1:.+]] = affine_map<()[s0] -> (s0 * 4 - (s0 floordiv 8) * 32)> |
| // CHECK32: func @memref_store_i4( |
| // CHECK32-SAME: %[[ARG0:.+]]: index, %[[ARG1:.+]]: i4 |
| // CHECK32-DAG: %[[ALLOC:.+]] = memref.alloc() : memref<1xi32> |
| // CHECK32-DAG: %[[EXTUI:.+]] = arith.extui %[[ARG1]] : i4 to i32 |
| // CHECK32-DAG: %[[INDEX:.+]] = affine.apply #[[MAP0]]()[%[[ARG0]]] |
| // CHECK32-DAG: %[[BITOFFSET:.+]] = affine.apply #[[MAP1]]()[%[[ARG0]]] |
| // CHECK32-DAG: %[[BITOFFSET_I32:.+]] = arith.index_cast %[[BITOFFSET]] : index to i32 |
| // CHECK32-DAG: %[[MASK_BASE:.+]] = arith.constant 15 : i32 |
| // CHECK32-DAG: %[[MASK_SHIFTED:.+]] = arith.shli %[[MASK_BASE]], %[[BITOFFSET_I32]] : i32 |
| // CHECK32-DAG: %[[CST_NEG_ONE:.+]] = arith.constant -1 : i32 |
| // CHECK32-DAG: %[[MASK:.+]] = arith.xori %[[MASK_SHIFTED]], %[[CST_NEG_ONE]] : i32 |
| // CHECK32-DAG: %[[SHIFTED_VAL:.+]] = arith.shli %[[EXTUI]], %[[BITOFFSET_I32]] : i32 |
| // CHECK32: %[[CLEAR_RMW:.+]] = memref.atomic_rmw andi %[[MASK]], %[[ALLOC]][%[[INDEX]]] : (i32, memref<1xi32>) -> i32 |
| // CHECK32: %[[WRITE_RMW:.+]] = memref.atomic_rmw ori %[[SHIFTED_VAL]], %[[ALLOC]][%[[INDEX]]] : (i32, memref<1xi32>) -> i32 |
| // CHECK32: return |
| |
| // ----- |
| |
| // Stores an i4 value into a static 3x125 memref at two runtime indices; |
| // the buffer also carries an assume_alignment of 64. |
| func.func @memref_store_i4_rank2(%row: index, %col: index, %value: i4) -> () { |
|   %buf = memref.alloc() : memref<3x125xi4> |
|   memref.assume_alignment %buf, 64 : memref<3x125xi4> |
|   memref.store %value, %buf[%row, %col] : memref<3x125xi4> |
|   return |
| } |
| // CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0, s1] -> ((s0 * 125 + s1) floordiv 2)> |
| // CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0, s1] -> (s0 * 500 + s1 * 4 - ((s0 * 125 + s1) floordiv 2) * 8)> |
| // CHECK: func @memref_store_i4_rank2( |
| // CHECK-SAME: %[[ARG0:.+]]: index, %[[ARG1:.+]]: index, %[[ARG2:.+]]: i4 |
| // CHECK-DAG: %[[ALLOC:.+]] = memref.alloc() : memref<188xi8> |
| // CHECK-DAG: memref.assume_alignment %[[ALLOC]], 64 : memref<188xi8> |
| // CHECK-DAG: %[[EXTUI:.+]] = arith.extui %[[ARG2]] : i4 to i8 |
| // CHECK-DAG: %[[INDEX:.+]] = affine.apply #[[MAP0]]()[%[[ARG0]], %[[ARG1]]] |
| // CHECK-DAG: %[[BITOFFSET:.+]] = affine.apply #[[MAP1]]()[%[[ARG0]], %[[ARG1]]] |
| // CHECK-DAG: %[[BITOFFSET_I8:.+]] = arith.index_cast %[[BITOFFSET]] : index to i8 |
| // CHECK-DAG: %[[MASK_BASE:.+]] = arith.constant 15 : i8 |
| // CHECK-DAG: %[[MASK_SHIFTED:.+]] = arith.shli %[[MASK_BASE]], %[[BITOFFSET_I8]] : i8 |
| // CHECK-DAG: %[[CST_NEG_ONE:.+]] = arith.constant -1 : i8 |
| // CHECK-DAG: %[[MASK:.+]] = arith.xori %[[MASK_SHIFTED]], %[[CST_NEG_ONE]] : i8 |
| // CHECK-DAG: %[[SHIFTED_VAL:.+]] = arith.shli %[[EXTUI]], %[[BITOFFSET_I8]] : i8 |
| // CHECK: %[[CLEAR_RMW:.+]] = memref.atomic_rmw andi %[[MASK]], %[[ALLOC]][%[[INDEX]]] : (i8, memref<188xi8>) -> i8 |
| // CHECK: %[[WRITE_RMW:.+]] = memref.atomic_rmw ori %[[SHIFTED_VAL]], %[[ALLOC]][%[[INDEX]]] : (i8, memref<188xi8>) -> i8 |
| // CHECK: return |
| |
| // CHECK32-DAG: #[[MAP0:.+]] = affine_map<()[s0, s1] -> ((s0 * 125 + s1) floordiv 8)> |
| // CHECK32-DAG: #[[MAP1:.+]] = affine_map<()[s0, s1] -> (s0 * 500 + s1 * 4 - ((s0 * 125 + s1) floordiv 8) * 32)> |
| // CHECK32: func @memref_store_i4_rank2( |
| // CHECK32-SAME: %[[ARG0:.+]]: index, %[[ARG1:.+]]: index, %[[ARG2:.+]]: i4 |
| // CHECK32-DAG: %[[ALLOC:.+]] = memref.alloc() : memref<47xi32> |
| // CHECK32-DAG: memref.assume_alignment %[[ALLOC]], 64 : memref<47xi32> |
| // CHECK32-DAG: %[[EXTUI:.+]] = arith.extui %[[ARG2]] : i4 to i32 |
| // CHECK32-DAG: %[[INDEX:.+]] = affine.apply #[[MAP0]]()[%[[ARG0]], %[[ARG1]]] |
| // CHECK32-DAG: %[[BITOFFSET:.+]] = affine.apply #[[MAP1]]()[%[[ARG0]], %[[ARG1]]] |
| // CHECK32-DAG: %[[BITOFFSET_I32:.+]] = arith.index_cast %[[BITOFFSET]] : index to i32 |
| // CHECK32-DAG: %[[MASK_BASE:.+]] = arith.constant 15 : i32 |
| // CHECK32-DAG: %[[MASK_SHIFTED:.+]] = arith.shli %[[MASK_BASE]], %[[BITOFFSET_I32]] : i32 |
| // CHECK32-DAG: %[[CST_NEG_ONE:.+]] = arith.constant -1 : i32 |
| // CHECK32-DAG: %[[MASK:.+]] = arith.xori %[[MASK_SHIFTED]], %[[CST_NEG_ONE]] : i32 |
| // CHECK32-DAG: %[[SHIFTED_VAL:.+]] = arith.shli %[[EXTUI]], %[[BITOFFSET_I32]] : i32 |
| // CHECK32: %[[CLEAR_RMW:.+]] = memref.atomic_rmw andi %[[MASK]], %[[ALLOC]][%[[INDEX]]] : (i32, memref<47xi32>) -> i32 |
| // CHECK32: %[[WRITE_RMW:.+]] = memref.atomic_rmw ori %[[SHIFTED_VAL]], %[[ALLOC]][%[[INDEX]]] : (i32, memref<47xi32>) -> i32 |
| // CHECK32: return |
| |
| // ----- |
| |
| // Stores an i4 value into a 2-D memref whose two sizes are only known at |
| // runtime (first two arguments); the next two arguments are the store |
| // indices and the last one is the value. |
| func.func @memref_store_i4_dynamic(%dim0: index, %dim1: index, %row: index, %col: index, %value: i4) -> () { |
|   %buf = memref.alloc(%dim0, %dim1) : memref<?x?xi4> |
|   memref.store %value, %buf[%row, %col] : memref<?x?xi4> |
|   return |
| } |
| // CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0, s1] -> ((s0 * s1) floordiv 2)> |
| // CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0, s1, s2] -> ((s2 + s0 * s1) floordiv 2)> |
| // CHECK-DAG: #[[MAP2:.+]] = affine_map<()[s0, s1, s2] -> ((s0 * s1) * 4 + s2 * 4 - ((s2 + s0 * s1) floordiv 2) * 8)> |
| // CHECK: func @memref_store_i4_dynamic( |
| // CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: index |
| // CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: index |
| // CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]]: index |
| // CHECK-SAME: %[[ARG3:[a-zA-Z0-9]+]]: index |
| // CHECK-SAME: %[[ARG4:[a-zA-Z0-9]+]]: i4 |
| // CHECK-DAG: %[[SIZE:.+]] = affine.apply #[[MAP0]]()[%[[ARG0]], %[[ARG1]]] |
| // CHECK-DAG: %[[ALLOC:.+]] = memref.alloc(%[[SIZE]]) : memref<?xi8> |
| // CHECK-DAG: %[[EXTUI:.+]] = arith.extui %[[ARG4]] : i4 to i8 |
| // CHECK-DAG: %[[INDEX:.+]] = affine.apply #[[MAP1]]()[%[[ARG2]], %[[ARG1]], %[[ARG3]]] |
| // CHECK-DAG: %[[BITOFFSET:.+]] = affine.apply #[[MAP2]]()[%[[ARG2]], %[[ARG1]], %[[ARG3]]] |
| // CHECK-DAG: %[[BITOFFSET_I8:.+]] = arith.index_cast %[[BITOFFSET]] : index to i8 |
| // CHECK-DAG: %[[MASK_BASE:.+]] = arith.constant 15 : i8 |
| // CHECK-DAG: %[[MASK_SHIFTED:.+]] = arith.shli %[[MASK_BASE]], %[[BITOFFSET_I8]] : i8 |
| // CHECK-DAG: %[[CST_NEG_ONE:.+]] = arith.constant -1 : i8 |
| // CHECK-DAG: %[[MASK:.+]] = arith.xori %[[MASK_SHIFTED]], %[[CST_NEG_ONE]] : i8 |
| // CHECK-DAG: %[[SHIFTED_VAL:.+]] = arith.shli %[[EXTUI]], %[[BITOFFSET_I8]] : i8 |
| // CHECK: %[[CLEAR_RMW:.+]] = memref.atomic_rmw andi %[[MASK]], %[[ALLOC]][%[[INDEX]]] : (i8, memref<?xi8>) -> i8 |
| // CHECK: %[[WRITE_RMW:.+]] = memref.atomic_rmw ori %[[SHIFTED_VAL]], %[[ALLOC]][%[[INDEX]]] : (i8, memref<?xi8>) -> i8 |
| // CHECK: return |
| |
| // CHECK32-DAG: #[[MAP0:.+]] = affine_map<()[s0, s1] -> ((s0 * s1) floordiv 8)> |
| // CHECK32-DAG: #[[MAP1:.+]] = affine_map<()[s0, s1, s2] -> ((s2 + s0 * s1) floordiv 8)> |
| // CHECK32-DAG: #[[MAP2:.+]] = affine_map<()[s0, s1, s2] -> ((s0 * s1) * 4 + s2 * 4 - ((s2 + s0 * s1) floordiv 8) * 32)> |
| // CHECK32: func @memref_store_i4_dynamic( |
| // CHECK32-SAME: %[[ARG0:[a-zA-Z0-9]+]]: index |
| // CHECK32-SAME: %[[ARG1:[a-zA-Z0-9]+]]: index |
| // CHECK32-SAME: %[[ARG2:[a-zA-Z0-9]+]]: index |
| // CHECK32-SAME: %[[ARG3:[a-zA-Z0-9]+]]: index |
| // CHECK32-SAME: %[[ARG4:[a-zA-Z0-9]+]]: i4 |
| // CHECK32-DAG: %[[SIZE:.+]] = affine.apply #[[MAP0]]()[%[[ARG0]], %[[ARG1]]] |
| // CHECK32-DAG: %[[ALLOC:.+]] = memref.alloc(%[[SIZE]]) : memref<?xi32> |
| // CHECK32-DAG: %[[EXTUI:.+]] = arith.extui %[[ARG4]] : i4 to i32 |
| // CHECK32-DAG: %[[INDEX:.+]] = affine.apply #[[MAP1]]()[%[[ARG2]], %[[ARG1]], %[[ARG3]]] |
| // CHECK32-DAG: %[[BITOFFSET:.+]] = affine.apply #[[MAP2]]()[%[[ARG2]], %[[ARG1]], %[[ARG3]]] |
| // CHECK32-DAG: %[[BITOFFSET_I32:.+]] = arith.index_cast %[[BITOFFSET]] : index to i32 |
| // CHECK32-DAG: %[[MASK_BASE:.+]] = arith.constant 15 : i32 |
| // CHECK32-DAG: %[[MASK_SHIFTED:.+]] = arith.shli %[[MASK_BASE]], %[[BITOFFSET_I32]] : i32 |
| // CHECK32-DAG: %[[CST_NEG_ONE:.+]] = arith.constant -1 : i32 |
| // CHECK32-DAG: %[[MASK:.+]] = arith.xori %[[MASK_SHIFTED]], %[[CST_NEG_ONE]] : i32 |
| // CHECK32-DAG: %[[SHIFTED_VAL:.+]] = arith.shli %[[EXTUI]], %[[BITOFFSET_I32]] : i32 |
| // CHECK32: %[[CLEAR_RMW:.+]] = memref.atomic_rmw andi %[[MASK]], %[[ALLOC]][%[[INDEX]]] : (i32, memref<?xi32>) -> i32 |
| // CHECK32: %[[WRITE_RMW:.+]] = memref.atomic_rmw ori %[[SHIFTED_VAL]], %[[ALLOC]][%[[INDEX]]] : (i32, memref<?xi32>) -> i32 |
| // CHECK32: return |
| |
| // ----- |
| |
| // Stores an i4 value into a rank-0 memref (no indices). |
| func.func @rank_zero_memref_store(%value: i4) -> () { |
|   %scalar = memref.alloc() : memref<i4> |
|   memref.store %value, %scalar[] : memref<i4> |
|   return |
| } |
| // CHECK-LABEL: func @rank_zero_memref_store( |
| // CHECK-SAME: %[[ARG0:.+]]: i4 |
| // CHECK: %[[ALLOC:.+]] = memref.alloc() : memref<i8> |
| // CHECK: %[[EXTUI:.+]] = arith.extui %[[ARG0]] : i4 to i8 |
| // CHECK: %[[WRITE_RMW:.+]] = memref.atomic_rmw assign %[[EXTUI]], %[[ALLOC]][] : (i8, memref<i8>) -> i8 |
| // CHECK: return |
| |
| // CHECK32-LABEL: func @rank_zero_memref_store( |
| // CHECK32-SAME: %[[ARG0:.+]]: i4 |
| // CHECK32: %[[ALLOC:.+]] = memref.alloc() : memref<i32> |
| // CHECK32: %[[EXTUI:.+]] = arith.extui %[[ARG0]] : i4 to i32 |
| // CHECK32: %[[WRITE_RMW:.+]] = memref.atomic_rmw assign %[[EXTUI]], %[[ALLOC]][] : (i32, memref<i32>) -> i32 |
| // CHECK32: return |