blob: 76651c132f738b4360116107b5bc277e3a7777fa [file] [log] [blame]
// RUN: %clang_cc1 -fopenacc -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir %s -o - | FileCheck %s
void acc_cache() {
// CHECK: cir.func{{.*}} @acc_cache() {
int iArr[10];
// CHECK-NEXT: %[[IARR:.*]] = cir.alloca !cir.array<!s32i x 10>, !cir.ptr<!cir.array<!s32i x 10>>, ["iArr"]
float fArr[10];
// CHECK-NEXT: %[[FARR:.*]] = cir.alloca !cir.array<!cir.float x 10>, !cir.ptr<!cir.array<!cir.float x 10>>, ["fArr"]
#pragma acc cache(iArr[1], fArr[1:5])
// This does nothing, as it is not in a loop.
#pragma acc parallel
{
#pragma acc cache(iArr[1], fArr[1:5])
// This does nothing, as it is not in a loop.
}
// CHECK-NEXT: acc.parallel {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } loc
#pragma acc loop
for(int i = 0; i < 5; ++i) {
for(int j = 0; j < 5; ++j) {
#pragma acc cache(iArr[1], fArr[1:5])
}
}
// CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1>
// CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
// CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
// CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
// CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64
// CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64)
// CHECK-NEXT: %[[CACHE1:.*]] = acc.cache varPtr(%[[IARR]] : !cir.ptr<!cir.array<!s32i x 10>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!s32i x 10>> {name = "iArr[1]", structured = false}
//
// CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1>
// CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
// CHECK-NEXT: %[[FIVE:.*]] = cir.const #cir.int<5>
// CHECK-NEXT: %[[FIVE_CAST:.*]] = builtin.unrealized_conversion_cast %[[FIVE]] : !s32i to si32
// CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
// CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
// CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[FIVE_CAST]] : si32) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST]] : i64)
// CHECK-NEXT: %[[CACHE2:.*]] = acc.cache varPtr(%[[FARR]] : !cir.ptr<!cir.array<!cir.float x 10>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 10>> {name = "fArr[1:5]", structured = false}
//
// CHECK-NEXT: acc.loop cache(%[[CACHE1]], %[[CACHE2]] : !cir.ptr<!cir.array<!s32i x 10>>, !cir.ptr<!cir.array<!cir.float x 10>>) {
// CHECK: acc.yield
// CHECK-NEXT: } attributes {independent = [#acc.device_type<none>]}
#pragma acc loop
for(int i = 0; i < 5; ++i) {
#pragma acc cache(iArr[1], fArr[1:5])
}
// CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1>
// CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
// CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
// CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
// CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64
// CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64)
// CHECK-NEXT: %[[CACHE1:.*]] = acc.cache varPtr(%[[IARR]] : !cir.ptr<!cir.array<!s32i x 10>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!s32i x 10>> {name = "iArr[1]", structured = false}
//
// CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1>
// CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
// CHECK-NEXT: %[[FIVE:.*]] = cir.const #cir.int<5>
// CHECK-NEXT: %[[FIVE_CAST:.*]] = builtin.unrealized_conversion_cast %[[FIVE]] : !s32i to si32
// CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
// CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
// CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[FIVE_CAST]] : si32) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST]] : i64)
// CHECK-NEXT: %[[CACHE2:.*]] = acc.cache varPtr(%[[FARR]] : !cir.ptr<!cir.array<!cir.float x 10>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 10>> {name = "fArr[1:5]", structured = false}
//
// CHECK-NEXT: acc.loop cache(%[[CACHE1]], %[[CACHE2]] : !cir.ptr<!cir.array<!s32i x 10>>, !cir.ptr<!cir.array<!cir.float x 10>>) {
// CHECK: acc.yield
// CHECK-NEXT: } attributes {independent = [#acc.device_type<none>]}
#pragma acc parallel loop
for(int i = 0; i < 5; ++i) {
#pragma acc cache(iArr[1], fArr[1:5])
}
// CHECK-NEXT: acc.parallel combined(loop) {
// CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1>
// CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
// CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
// CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
// CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64
// CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64)
// CHECK-NEXT: %[[CACHE1:.*]] = acc.cache varPtr(%[[IARR]] : !cir.ptr<!cir.array<!s32i x 10>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!s32i x 10>> {name = "iArr[1]", structured = false}
//
// CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1>
// CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
// CHECK-NEXT: %[[FIVE:.*]] = cir.const #cir.int<5>
// CHECK-NEXT: %[[FIVE_CAST:.*]] = builtin.unrealized_conversion_cast %[[FIVE]] : !s32i to si32
// CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
// CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
// CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[FIVE_CAST]] : si32) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST]] : i64)
// CHECK-NEXT: %[[CACHE2:.*]] = acc.cache varPtr(%[[FARR]] : !cir.ptr<!cir.array<!cir.float x 10>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 10>> {name = "fArr[1:5]", structured = false}
//
// CHECK-NEXT: acc.loop combined(parallel) cache(%[[CACHE1]], %[[CACHE2]] : !cir.ptr<!cir.array<!s32i x 10>>, !cir.ptr<!cir.array<!cir.float x 10>>) {
// CHECK: acc.yield
// CHECK-NEXT: } attributes {independent = [#acc.device_type<none>]}
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } loc
#pragma acc parallel loop
for(int i = 0; i < 5; ++i) {
int localArr[5];
// The first term here isn't lowered, because it references data inside of the 'loop'.
#pragma acc cache(localArr[i], iArr[1], fArr[1:5])
}
// CHECK-NEXT: acc.parallel combined(loop) {
// CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1>
// CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
// CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
// CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
// CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64
// CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64)
// CHECK-NEXT: %[[CACHE1:.*]] = acc.cache varPtr(%[[IARR]] : !cir.ptr<!cir.array<!s32i x 10>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!s32i x 10>> {name = "iArr[1]", structured = false}
//
// CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1>
// CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
// CHECK-NEXT: %[[FIVE:.*]] = cir.const #cir.int<5>
// CHECK-NEXT: %[[FIVE_CAST:.*]] = builtin.unrealized_conversion_cast %[[FIVE]] : !s32i to si32
// CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
// CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
// CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[FIVE_CAST]] : si32) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST]] : i64)
// CHECK-NEXT: %[[CACHE2:.*]] = acc.cache varPtr(%[[FARR]] : !cir.ptr<!cir.array<!cir.float x 10>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 10>> {name = "fArr[1:5]", structured = false}
//
// CHECK-NEXT: acc.loop combined(parallel) cache(%[[CACHE1]], %[[CACHE2]] : !cir.ptr<!cir.array<!s32i x 10>>, !cir.ptr<!cir.array<!cir.float x 10>>) {
// CHECK: acc.yield
// CHECK-NEXT: } attributes {independent = [#acc.device_type<none>]}
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } loc
}