// Lit test for the acc-implicit-data pass: checks the implicit OpenACC data operations created for FIR values used inside compute regions.
// RUN: fir-opt %s --pass-pipeline="builtin.module(acc-initialize-fir-analyses,acc-implicit-data)" -split-input-file | FileCheck %s
// -----
// An i64 scalar read inside acc.serial without any data clause; an implicit
// firstprivate is expected for it.
func.func @test_fir_scalar_in_serial() {
  %scalar = fir.alloca i64 {bindc_name = "scalarvar"}
  acc.serial {
    %value = fir.load %scalar : !fir.ref<i64>
    acc.yield
  }
  return
}
// CHECK: acc.firstprivate varPtr({{.*}} : !fir.ref<i64>) recipe({{.*}}) -> !fir.ref<i64> {implicit = true, name = "scalarvar"}
// -----
// An f32 scalar read inside acc.parallel without any data clause; an implicit
// firstprivate is expected for it.
func.func @test_fir_scalar_in_parallel() {
  %scalar = fir.alloca f32 {bindc_name = "scalarvar"}
  acc.parallel {
    %value = fir.load %scalar : !fir.ref<f32>
    acc.yield
  }
  return
}
// CHECK: acc.firstprivate varPtr({{.*}} : !fir.ref<f32>) recipe({{.*}}) -> !fir.ref<f32> {implicit = true, name = "scalarvar"}
// -----
// An f64 scalar read inside acc.kernels without any data clause; an implicit
// copy (copyin paired with copyout) is expected for it.
func.func @test_fir_scalar_in_kernels() {
  %scalar = fir.alloca f64 {bindc_name = "scalarvar"}
  acc.kernels {
    %value = fir.load %scalar : !fir.ref<f64>
    acc.terminator
  }
  return
}
// CHECK: %[[COPYIN:.*]] = acc.copyin varPtr({{.*}} : !fir.ref<f64>) -> !fir.ref<f64> {dataClause = #acc<data_clause acc_copy>, implicit = true, name = "scalarvar"}
// CHECK: acc.copyout accPtr(%[[COPYIN]] : !fir.ref<f64>) to varPtr({{.*}} : !fir.ref<f64>) {dataClause = #acc<data_clause acc_copy>, implicit = true, name = "scalarvar"}
// -----
// An f32 scalar read inside acc.parallel carrying default(none); no implicit
// firstprivate may be created for it.
func.func @test_fir_scalar_in_parallel_defaultnone() {
  %livein = fir.alloca f32 {bindc_name = "scalarvar"}
  acc.parallel {
    %load = fir.load %livein : !fir.ref<f32>
    acc.yield
  } attributes {defaultAttr = #acc<defaultvalue none>}
  return
}
// The label and the closing match on the function's return confine the
// negative check to the output of this function only.
// CHECK-LABEL: func.func @test_fir_scalar_in_parallel_defaultnone
// CHECK-NOT: acc.firstprivate
// CHECK: return
// -----
// An f64 scalar read inside acc.kernels carrying default(none); no implicit
// copyin may be created for it.
func.func @test_fir_scalar_in_kernels_defaultnone() {
  %livein = fir.alloca f64 {bindc_name = "scalarvar"}
  acc.kernels {
    %load = fir.load %livein : !fir.ref<f64>
    acc.terminator
  } attributes {defaultAttr = #acc<defaultvalue none>}
  return
}
// The label and the closing match on the function's return confine the
// negative check to the output of this function only.
// CHECK-LABEL: func.func @test_fir_scalar_in_kernels_defaultnone
// CHECK-NOT: acc.copyin
// CHECK: return
// -----
// A derived-type variable read inside acc.parallel without any data clause;
// an implicit copy (copyin paired with copyout) is expected for it.
func.func @test_fir_derivedtype_in_parallel() {
  %aggr = fir.alloca !fir.type<_QFTaggr{field:f32}> {bindc_name = "aggrvar"}
  acc.parallel {
    %value = fir.load %aggr : !fir.ref<!fir.type<_QFTaggr{field:f32}>>
    acc.yield
  }
  return
}
// CHECK: %[[COPYIN:.*]] = acc.copyin varPtr({{.*}} : !fir.ref<!fir.type<_QFTaggr{field:f32}>>) -> !fir.ref<!fir.type<_QFTaggr{field:f32}>> {dataClause = #acc<data_clause acc_copy>, implicit = true, name = "aggrvar"}
// CHECK: acc.copyout accPtr(%[[COPYIN]] : !fir.ref<!fir.type<_QFTaggr{field:f32}>>) to varPtr({{.*}} : !fir.ref<!fir.type<_QFTaggr{field:f32}>>) {dataClause = #acc<data_clause acc_copy>, implicit = true, name = "aggrvar"}
// -----
// A derived-type variable read inside acc.kernels without any data clause;
// an implicit copy (copyin paired with copyout) is expected for it.
func.func @test_fir_derivedtype_in_kernels() {
  %aggr = fir.alloca !fir.type<_QFTaggr{field:f32}> {bindc_name = "aggrvar"}
  acc.kernels {
    %value = fir.load %aggr : !fir.ref<!fir.type<_QFTaggr{field:f32}>>
    acc.terminator
  }
  return
}
// CHECK: %[[COPYIN:.*]] = acc.copyin varPtr({{.*}} : !fir.ref<!fir.type<_QFTaggr{field:f32}>>) -> !fir.ref<!fir.type<_QFTaggr{field:f32}>> {dataClause = #acc<data_clause acc_copy>, implicit = true, name = "aggrvar"}
// CHECK: acc.copyout accPtr(%[[COPYIN]] : !fir.ref<!fir.type<_QFTaggr{field:f32}>>) to varPtr({{.*}} : !fir.ref<!fir.type<_QFTaggr{field:f32}>>) {dataClause = #acc<data_clause acc_copy>, implicit = true, name = "aggrvar"}
// -----
// A 10-element f32 array read inside acc.parallel without any data clause;
// an implicit copy (copyin paired with copyout) is expected for it.
func.func @test_fir_array_in_parallel() {
  %array = fir.alloca !fir.array<10xf32> {bindc_name = "arrayvar"}
  acc.parallel {
    %value = fir.load %array : !fir.ref<!fir.array<10xf32>>
    acc.yield
  }
  return
}
// CHECK: %[[COPYIN:.*]] = acc.copyin varPtr({{.*}} : !fir.ref<!fir.array<10xf32>>) -> !fir.ref<!fir.array<10xf32>> {dataClause = #acc<data_clause acc_copy>, implicit = true, name = "arrayvar"}
// CHECK: acc.copyout accPtr(%[[COPYIN]] : !fir.ref<!fir.array<10xf32>>) to varPtr({{.*}} : !fir.ref<!fir.array<10xf32>>) {dataClause = #acc<data_clause acc_copy>, implicit = true, name = "arrayvar"}
// -----
// A 10-element f32 array read inside acc.kernels without any data clause;
// an implicit copy (copyin paired with copyout) is expected for it.
func.func @test_fir_array_in_kernels() {
  %array = fir.alloca !fir.array<10xf32> {bindc_name = "arrayvar"}
  acc.kernels {
    %value = fir.load %array : !fir.ref<!fir.array<10xf32>>
    acc.terminator
  }
  return
}
// CHECK: %[[COPYIN:.*]] = acc.copyin varPtr({{.*}} : !fir.ref<!fir.array<10xf32>>) -> !fir.ref<!fir.array<10xf32>> {dataClause = #acc<data_clause acc_copy>, implicit = true, name = "arrayvar"}
// CHECK: acc.copyout accPtr(%[[COPYIN]] : !fir.ref<!fir.array<10xf32>>) to varPtr({{.*}} : !fir.ref<!fir.array<10xf32>>) {dataClause = #acc<data_clause acc_copy>, implicit = true, name = "arrayvar"}
// -----
// A derived-type variable read inside acc.parallel carrying default(present);
// an implicit present (with a matching delete) is expected for it.
func.func @test_fir_derivedtype_in_parallel_defaultpresent() {
  %aggr = fir.alloca !fir.type<_QFTaggr{field:f32}> {bindc_name = "aggrvar"}
  acc.parallel {
    %value = fir.load %aggr : !fir.ref<!fir.type<_QFTaggr{field:f32}>>
    acc.yield
  } attributes {defaultAttr = #acc<defaultvalue present>}
  return
}
// CHECK: %[[PRESENT:.*]] = acc.present varPtr({{.*}} : !fir.ref<!fir.type<_QFTaggr{field:f32}>>) -> !fir.ref<!fir.type<_QFTaggr{field:f32}>> {acc.from_default, implicit = true, name = "aggrvar"}
// CHECK: acc.delete accPtr(%[[PRESENT]] : !fir.ref<!fir.type<_QFTaggr{field:f32}>>) {dataClause = #acc<data_clause acc_present>, implicit = true, name = "aggrvar"}
// -----
// A derived-type variable read inside acc.kernels carrying default(present);
// an implicit present (with a matching delete) is expected for it.
func.func @test_fir_derivedtype_in_kernels_defaultpresent() {
  %aggr = fir.alloca !fir.type<_QFTaggr{field:f32}> {bindc_name = "aggrvar"}
  acc.kernels {
    %value = fir.load %aggr : !fir.ref<!fir.type<_QFTaggr{field:f32}>>
    acc.terminator
  } attributes {defaultAttr = #acc<defaultvalue present>}
  return
}
// CHECK: %[[PRESENT:.*]] = acc.present varPtr({{.*}} : !fir.ref<!fir.type<_QFTaggr{field:f32}>>) -> !fir.ref<!fir.type<_QFTaggr{field:f32}>> {acc.from_default, implicit = true, name = "aggrvar"}
// CHECK: acc.delete accPtr(%[[PRESENT]] : !fir.ref<!fir.type<_QFTaggr{field:f32}>>) {dataClause = #acc<data_clause acc_present>, implicit = true, name = "aggrvar"}
// -----
// A 10-element f32 array read inside acc.parallel carrying default(present);
// an implicit present (with a matching delete) is expected for it.
func.func @test_fir_array_in_parallel_defaultpresent() {
  %array = fir.alloca !fir.array<10xf32> {bindc_name = "arrayvar"}
  acc.parallel {
    %value = fir.load %array : !fir.ref<!fir.array<10xf32>>
    acc.yield
  } attributes {defaultAttr = #acc<defaultvalue present>}
  return
}
// CHECK: %[[PRESENT:.*]] = acc.present varPtr({{.*}} : !fir.ref<!fir.array<10xf32>>) -> !fir.ref<!fir.array<10xf32>> {acc.from_default, implicit = true, name = "arrayvar"}
// CHECK: acc.delete accPtr(%[[PRESENT]] : !fir.ref<!fir.array<10xf32>>) {dataClause = #acc<data_clause acc_present>, implicit = true, name = "arrayvar"}
// -----
// A 10-element f32 array read inside acc.kernels carrying default(present);
// an implicit present (with a matching delete) is expected for it.
func.func @test_fir_array_in_kernels_defaultpresent() {
  %array = fir.alloca !fir.array<10xf32> {bindc_name = "arrayvar"}
  acc.kernels {
    %value = fir.load %array : !fir.ref<!fir.array<10xf32>>
    acc.terminator
  } attributes {defaultAttr = #acc<defaultvalue present>}
  return
}
// CHECK: %[[PRESENT:.*]] = acc.present varPtr({{.*}} : !fir.ref<!fir.array<10xf32>>) -> !fir.ref<!fir.array<10xf32>> {acc.from_default, implicit = true, name = "arrayvar"}
// CHECK: acc.delete accPtr(%[[PRESENT]] : !fir.ref<!fir.array<10xf32>>) {dataClause = #acc<data_clause acc_present>, implicit = true, name = "arrayvar"}
// -----
// An f32 scalar read inside acc.parallel carrying default(present); the
// expected implicit clause is still firstprivate rather than present.
func.func @test_fir_scalar_in_parallel_defaultpresent() {
  %scalar = fir.alloca f32 {bindc_name = "scalarvar"}
  acc.parallel {
    %value = fir.load %scalar : !fir.ref<f32>
    acc.yield
  } attributes {defaultAttr = #acc<defaultvalue present>}
  return
}
// CHECK: acc.firstprivate varPtr({{.*}} : !fir.ref<f32>) recipe({{.*}}) -> !fir.ref<f32> {implicit = true, name = "scalarvar"}
// -----
// An f64 scalar read inside acc.kernels carrying default(present); the
// expected implicit clause is still a copy (copyin paired with copyout).
func.func @test_fir_scalar_in_kernels_defaultpresent() {
  %scalar = fir.alloca f64 {bindc_name = "scalarvar"}
  acc.kernels {
    %value = fir.load %scalar : !fir.ref<f64>
    acc.terminator
  } attributes {defaultAttr = #acc<defaultvalue present>}
  return
}
// CHECK: %[[COPYIN:.*]] = acc.copyin varPtr({{.*}} : !fir.ref<f64>) -> !fir.ref<f64> {dataClause = #acc<data_clause acc_copy>, implicit = true, name = "scalarvar"}
// CHECK: acc.copyout accPtr(%[[COPYIN]] : !fir.ref<f64>) to varPtr({{.*}} : !fir.ref<f64>) {dataClause = #acc<data_clause acc_copy>, implicit = true, name = "scalarvar"}
// -----
// The live-in value is a reference to a box (descriptor); the expected
// implicit copy uses the varPtr form on that reference.
func.func @test_fir_box_ref() {
  %box_ref = fir.alloca !fir.box<!fir.array<?xi32>> {bindc_name = "descriptor"}
  acc.parallel {
    %box = fir.load %box_ref : !fir.ref<!fir.box<!fir.array<?xi32>>>
    acc.yield
  }
  return
}
// CHECK: %[[COPYIN:.*]] = acc.copyin varPtr({{.*}} : !fir.ref<!fir.box<!fir.array<?xi32>>>) -> !fir.ref<!fir.box<!fir.array<?xi32>>> {dataClause = #acc<data_clause acc_copy>, implicit = true, name = "descriptor"}
// CHECK: acc.copyout accPtr(%[[COPYIN]] : !fir.ref<!fir.box<!fir.array<?xi32>>>) to varPtr({{.*}} : !fir.ref<!fir.box<!fir.array<?xi32>>>) {dataClause = #acc<data_clause acc_copy>, implicit = true, name = "descriptor"}
// -----
// The live-in value is the box itself, loaded before the region; the expected
// implicit copy uses the var/accVar form on the box value.
func.func @test_fir_box_val() {
  %box_ref = fir.alloca !fir.box<!fir.array<?xi32>> {bindc_name = "descriptor"}
  %box = fir.load %box_ref : !fir.ref<!fir.box<!fir.array<?xi32>>>
  acc.parallel {
    %base = fir.box_addr %box : (!fir.box<!fir.array<?xi32>>) -> !fir.ref<!fir.array<?xi32>>
    acc.yield
  }
  return
}
// CHECK: %[[COPYIN:.*]] = acc.copyin var({{.*}} : !fir.box<!fir.array<?xi32>>) -> !fir.box<!fir.array<?xi32>> {dataClause = #acc<data_clause acc_copy>, implicit = true, name = "descriptor"}
// CHECK: acc.copyout accVar(%[[COPYIN]] : !fir.box<!fir.array<?xi32>>) to var({{.*}} : !fir.box<!fir.array<?xi32>>) {dataClause = #acc<data_clause acc_copy>, implicit = true, name = "descriptor"}
// -----
// The box of "aa" is covered by an explicit copy clause, yet the region only
// touches the underlying array reference and never the box. The test verifies
// that an implicit present is emitted for that reference.
func.func @test_explicit_box_implicit_ptr() {
  %idx = arith.constant 1 : index
  %extent = arith.constant 10 : index
  %storage = fir.alloca !fir.array<10xf32> {bindc_name = "aa"}
  %shp = fir.shape %extent : (index) -> !fir.shape<1>
  %decl = fir.declare %storage(%shp) {uniq_name = "aa"} : (!fir.ref<!fir.array<10xf32>>, !fir.shape<1>) -> !fir.ref<!fir.array<10xf32>>
  %desc = fir.embox %decl(%shp) : (!fir.ref<!fir.array<10xf32>>, !fir.shape<1>) -> !fir.box<!fir.array<10xf32>>
  %dev = acc.copyin var(%desc : !fir.box<!fir.array<10xf32>>) -> !fir.box<!fir.array<10xf32>> {dataClause = #acc<data_clause acc_copy>, name = "aa"}
  acc.serial dataOperands(%dev : !fir.box<!fir.array<10xf32>>) {
    // Address an element through the declared reference rather than the box.
    %addr = fir.array_coor %decl(%shp) %idx : (!fir.ref<!fir.array<10xf32>>, !fir.shape<1>, index) -> !fir.ref<f32>
    acc.yield
  }
  acc.copyout accVar(%dev : !fir.box<!fir.array<10xf32>>) to var(%desc : !fir.box<!fir.array<10xf32>>) {dataClause = #acc<data_clause acc_copy>, name = "aa"}
  return
}
// CHECK: acc.present varPtr(%{{.*}} : !fir.ref<!fir.array<10xf32>>){{.*}}-> !fir.ref<!fir.array<10xf32>> {implicit = true, name = "aa"}
// -----
// This test uses an explicit-shape array with no data clause - it also has
// an optimization where the pointer is used instead of the boxed entity.
// It tests that the implicit data pass is able to recover the size despite
// it not being encoded in the FIR type.
// It was generated from the following Fortran source:
// subroutine array(aa,nn)
// integer :: nn
// real :: aa(10:nn)
// !$acc kernels loop
// do ii = 10, nn
// aa(ii) = ii
// end do
// !$acc end kernels
// end subroutine
// %arg0 is the array "aa" passed as a reference whose FIR type has an unknown
// extent; %arg1 is the reference to "nn".
func.func @_QParray(%arg0: !fir.ref<!fir.array<?xf32>> {fir.bindc_name = "aa"}, %arg1: !fir.ref<i32> {fir.bindc_name = "nn"}) {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c10_i64 = arith.constant 10 : i64
%0 = fir.dummy_scope : !fir.dscope
%1 = fir.declare %arg1 dummy_scope %0 {uniq_name = "_QFarrayEnn"} : (!fir.ref<i32>, !fir.dscope) -> !fir.ref<i32>
// Lower bound 10 as an index value.
%4 = fir.convert %c10_i64 : (i64) -> index
// Load nn and convert it to an index value (the upper bound).
%5 = fir.load %1 : !fir.ref<i32>
%6 = fir.convert %5 : (i32) -> i64
%7 = fir.convert %6 : (i64) -> index
// Extent = max(nn - 10 + 1, 0).
%8 = arith.subi %7, %4 : index
%9 = arith.addi %8, %c1 : index
%10 = arith.cmpi sgt, %9, %c0 : index
%11 = arith.select %10, %9, %c0 : index
// The shape_shift carries the lower bound and the computed extent; it is the
// only place the array size is recorded, since the type is array<?xf32>.
%12 = fir.shape_shift %4, %11 : (index, index) -> !fir.shapeshift<1>
%13 = fir.declare %arg0(%12) dummy_scope %0 {uniq_name = "_QFarrayEaa"} : (!fir.ref<!fir.array<?xf32>>, !fir.shapeshift<1>, !fir.dscope) -> !fir.ref<!fir.array<?xf32>>
// The region addresses the array through the declared reference and has no
// data clause attached.
acc.kernels {
%elem = fir.array_coor %13(%12) %4 : (!fir.ref<!fir.array<?xf32>>, !fir.shapeshift<1>, index) -> !fir.ref<f32>
acc.terminator
}
return
}
// This tries to confirm that the acc.bounds operation is as expected.
// Effectively the extent needs to be max(0, nn - 10 + 1), stride needs to be 1,
// adjusted lowerbound is 0, and actual language start index is 10.
// CHECK: %[[NN:.*]] = fir.declare %{{.*}} dummy_scope %{{.*}} {uniq_name = "_QFarrayEnn"} : (!fir.ref<i32>, !fir.dscope) -> !fir.ref<i32>
// CHECK: %[[C10:.*]] = fir.convert %c10{{.*}} : (i64) -> index
// CHECK: %[[LOADEDNN:.*]] = fir.load %[[NN]] : !fir.ref<i32>
// CHECK: %[[CAST1:.*]] = fir.convert %[[LOADEDNN]] : (i32) -> i64
// CHECK: %[[CAST2:.*]] = fir.convert %[[CAST1]] : (i64) -> index
// CHECK: %[[SUBI:.*]] = arith.subi %[[CAST2]], %[[C10]] : index
// CHECK: %[[ADDI:.*]] = arith.addi %[[SUBI]], %c1{{.*}} : index
// CHECK: %[[CMPI:.*]] = arith.cmpi sgt, %[[ADDI]], %c0{{.*}} : index
// CHECK: %[[SELECT:.*]] = arith.select %[[CMPI]], %[[ADDI]], %c0{{.*}} : index
// CHECK: %[[BOUNDS:.*]] = acc.bounds lowerbound(%c0{{.*}} : index) upperbound(%{{.*}} : index) extent(%[[SELECT]] : index) stride(%c1{{.*}} : index) startIdx(%[[C10]] : index)
// CHECK: acc.copyin varPtr(%{{.*}} : !fir.ref<!fir.array<?xf32>>) bounds(%[[BOUNDS]]) -> !fir.ref<!fir.array<?xf32>> {dataClause = #acc<data_clause acc_copy>, implicit = true, name = "aa"}
// -----
// Test to confirm that a copyin clause is not implicitly generated for a
// symbol that already appears in an explicit deviceptr clause on the construct.
func.func @test_deviceptr_no_implicit_copy() {
  %c10 = arith.constant 10 : index
  %arr = fir.alloca !fir.array<10xf64> {bindc_name = "a"}
  %shape = fir.shape %c10 : (index) -> !fir.shape<1>
  %arr_box = fir.embox %arr(%shape) : (!fir.ref<!fir.array<10xf64>>, !fir.shape<1>) -> !fir.box<!fir.array<10xf64>>
  %devptr = acc.deviceptr var(%arr_box : !fir.box<!fir.array<10xf64>>) -> !fir.box<!fir.array<10xf64>> {name = "a"}
  acc.parallel dataOperands(%devptr : !fir.box<!fir.array<10xf64>>) {
    // The region uses the original box, not the deviceptr result.
    %elem = fir.box_addr %arr_box : (!fir.box<!fir.array<10xf64>>) -> !fir.ref<!fir.array<10xf64>>
    acc.yield
  }
  return
}
// The negative check spans everything from the explicit acc.deviceptr to the
// function's return, so it covers the compute construct itself.
// CHECK-LABEL: func.func @test_deviceptr_no_implicit_copy
// CHECK: acc.deviceptr
// CHECK-NOT: acc.copyin
// CHECK: return
// -----
// A deviceptr clause attached through acc.declare_enter/acc.declare_exit must
// not lead to an implicit copyin for the same box used in the compute region.
func.func @test_acc_declare_deviceptr() {
  %extent = arith.constant 10 : index
  %storage = fir.alloca !fir.array<10xf64> {bindc_name = "a"}
  %shp = fir.shape %extent : (index) -> !fir.shape<1>
  %desc = fir.embox %storage(%shp) : (!fir.ref<!fir.array<10xf64>>, !fir.shape<1>) -> !fir.box<!fir.array<10xf64>>
  %dptr = acc.deviceptr var(%desc : !fir.box<!fir.array<10xf64>>) -> !fir.box<!fir.array<10xf64>> {name = "a"}
  %decl_tok = acc.declare_enter dataOperands(%dptr : !fir.box<!fir.array<10xf64>>)
  acc.parallel {
    %base = fir.box_addr %desc : (!fir.box<!fir.array<10xf64>>) -> !fir.ref<!fir.array<10xf64>>
    acc.yield
  }
  acc.declare_exit token(%decl_tok)
  return
}
// CHECK-LABEL: func.func @test_acc_declare_deviceptr
// CHECK: acc.deviceptr
// CHECK-NOT: acc.copyin
// CHECK: acc.deviceptr
// -----
// An allocatable array carrying the CUDA device data attribute is used inside
// acc.serial with no data clause; an implicit deviceptr is expected, with no
// copyin before it and no copyout after it.
func.func @test_cuda_device_implicit_deviceptr() {
  %scope = fir.dummy_scope : !fir.dscope
  %alloc = cuf.alloc !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", data_attr = #cuf.cuda<device>, uniq_name = "_QFEa"} -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
  // Store an initial descriptor built from a zero address and a zero extent.
  %null = fir.zero_bits !fir.heap<!fir.array<?xf32>>
  %zero = arith.constant 0 : index
  %shp = fir.shape %zero : (index) -> !fir.shape<1>
  %init_box = fir.embox %null(%shp) {allocator_idx = 2 : i32} : (!fir.heap<!fir.array<?xf32>>, !fir.shape<1>) -> !fir.box<!fir.heap<!fir.array<?xf32>>>
  fir.store %init_box to %alloc : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
  %decl:2 = hlfir.declare %alloc {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFEa"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
  // Set the bounds through the runtime call, then allocate with cuf.allocate.
  %one = arith.constant 1 : index
  %sixteen_i32 = arith.constant 16 : i32
  %zero_i32 = arith.constant 0 : i32
  %box_none = fir.convert %decl#0 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>>
  %one_i64 = fir.convert %one : (index) -> i64
  %sixteen_i64 = fir.convert %sixteen_i32 : (i32) -> i64
  fir.call @_FortranAAllocatableSetBounds(%box_none, %zero_i32, %one_i64, %sixteen_i64) fastmath<contract> : (!fir.ref<!fir.box<none>>, i32, i64, i64) -> ()
  %stat = cuf.allocate %decl#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<device>} -> i32
  acc.serial {
    // Assign 100.0 to element 5 of the array loaded from the descriptor.
    %hundred = arith.constant 1.000000e+02 : f32
    %box = fir.load %decl#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
    %five = arith.constant 5 : index
    %elem = hlfir.designate %box (%five) : (!fir.box<!fir.heap<!fir.array<?xf32>>>, index) -> !fir.ref<f32>
    hlfir.assign %hundred to %elem : f32, !fir.ref<f32>
    acc.yield
  }
  return
}
func.func private @_FortranAAllocatableSetBounds(!fir.ref<!fir.box<none>>, i32, i64, i64) -> ()
// CHECK-LABEL: func.func @test_cuda_device_implicit_deviceptr
// CHECK-NOT: acc.copyin
// CHECK: acc.deviceptr
// CHECK-NOT: acc.copyout