[OpenACC][CIR] implement 'collapse' lowering for combined constructs
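Another trivial implementation. The 'collapse' clause has a constant value
that doesn't require inserting any instructions, so for combined constructs
the clause is simply applied to the loop operation. Since every construct
kind is now handled, the 'not implemented' fallback becomes an unreachable.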
diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h
index 8652a0f..8892c49 100644
--- a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h
+++ b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h
@@ -414,10 +414,10 @@
value = value.sextOrTrunc(64);
operation.setCollapseForDeviceTypes(builder.getContext(),
lastDeviceTypeValues, value);
+ } else if constexpr (isCombinedType<OpTy>) {
+ applyToLoopOp(clause);
} else {
- // TODO: When we've implemented this for everything, switch this to an
- // unreachable. Combined constructs remain.
- return clauseNotImplemented(clause);
+ llvm_unreachable("Unknown construct kind in VisitCollapseClause");
}
}
diff --git a/clang/test/CIR/CodeGenOpenACC/combined.cpp b/clang/test/CIR/CodeGenOpenACC/combined.cpp
index 38ca45d..3b2ae8a 100644
--- a/clang/test/CIR/CodeGenOpenACC/combined.cpp
+++ b/clang/test/CIR/CodeGenOpenACC/combined.cpp
@@ -134,4 +134,46 @@
// CHECK: acc.terminator
// CHECK-NEXT: } loc
+ #pragma acc parallel loop collapse(1) device_type(radeon)
+ for(unsigned I = 0; I < N; ++I)
+ for(unsigned J = 0; J < N; ++J)
+ for(unsigned K = 0; K < N; ++K);
+ // CHECK: acc.parallel combined(loop) {
+ // CHECK: acc.loop combined(parallel) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } attributes {collapse = [1], collapseDeviceType = [#acc.device_type<none>]}
+ // CHECK: acc.yield
+ // CHECK-NEXT: } loc
+
+ #pragma acc serial loop collapse(1) device_type(radeon) collapse (2)
+ for(unsigned I = 0; I < N; ++I)
+ for(unsigned J = 0; J < N; ++J)
+ for(unsigned K = 0; K < N; ++K);
+ // CHECK: acc.serial combined(loop) {
+ // CHECK: acc.loop combined(serial) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } attributes {collapse = [1, 2], collapseDeviceType = [#acc.device_type<none>, #acc.device_type<radeon>]}
+ // CHECK: acc.yield
+ // CHECK-NEXT: } loc
+
+ #pragma acc kernels loop collapse(1) device_type(radeon, nvidia) collapse (2)
+ for(unsigned I = 0; I < N; ++I)
+ for(unsigned J = 0; J < N; ++J)
+ for(unsigned K = 0; K < N; ++K);
+ // CHECK: acc.kernels combined(loop) {
+ // CHECK: acc.loop combined(kernels) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } attributes {collapse = [1, 2, 2], collapseDeviceType = [#acc.device_type<none>, #acc.device_type<radeon>, #acc.device_type<nvidia>]}
+ // CHECK: acc.terminator
+ // CHECK-NEXT: } loc
+ #pragma acc parallel loop collapse(1) device_type(radeon, nvidia) collapse(2) device_type(host) collapse(3)
+ for(unsigned I = 0; I < N; ++I)
+ for(unsigned J = 0; J < N; ++J)
+ for(unsigned K = 0; K < N; ++K);
+ // CHECK: acc.parallel combined(loop) {
+ // CHECK: acc.loop combined(parallel) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } attributes {collapse = [1, 2, 2, 3], collapseDeviceType = [#acc.device_type<none>, #acc.device_type<radeon>, #acc.device_type<nvidia>, #acc.device_type<host>]}
+ // CHECK: acc.yield
+ // CHECK-NEXT: } loc
}