// blob: c44699ee12fcfd546fb53b83a0e862b30b8d2ee3 [file] [log] [blame]
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _
// Test host codegen.
// RUN: %clang_cc1 -verify -Wno-vla -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK
// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify -Wno-vla %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK
// RUN: %clang_cc1 -verify -Wno-vla -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK
// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify -Wno-vla %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK
// RUN: %clang_cc1 -verify -Wno-vla -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify -Wno-vla %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// RUN: %clang_cc1 -verify -Wno-vla -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify -Wno-vla %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// Test target codegen - host bc file has to be created first.
// RUN: %clang_cc1 -verify -Wno-vla -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
// RUN: %clang_cc1 -verify -Wno-vla -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix TCHECK
// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify -Wno-vla %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK
// RUN: %clang_cc1 -verify -Wno-vla -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
// RUN: %clang_cc1 -verify -Wno-vla -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix TCHECK
// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify -Wno-vla %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK
// RUN: %clang_cc1 -verify -Wno-vla -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
// RUN: %clang_cc1 -verify -Wno-vla -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify -Wno-vla %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
// RUN: %clang_cc1 -verify -Wno-vla -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
// RUN: %clang_cc1 -verify -Wno-vla -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify -Wno-vla %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
// expected-no-diagnostics
#ifndef HEADER
#define HEADER
// Check that target registration is registered as a Ctor.
template<typename tx, typename ty> // Two-member aggregate parameterized on both member types.
struct TT{
tx X; // First member, of type tx.
ty Y; // Second member, of type ty.
};
int global; // File-scope int named in the device(), depend() and firstprivate() clauses in foo().
extern int global; // Redeclaration of the same variable, placed after its definition.
int foo(int n) { // Test fixture: three `target teams loop` regions carrying depend clauses; returns a.
int a = 0; // Named in the device(), depend() and if(target:) clauses below.
short aa = 0; // Not referenced by any region in this function.
float b[10]; // Fixed-size array; listed in depend(out:) on the first region.
float bn[n]; // Variable-length array sized by n; listed in depend(inout:) on the second region.
double c[5][10]; // Not referenced by any region in this function.
double cn[5][n]; // Inner dimension sized by n; cn[4] is listed in depend(out:) on the first region.
TT<long long, char> d; // Not referenced by any region in this function.
static long *plocal; // Function-local static pointer; never assigned in the visible code.
#pragma omp target teams loop device(global + a) depend(in: global) depend(out: a, b, cn[4])
for (int i = 0; i < 10; ++i) { // Region 1: empty body, constant bound of 10.
}
#pragma omp target teams loop device(global + a) depend(inout: global, a, bn) if(target:a)
for (int i = 0; i < *plocal; ++i) { // Region 2: bound reads *plocal; carries an if(target:a) clause.
static int local1; // Static variable declared inside the region body.
*plocal = global;
local1 = global;
}
#pragma omp target teams loop if(0) firstprivate(global) depend(out:global)
for (int i = 0; i < global; ++i) { // Region 3: carries an if(0) clause; global is listed in firstprivate().
global += 1; // global is named in firstprivate() on the enclosing construct.
}
return a;
}
// Check that the offloading functions are emitted and that the arguments are
// correct and loaded correctly for the target regions in foo().
// Create stack storage and store argument in there.
#endif
// CHECK-64-LABEL: define {{[^@]+}}@_Z3fooi
// CHECK-64-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[A:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[AA:%.*]] = alloca i16, align 2
// CHECK-64-NEXT: [[B:%.*]] = alloca [10 x float], align 4
// CHECK-64-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8
// CHECK-64-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8
// CHECK-64-NEXT: [[C:%.*]] = alloca [5 x [10 x double]], align 8
// CHECK-64-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8
// CHECK-64-NEXT: [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 8
// CHECK-64-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4
// CHECK-64-NEXT: [[DOTDEP_ARR_ADDR:%.*]] = alloca [4 x %struct.kmp_depend_info], align 8
// CHECK-64-NEXT: [[DEP_COUNTER_ADDR:%.*]] = alloca i64, align 8
// CHECK-64-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[GLOBAL_CASTED:%.*]] = alloca i64, align 8
// CHECK-64-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x i8*], align 8
// CHECK-64-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x i8*], align 8
// CHECK-64-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x i8*], align 8
// CHECK-64-NEXT: [[AGG_CAPTURED4:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8
// CHECK-64-NEXT: [[DOTDEP_ARR_ADDR5:%.*]] = alloca [3 x %struct.kmp_depend_info], align 8
// CHECK-64-NEXT: [[DEP_COUNTER_ADDR6:%.*]] = alloca i64, align 8
// CHECK-64-NEXT: [[AGG_CAPTURED7:%.*]] = alloca [[STRUCT_ANON_0]], align 8
// CHECK-64-NEXT: [[DOTDEP_ARR_ADDR8:%.*]] = alloca [3 x %struct.kmp_depend_info], align 8
// CHECK-64-NEXT: [[DEP_COUNTER_ADDR9:%.*]] = alloca i64, align 8
// CHECK-64-NEXT: [[GLOBAL_CASTED10:%.*]] = alloca i64, align 8
// CHECK-64-NEXT: [[AGG_CAPTURED12:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4
// CHECK-64-NEXT: [[DOTDEP_ARR_ADDR13:%.*]] = alloca [1 x %struct.kmp_depend_info], align 8
// CHECK-64-NEXT: [[DEP_COUNTER_ADDR14:%.*]] = alloca i64, align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]])
// CHECK-64-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK-64-NEXT: store i32 0, i32* [[A]], align 4
// CHECK-64-NEXT: store i16 0, i16* [[AA]], align 2
// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK-64-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
// CHECK-64-NEXT: [[TMP3:%.*]] = call i8* @llvm.stacksave()
// CHECK-64-NEXT: store i8* [[TMP3]], i8** [[SAVED_STACK]], align 8
// CHECK-64-NEXT: [[VLA:%.*]] = alloca float, i64 [[TMP2]], align 4
// CHECK-64-NEXT: store i64 [[TMP2]], i64* [[__VLA_EXPR0]], align 8
// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK-64-NEXT: [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
// CHECK-64-NEXT: [[TMP6:%.*]] = mul nuw i64 5, [[TMP5]]
// CHECK-64-NEXT: [[VLA1:%.*]] = alloca double, i64 [[TMP6]], align 8
// CHECK-64-NEXT: store i64 [[TMP5]], i64* [[__VLA_EXPR1]], align 8
// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* @global, align 4
// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[A]], align 4
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], [[TMP8]]
// CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTCAPTURE_EXPR_]], align 4
// CHECK-64-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 0
// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
// CHECK-64-NEXT: store i32 [[TMP10]], i32* [[TMP9]], align 4
// CHECK-64-NEXT: [[TMP11:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 1, i64 40, i64 4, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*))
// CHECK-64-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP11]] to %struct.kmp_task_t_with_privates*
// CHECK-64-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP12]], i32 0, i32 0
// CHECK-64-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP13]], i32 0, i32 0
// CHECK-64-NEXT: [[TMP15:%.*]] = load i8*, i8** [[TMP14]], align 8
// CHECK-64-NEXT: [[TMP16:%.*]] = bitcast %struct.anon* [[AGG_CAPTURED]] to i8*
// CHECK-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP15]], i8* align 4 [[TMP16]], i64 4, i1 false)
// CHECK-64-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* [[DOTDEP_ARR_ADDR]], i64 0, i64 0
// CHECK-64-NEXT: [[TMP18:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO:%.*]], %struct.kmp_depend_info* [[TMP17]], i64 0
// CHECK-64-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP18]], i32 0, i32 0
// CHECK-64-NEXT: store i64 ptrtoint (i32* @global to i64), i64* [[TMP19]], align 8
// CHECK-64-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP18]], i32 0, i32 1
// CHECK-64-NEXT: store i64 4, i64* [[TMP20]], align 8
// CHECK-64-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP18]], i32 0, i32 2
// CHECK-64-NEXT: store i8 1, i8* [[TMP21]], align 8
// CHECK-64-NEXT: [[TMP22:%.*]] = ptrtoint i32* [[A]] to i64
// CHECK-64-NEXT: [[TMP23:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP17]], i64 1
// CHECK-64-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP23]], i32 0, i32 0
// CHECK-64-NEXT: store i64 [[TMP22]], i64* [[TMP24]], align 8
// CHECK-64-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP23]], i32 0, i32 1
// CHECK-64-NEXT: store i64 4, i64* [[TMP25]], align 8
// CHECK-64-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP23]], i32 0, i32 2
// CHECK-64-NEXT: store i8 3, i8* [[TMP26]], align 8
// CHECK-64-NEXT: [[TMP27:%.*]] = ptrtoint [10 x float]* [[B]] to i64
// CHECK-64-NEXT: [[TMP28:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP17]], i64 2
// CHECK-64-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP28]], i32 0, i32 0
// CHECK-64-NEXT: store i64 [[TMP27]], i64* [[TMP29]], align 8
// CHECK-64-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP28]], i32 0, i32 1
// CHECK-64-NEXT: store i64 40, i64* [[TMP30]], align 8
// CHECK-64-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP28]], i32 0, i32 2
// CHECK-64-NEXT: store i8 3, i8* [[TMP31]], align 8
// CHECK-64-NEXT: [[TMP32:%.*]] = mul nsw i64 4, [[TMP5]]
// CHECK-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[VLA1]], i64 [[TMP32]]
// CHECK-64-NEXT: [[TMP33:%.*]] = mul nuw i64 [[TMP5]], 8
// CHECK-64-NEXT: [[TMP34:%.*]] = ptrtoint double* [[ARRAYIDX]] to i64
// CHECK-64-NEXT: [[TMP35:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP17]], i64 3
// CHECK-64-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP35]], i32 0, i32 0
// CHECK-64-NEXT: store i64 [[TMP34]], i64* [[TMP36]], align 8
// CHECK-64-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP35]], i32 0, i32 1
// CHECK-64-NEXT: store i64 [[TMP33]], i64* [[TMP37]], align 8
// CHECK-64-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP35]], i32 0, i32 2
// CHECK-64-NEXT: store i8 3, i8* [[TMP38]], align 8
// CHECK-64-NEXT: store i64 4, i64* [[DEP_COUNTER_ADDR]], align 8
// CHECK-64-NEXT: [[TMP39:%.*]] = bitcast %struct.kmp_depend_info* [[TMP17]] to i8*
// CHECK-64-NEXT: call void @__kmpc_omp_taskwait_deps_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 4, i8* [[TMP39]], i32 0, i8* null, i32 0)
// CHECK-64-NEXT: call void @__kmpc_omp_task_begin_if0(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i8* [[TMP11]])
// CHECK-64-NEXT: [[TMP40:%.*]] = call i32 @.omp_task_entry.(i32 [[TMP0]], %struct.kmp_task_t_with_privates* [[TMP12]]) #[[ATTR3:[0-9]+]]
// CHECK-64-NEXT: call void @__kmpc_omp_task_complete_if0(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i8* [[TMP11]])
// CHECK-64-NEXT: [[TMP41:%.*]] = load i32, i32* @global, align 4
// CHECK-64-NEXT: [[TMP42:%.*]] = load i32, i32* [[A]], align 4
// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP41]], [[TMP42]]
// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTCAPTURE_EXPR_2]], align 4
// CHECK-64-NEXT: [[TMP43:%.*]] = load i64*, i64** @_ZZ3fooiE6plocal, align 8
// CHECK-64-NEXT: [[TMP44:%.*]] = load i32, i32* @global, align 4
// CHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[GLOBAL_CASTED]] to i32*
// CHECK-64-NEXT: store i32 [[TMP44]], i32* [[CONV]], align 4
// CHECK-64-NEXT: [[TMP45:%.*]] = load i64, i64* [[GLOBAL_CASTED]], align 8
// CHECK-64-NEXT: [[TMP46:%.*]] = load i32, i32* [[A]], align 4
// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP46]], 0
// CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
// CHECK-64: omp_if.then:
// CHECK-64-NEXT: [[TMP47:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK-64-NEXT: [[TMP48:%.*]] = bitcast i8** [[TMP47]] to i64**
// CHECK-64-NEXT: store i64* [[TMP43]], i64** [[TMP48]], align 8
// CHECK-64-NEXT: [[TMP49:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK-64-NEXT: [[TMP50:%.*]] = bitcast i8** [[TMP49]] to i64**
// CHECK-64-NEXT: store i64* [[TMP43]], i64** [[TMP50]], align 8
// CHECK-64-NEXT: [[TMP51:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
// CHECK-64-NEXT: store i8* null, i8** [[TMP51]], align 8
// CHECK-64-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK-64-NEXT: [[TMP53:%.*]] = bitcast i8** [[TMP52]] to i64*
// CHECK-64-NEXT: store i64 [[TMP45]], i64* [[TMP53]], align 8
// CHECK-64-NEXT: [[TMP54:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK-64-NEXT: [[TMP55:%.*]] = bitcast i8** [[TMP54]] to i64*
// CHECK-64-NEXT: store i64 [[TMP45]], i64* [[TMP55]], align 8
// CHECK-64-NEXT: [[TMP56:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
// CHECK-64-NEXT: store i8* null, i8** [[TMP56]], align 8
// CHECK-64-NEXT: [[TMP57:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK-64-NEXT: [[TMP58:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK-64-NEXT: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED4]], i32 0, i32 0
// CHECK-64-NEXT: [[TMP60:%.*]] = load i64*, i64** @_ZZ3fooiE6plocal, align 8
// CHECK-64-NEXT: store i64* [[TMP60]], i64** [[TMP59]], align 8
// CHECK-64-NEXT: [[TMP61:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED4]], i32 0, i32 1
// CHECK-64-NEXT: [[TMP62:%.*]] = load i32, i32* @global, align 4
// CHECK-64-NEXT: store i32 [[TMP62]], i32* [[TMP61]], align 8
// CHECK-64-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED4]], i32 0, i32 2
// CHECK-64-NEXT: [[TMP64:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4
// CHECK-64-NEXT: store i32 [[TMP64]], i32* [[TMP63]], align 4
// CHECK-64-NEXT: [[TMP65:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 1, i64 104, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.1*)* @.omp_task_entry..6 to i32 (i32, i8*)*))
// CHECK-64-NEXT: [[TMP66:%.*]] = bitcast i8* [[TMP65]] to %struct.kmp_task_t_with_privates.1*
// CHECK-64-NEXT: [[TMP67:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_1:%.*]], %struct.kmp_task_t_with_privates.1* [[TMP66]], i32 0, i32 0
// CHECK-64-NEXT: [[TMP68:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP67]], i32 0, i32 0
// CHECK-64-NEXT: [[TMP69:%.*]] = load i8*, i8** [[TMP68]], align 8
// CHECK-64-NEXT: [[TMP70:%.*]] = bitcast %struct.anon.0* [[AGG_CAPTURED4]] to i8*
// CHECK-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP69]], i8* align 8 [[TMP70]], i64 16, i1 false)
// CHECK-64-NEXT: [[TMP71:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_1]], %struct.kmp_task_t_with_privates.1* [[TMP66]], i32 0, i32 1
// CHECK-64-NEXT: [[TMP72:%.*]] = bitcast i8* [[TMP69]] to %struct.anon.0*
// CHECK-64-NEXT: [[TMP73:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP71]], i32 0, i32 0
// CHECK-64-NEXT: [[TMP74:%.*]] = load i64*, i64** @_ZZ3fooiE6plocal, align 8
// CHECK-64-NEXT: store i64* [[TMP74]], i64** [[TMP73]], align 8
// CHECK-64-NEXT: [[TMP75:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP71]], i32 0, i32 1
// CHECK-64-NEXT: [[TMP76:%.*]] = bitcast [2 x i8*]* [[TMP75]] to i8*
// CHECK-64-NEXT: [[TMP77:%.*]] = bitcast i8** [[TMP57]] to i8*
// CHECK-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP76]], i8* align 8 [[TMP77]], i64 16, i1 false)
// CHECK-64-NEXT: [[TMP78:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP71]], i32 0, i32 2
// CHECK-64-NEXT: [[TMP79:%.*]] = bitcast [2 x i8*]* [[TMP78]] to i8*
// CHECK-64-NEXT: [[TMP80:%.*]] = bitcast i8** [[TMP58]] to i8*
// CHECK-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP79]], i8* align 8 [[TMP80]], i64 16, i1 false)
// CHECK-64-NEXT: [[TMP81:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP71]], i32 0, i32 3
// CHECK-64-NEXT: [[TMP82:%.*]] = bitcast [2 x i64]* [[TMP81]] to i8*
// CHECK-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP82]], i8* align 8 bitcast ([2 x i64]* @.offload_sizes to i8*), i64 16, i1 false)
// CHECK-64-NEXT: [[TMP83:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP71]], i32 0, i32 4
// CHECK-64-NEXT: [[TMP84:%.*]] = load i32, i32* @global, align 4
// CHECK-64-NEXT: store i32 [[TMP84]], i32* [[TMP83]], align 8
// CHECK-64-NEXT: [[TMP85:%.*]] = getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* [[DOTDEP_ARR_ADDR5]], i64 0, i64 0
// CHECK-64-NEXT: [[TMP86:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP85]], i64 0
// CHECK-64-NEXT: [[TMP87:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP86]], i32 0, i32 0
// CHECK-64-NEXT: store i64 ptrtoint (i32* @global to i64), i64* [[TMP87]], align 8
// CHECK-64-NEXT: [[TMP88:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP86]], i32 0, i32 1
// CHECK-64-NEXT: store i64 4, i64* [[TMP88]], align 8
// CHECK-64-NEXT: [[TMP89:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP86]], i32 0, i32 2
// CHECK-64-NEXT: store i8 3, i8* [[TMP89]], align 8
// CHECK-64-NEXT: [[TMP90:%.*]] = ptrtoint i32* [[A]] to i64
// CHECK-64-NEXT: [[TMP91:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP85]], i64 1
// CHECK-64-NEXT: [[TMP92:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP91]], i32 0, i32 0
// CHECK-64-NEXT: store i64 [[TMP90]], i64* [[TMP92]], align 8
// CHECK-64-NEXT: [[TMP93:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP91]], i32 0, i32 1
// CHECK-64-NEXT: store i64 4, i64* [[TMP93]], align 8
// CHECK-64-NEXT: [[TMP94:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP91]], i32 0, i32 2
// CHECK-64-NEXT: store i8 3, i8* [[TMP94]], align 8
// CHECK-64-NEXT: [[TMP95:%.*]] = mul nuw i64 [[TMP2]], 4
// CHECK-64-NEXT: [[TMP96:%.*]] = ptrtoint float* [[VLA]] to i64
// CHECK-64-NEXT: [[TMP97:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP85]], i64 2
// CHECK-64-NEXT: [[TMP98:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP97]], i32 0, i32 0
// CHECK-64-NEXT: store i64 [[TMP96]], i64* [[TMP98]], align 8
// CHECK-64-NEXT: [[TMP99:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP97]], i32 0, i32 1
// CHECK-64-NEXT: store i64 [[TMP95]], i64* [[TMP99]], align 8
// CHECK-64-NEXT: [[TMP100:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP97]], i32 0, i32 2
// CHECK-64-NEXT: store i8 3, i8* [[TMP100]], align 8
// CHECK-64-NEXT: store i64 3, i64* [[DEP_COUNTER_ADDR6]], align 8
// CHECK-64-NEXT: [[TMP101:%.*]] = bitcast %struct.kmp_depend_info* [[TMP85]] to i8*
// CHECK-64-NEXT: call void @__kmpc_omp_taskwait_deps_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 3, i8* [[TMP101]], i32 0, i8* null, i32 0)
// CHECK-64-NEXT: call void @__kmpc_omp_task_begin_if0(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i8* [[TMP65]])
// CHECK-64-NEXT: [[TMP102:%.*]] = call i32 @.omp_task_entry..6(i32 [[TMP0]], %struct.kmp_task_t_with_privates.1* [[TMP66]]) #[[ATTR3]]
// CHECK-64-NEXT: call void @__kmpc_omp_task_complete_if0(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i8* [[TMP65]])
// CHECK-64-NEXT: br label [[OMP_IF_END:%.*]]
// CHECK-64: omp_if.else:
// CHECK-64-NEXT: [[TMP103:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED7]], i32 0, i32 0
// CHECK-64-NEXT: [[TMP104:%.*]] = load i64*, i64** @_ZZ3fooiE6plocal, align 8
// CHECK-64-NEXT: store i64* [[TMP104]], i64** [[TMP103]], align 8
// CHECK-64-NEXT: [[TMP105:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED7]], i32 0, i32 1
// CHECK-64-NEXT: [[TMP106:%.*]] = load i32, i32* @global, align 4
// CHECK-64-NEXT: store i32 [[TMP106]], i32* [[TMP105]], align 8
// CHECK-64-NEXT: [[TMP107:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED7]], i32 0, i32 2
// CHECK-64-NEXT: [[TMP108:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4
// CHECK-64-NEXT: store i32 [[TMP108]], i32* [[TMP107]], align 4
// CHECK-64-NEXT: [[TMP109:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 1, i64 56, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.2*)* @.omp_task_entry..9 to i32 (i32, i8*)*))
// CHECK-64-NEXT: [[TMP110:%.*]] = bitcast i8* [[TMP109]] to %struct.kmp_task_t_with_privates.2*
// CHECK-64-NEXT: [[TMP111:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2:%.*]], %struct.kmp_task_t_with_privates.2* [[TMP110]], i32 0, i32 0
// CHECK-64-NEXT: [[TMP112:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP111]], i32 0, i32 0
// CHECK-64-NEXT: [[TMP113:%.*]] = load i8*, i8** [[TMP112]], align 8
// CHECK-64-NEXT: [[TMP114:%.*]] = bitcast %struct.anon.0* [[AGG_CAPTURED7]] to i8*
// CHECK-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP113]], i8* align 8 [[TMP114]], i64 16, i1 false)
// CHECK-64-NEXT: [[TMP115:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2]], %struct.kmp_task_t_with_privates.2* [[TMP110]], i32 0, i32 1
// CHECK-64-NEXT: [[TMP116:%.*]] = bitcast i8* [[TMP113]] to %struct.anon.0*
// CHECK-64-NEXT: [[TMP117:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3:%.*]], %struct..kmp_privates.t.3* [[TMP115]], i32 0, i32 0
// CHECK-64-NEXT: [[TMP118:%.*]] = load i64*, i64** @_ZZ3fooiE6plocal, align 8
// CHECK-64-NEXT: store i64* [[TMP118]], i64** [[TMP117]], align 8
// CHECK-64-NEXT: [[TMP119:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], %struct..kmp_privates.t.3* [[TMP115]], i32 0, i32 1
// CHECK-64-NEXT: [[TMP120:%.*]] = load i32, i32* @global, align 4
// CHECK-64-NEXT: store i32 [[TMP120]], i32* [[TMP119]], align 8
// CHECK-64-NEXT: [[TMP121:%.*]] = getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* [[DOTDEP_ARR_ADDR8]], i64 0, i64 0
// CHECK-64-NEXT: [[TMP122:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP121]], i64 0
// CHECK-64-NEXT: [[TMP123:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP122]], i32 0, i32 0
// CHECK-64-NEXT: store i64 ptrtoint (i32* @global to i64), i64* [[TMP123]], align 8
// CHECK-64-NEXT: [[TMP124:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP122]], i32 0, i32 1
// CHECK-64-NEXT: store i64 4, i64* [[TMP124]], align 8
// CHECK-64-NEXT: [[TMP125:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP122]], i32 0, i32 2
// CHECK-64-NEXT: store i8 3, i8* [[TMP125]], align 8
// CHECK-64-NEXT: [[TMP126:%.*]] = ptrtoint i32* [[A]] to i64
// CHECK-64-NEXT: [[TMP127:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP121]], i64 1
// CHECK-64-NEXT: [[TMP128:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP127]], i32 0, i32 0
// CHECK-64-NEXT: store i64 [[TMP126]], i64* [[TMP128]], align 8
// CHECK-64-NEXT: [[TMP129:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP127]], i32 0, i32 1
// CHECK-64-NEXT: store i64 4, i64* [[TMP129]], align 8
// CHECK-64-NEXT: [[TMP130:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP127]], i32 0, i32 2
// CHECK-64-NEXT: store i8 3, i8* [[TMP130]], align 8
// CHECK-64-NEXT: [[TMP131:%.*]] = mul nuw i64 [[TMP2]], 4
// CHECK-64-NEXT: [[TMP132:%.*]] = ptrtoint float* [[VLA]] to i64
// CHECK-64-NEXT: [[TMP133:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP121]], i64 2
// CHECK-64-NEXT: [[TMP134:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP133]], i32 0, i32 0
// CHECK-64-NEXT: store i64 [[TMP132]], i64* [[TMP134]], align 8
// CHECK-64-NEXT: [[TMP135:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP133]], i32 0, i32 1
// CHECK-64-NEXT: store i64 [[TMP131]], i64* [[TMP135]], align 8
// CHECK-64-NEXT: [[TMP136:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP133]], i32 0, i32 2
// CHECK-64-NEXT: store i8 3, i8* [[TMP136]], align 8
// CHECK-64-NEXT: store i64 3, i64* [[DEP_COUNTER_ADDR9]], align 8
// CHECK-64-NEXT: [[TMP137:%.*]] = bitcast %struct.kmp_depend_info* [[TMP121]] to i8*
// CHECK-64-NEXT: call void @__kmpc_omp_taskwait_deps_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 3, i8* [[TMP137]], i32 0, i8* null, i32 0)
// CHECK-64-NEXT: call void @__kmpc_omp_task_begin_if0(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i8* [[TMP109]])
// CHECK-64-NEXT: [[TMP138:%.*]] = call i32 @.omp_task_entry..9(i32 [[TMP0]], %struct.kmp_task_t_with_privates.2* [[TMP110]]) #[[ATTR3]]
// CHECK-64-NEXT: call void @__kmpc_omp_task_complete_if0(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i8* [[TMP109]])
// CHECK-64-NEXT: br label [[OMP_IF_END]]
// CHECK-64: omp_if.end:
// CHECK-64-NEXT: [[TMP139:%.*]] = load i32, i32* @global, align 4
// CHECK-64-NEXT: [[CONV11:%.*]] = bitcast i64* [[GLOBAL_CASTED10]] to i32*
// CHECK-64-NEXT: store i32 [[TMP139]], i32* [[CONV11]], align 4
// CHECK-64-NEXT: [[TMP140:%.*]] = load i64, i64* [[GLOBAL_CASTED10]], align 8
// CHECK-64-NEXT: [[TMP141:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[AGG_CAPTURED12]], i32 0, i32 0
// CHECK-64-NEXT: [[TMP142:%.*]] = load i32, i32* @global, align 4
// CHECK-64-NEXT: store i32 [[TMP142]], i32* [[TMP141]], align 4
// CHECK-64-NEXT: [[TMP143:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 1, i64 48, i64 4, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.5*)* @.omp_task_entry..14 to i32 (i32, i8*)*))
// CHECK-64-NEXT: [[TMP144:%.*]] = bitcast i8* [[TMP143]] to %struct.kmp_task_t_with_privates.5*
// CHECK-64-NEXT: [[TMP145:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_5:%.*]], %struct.kmp_task_t_with_privates.5* [[TMP144]], i32 0, i32 0
// CHECK-64-NEXT: [[TMP146:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP145]], i32 0, i32 0
// CHECK-64-NEXT: [[TMP147:%.*]] = load i8*, i8** [[TMP146]], align 8
// CHECK-64-NEXT: [[TMP148:%.*]] = bitcast %struct.anon.4* [[AGG_CAPTURED12]] to i8*
// CHECK-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP147]], i8* align 4 [[TMP148]], i64 4, i1 false)
// CHECK-64-NEXT: [[TMP149:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_5]], %struct.kmp_task_t_with_privates.5* [[TMP144]], i32 0, i32 1
// CHECK-64-NEXT: [[TMP150:%.*]] = bitcast i8* [[TMP147]] to %struct.anon.4*
// CHECK-64-NEXT: [[TMP151:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_6:%.*]], %struct..kmp_privates.t.6* [[TMP149]], i32 0, i32 0
// CHECK-64-NEXT: [[TMP152:%.*]] = load i32, i32* @global, align 4
// CHECK-64-NEXT: store i32 [[TMP152]], i32* [[TMP151]], align 8
// CHECK-64-NEXT: [[TMP153:%.*]] = getelementptr inbounds [1 x %struct.kmp_depend_info], [1 x %struct.kmp_depend_info]* [[DOTDEP_ARR_ADDR13]], i64 0, i64 0
// CHECK-64-NEXT: [[TMP154:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP153]], i64 0
// CHECK-64-NEXT: [[TMP155:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP154]], i32 0, i32 0
// CHECK-64-NEXT: store i64 ptrtoint (i32* @global to i64), i64* [[TMP155]], align 8
// CHECK-64-NEXT: [[TMP156:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP154]], i32 0, i32 1
// CHECK-64-NEXT: store i64 4, i64* [[TMP156]], align 8
// CHECK-64-NEXT: [[TMP157:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP154]], i32 0, i32 2
// CHECK-64-NEXT: store i8 3, i8* [[TMP157]], align 8
// CHECK-64-NEXT: store i64 1, i64* [[DEP_COUNTER_ADDR14]], align 8
// CHECK-64-NEXT: [[TMP158:%.*]] = bitcast %struct.kmp_depend_info* [[TMP153]] to i8*
// CHECK-64-NEXT: call void @__kmpc_omp_taskwait_deps_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 1, i8* [[TMP158]], i32 0, i8* null, i32 0)
// CHECK-64-NEXT: call void @__kmpc_omp_task_begin_if0(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i8* [[TMP143]])
// CHECK-64-NEXT: [[TMP159:%.*]] = call i32 @.omp_task_entry..14(i32 [[TMP0]], %struct.kmp_task_t_with_privates.5* [[TMP144]]) #[[ATTR3]]
// CHECK-64-NEXT: call void @__kmpc_omp_task_complete_if0(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i8* [[TMP143]])
// CHECK-64-NEXT: [[TMP160:%.*]] = load i32, i32* [[A]], align 4
// CHECK-64-NEXT: [[TMP161:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
// CHECK-64-NEXT: call void @llvm.stackrestore(i8* [[TMP161]])
// CHECK-64-NEXT: ret i32 [[TMP160]]
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l66
// CHECK-64-SAME: () #[[ATTR2:[0-9]+]] {
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*))
// CHECK-64-NEXT: ret void
// CHECK-64-LABEL: define {{[^@]+}}@.omp_outlined.
// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2]] {
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-64: cond.true:
// CHECK-64-NEXT: br label [[COND_END:%.*]]
// CHECK-64: cond.false:
// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK-64-NEXT: br label [[COND_END]]
// CHECK-64: cond.end:
// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4
// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK-64-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK-64-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK-64-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK-64-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]])
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
// CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-64: omp.loop.exit:
// CHECK-64-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP1]])
// CHECK-64-NEXT: ret void
// CHECK-64-LABEL: define {{[^@]+}}@.omp_outlined..1
// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR2]] {
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8
// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8
// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4
// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4
// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8
// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32
// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8
// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32
// CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4
// CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4
// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK-64-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4
// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9
// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-64: cond.true:
// CHECK-64-NEXT: br label [[COND_END:%.*]]
// CHECK-64: cond.false:
// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK-64-NEXT: br label [[COND_END]]
// CHECK-64: cond.end:
// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ]
// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4
// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
// CHECK-64-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK-64-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK-64-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4
// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-64: omp.body.continue:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-64: omp.loop.exit:
// CHECK-64-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]])
// CHECK-64-NEXT: ret void
// CHECK-64-LABEL: define {{[^@]+}}@.omp_task_entry.
// CHECK-64-SAME: (i32 noundef signext [[TMP0:%.*]], %struct.kmp_task_t_with_privates* noalias noundef [[TMP1:%.*]]) #[[ATTR4:[0-9]+]] {
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
// CHECK-64-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8
// CHECK-64-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8
// CHECK-64-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8
// CHECK-64-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 8
// CHECK-64-NEXT: [[KERNEL_ARGS_I:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK-64-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates*, align 8
// CHECK-64-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4
// CHECK-64-NEXT: store %struct.kmp_task_t_with_privates* [[TMP1]], %struct.kmp_task_t_with_privates** [[DOTADDR1]], align 8
// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4
// CHECK-64-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates*, %struct.kmp_task_t_with_privates** [[DOTADDR1]], align 8
// CHECK-64-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 0
// CHECK-64-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2
// CHECK-64-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0
// CHECK-64-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
// CHECK-64-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon*
// CHECK-64-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8*
// CHECK-64-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META7:![0-9]+]])
// CHECK-64-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]])
// CHECK-64-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]])
// CHECK-64-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META14:![0-9]+]])
// CHECK-64-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !16
// CHECK-64-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !16
// CHECK-64-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !16
// CHECK-64-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !16
// CHECK-64-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !16
// CHECK-64-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !16
// CHECK-64-NEXT: [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !16
// CHECK-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP10]], i32 0, i32 0
// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4
// CHECK-64-NEXT: [[TMP13:%.*]] = sext i32 [[TMP12]] to i64
// CHECK-64-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 0
// CHECK-64-NEXT: store i32 2, i32* [[TMP14]], align 4, !noalias !16
// CHECK-64-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 1
// CHECK-64-NEXT: store i32 0, i32* [[TMP15]], align 4, !noalias !16
// CHECK-64-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 2
// CHECK-64-NEXT: store i8** null, i8*** [[TMP16]], align 8, !noalias !16
// CHECK-64-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 3
// CHECK-64-NEXT: store i8** null, i8*** [[TMP17]], align 8, !noalias !16
// CHECK-64-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 4
// CHECK-64-NEXT: store i64* null, i64** [[TMP18]], align 8, !noalias !16
// CHECK-64-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 5
// CHECK-64-NEXT: store i64* null, i64** [[TMP19]], align 8, !noalias !16
// CHECK-64-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 6
// CHECK-64-NEXT: store i8** null, i8*** [[TMP20]], align 8, !noalias !16
// CHECK-64-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 7
// CHECK-64-NEXT: store i8** null, i8*** [[TMP21]], align 8, !noalias !16
// CHECK-64-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 8
// CHECK-64-NEXT: store i64 10, i64* [[TMP22]], align 8, !noalias !16
// CHECK-64-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 9
// CHECK-64-NEXT: store i64 0, i64* [[TMP23]], align 8, !noalias !16
// CHECK-64-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 10
// CHECK-64-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP24]], align 4, !noalias !16
// CHECK-64-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 11
// CHECK-64-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP25]], align 4, !noalias !16
// CHECK-64-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 12
// CHECK-64-NEXT: store i32 0, i32* [[TMP26]], align 4, !noalias !16
// CHECK-64-NEXT: [[TMP27:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 [[TMP13]], i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l66.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]])
// CHECK-64-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0
// CHECK-64-NEXT: br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED_I:%.*]], label [[DOTOMP_OUTLINED__2_EXIT:%.*]]
// CHECK-64: omp_offload.failed.i:
// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l66() #[[ATTR3]]
// CHECK-64-NEXT: br label [[DOTOMP_OUTLINED__2_EXIT]]
// CHECK-64: .omp_outlined..2.exit:
// CHECK-64-NEXT: ret i32 0
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l76
// CHECK-64-SAME: (i64* noundef [[PLOCAL:%.*]], i64 noundef [[GLOBAL:%.*]]) #[[ATTR2]] {
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[PLOCAL_ADDR:%.*]] = alloca i64*, align 8
// CHECK-64-NEXT: [[GLOBAL_ADDR:%.*]] = alloca i64, align 8
// CHECK-64-NEXT: [[GLOBAL_CASTED:%.*]] = alloca i64, align 8
// CHECK-64-NEXT: store i64* [[PLOCAL]], i64** [[PLOCAL_ADDR]], align 8
// CHECK-64-NEXT: store i64 [[GLOBAL]], i64* [[GLOBAL_ADDR]], align 8
// CHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[GLOBAL_ADDR]] to i32*
// CHECK-64-NEXT: [[TMP0:%.*]] = load i64*, i64** [[PLOCAL_ADDR]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4
// CHECK-64-NEXT: [[CONV1:%.*]] = bitcast i64* [[GLOBAL_CASTED]] to i32*
// CHECK-64-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4
// CHECK-64-NEXT: [[TMP2:%.*]] = load i64, i64* [[GLOBAL_CASTED]], align 8
// CHECK-64-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64*, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64* [[TMP0]], i64 [[TMP2]])
// CHECK-64-NEXT: ret void
// CHECK-64-LABEL: define {{[^@]+}}@.omp_outlined..3
// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64* noundef [[PLOCAL:%.*]], i64 noundef [[GLOBAL:%.*]]) #[[ATTR2]] {
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK-64-NEXT: [[PLOCAL_ADDR:%.*]] = alloca i64*, align 8
// CHECK-64-NEXT: [[GLOBAL_ADDR:%.*]] = alloca i64, align 8
// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i64, align 8
// CHECK-64-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[I4:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[GLOBAL_CASTED:%.*]] = alloca i64, align 8
// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK-64-NEXT: store i64* [[PLOCAL]], i64** [[PLOCAL_ADDR]], align 8
// CHECK-64-NEXT: store i64 [[GLOBAL]], i64* [[GLOBAL_ADDR]], align 8
// CHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[GLOBAL_ADDR]] to i32*
// CHECK-64-NEXT: [[TMP0:%.*]] = load i64*, i64** [[PLOCAL_ADDR]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[TMP0]], align 8
// CHECK-64-NEXT: store i64 [[TMP1]], i64* [[DOTCAPTURE_EXPR_]], align 8
// CHECK-64-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_]], align 8
// CHECK-64-NEXT: [[SUB:%.*]] = sub nsw i64 [[TMP2]], 0
// CHECK-64-NEXT: [[DIV:%.*]] = sdiv i64 [[SUB]], 1
// CHECK-64-NEXT: [[CONV2:%.*]] = trunc i64 [[DIV]] to i32
// CHECK-64-NEXT: [[SUB3:%.*]] = sub nsw i32 [[CONV2]], 1
// CHECK-64-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_1]], align 4
// CHECK-64-NEXT: store i32 0, i32* [[I]], align 4
// CHECK-64-NEXT: [[TMP3:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_]], align 8
// CHECK-64-NEXT: [[CMP:%.*]] = icmp slt i64 0, [[TMP3]]
// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK-64: omp.precond.then:
// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4
// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK-64-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4
// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
// CHECK-64-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
// CHECK-64-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-64: cond.true:
// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
// CHECK-64-NEXT: br label [[COND_END:%.*]]
// CHECK-64: cond.false:
// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK-64-NEXT: br label [[COND_END]]
// CHECK-64: cond.end:
// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ]
// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4
// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK-64-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK-64-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]]
// CHECK-64-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK-64-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64
// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK-64-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64
// CHECK-64-NEXT: [[TMP18:%.*]] = load i64*, i64** [[PLOCAL_ADDR]], align 8
// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4
// CHECK-64-NEXT: [[CONV7:%.*]] = bitcast i64* [[GLOBAL_CASTED]] to i32*
// CHECK-64-NEXT: store i32 [[TMP19]], i32* [[CONV7]], align 4
// CHECK-64-NEXT: [[TMP20:%.*]] = load i64, i64* [[GLOBAL_CASTED]], align 8
// CHECK-64-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64*, i64)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i64 [[TMP15]], i64 [[TMP17]], i64* [[TMP18]], i64 [[TMP20]])
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]]
// CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-64: omp.loop.exit:
// CHECK-64-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4
// CHECK-64-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP24]])
// CHECK-64-NEXT: br label [[OMP_PRECOND_END]]
// CHECK-64: omp.precond.end:
// CHECK-64-NEXT: ret void
// CHECK-64-LABEL: define {{[^@]+}}@.omp_outlined..4
// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64* noundef [[PLOCAL:%.*]], i64 noundef [[GLOBAL:%.*]]) #[[ATTR2]] {
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
// CHECK-64-NEXT: [[PLOCAL_ADDR:%.*]] = alloca i64*, align 8
// CHECK-64-NEXT: [[GLOBAL_ADDR:%.*]] = alloca i64, align 8
// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i64, align 8
// CHECK-64-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[I6:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8
// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8
// CHECK-64-NEXT: store i64* [[PLOCAL]], i64** [[PLOCAL_ADDR]], align 8
// CHECK-64-NEXT: store i64 [[GLOBAL]], i64* [[GLOBAL_ADDR]], align 8
// CHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[GLOBAL_ADDR]] to i32*
// CHECK-64-NEXT: [[TMP0:%.*]] = load i64*, i64** [[PLOCAL_ADDR]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[TMP0]], align 8
// CHECK-64-NEXT: store i64 [[TMP1]], i64* [[DOTCAPTURE_EXPR_]], align 8
// CHECK-64-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_]], align 8
// CHECK-64-NEXT: [[SUB:%.*]] = sub nsw i64 [[TMP2]], 0
// CHECK-64-NEXT: [[DIV:%.*]] = sdiv i64 [[SUB]], 1
// CHECK-64-NEXT: [[CONV2:%.*]] = trunc i64 [[DIV]] to i32
// CHECK-64-NEXT: [[SUB3:%.*]] = sub nsw i32 [[CONV2]], 1
// CHECK-64-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_1]], align 4
// CHECK-64-NEXT: store i32 0, i32* [[I]], align 4
// CHECK-64-NEXT: [[TMP3:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_]], align 8
// CHECK-64-NEXT: [[CMP:%.*]] = icmp slt i64 0, [[TMP3]]
// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK-64: omp.precond.then:
// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4
// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4
// CHECK-64-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8
// CHECK-64-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP5]] to i32
// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8
// CHECK-64-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP6]] to i32
// CHECK-64-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_LB]], align 4
// CHECK-64-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4
// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK-64-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4
// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
// CHECK-64-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]]
// CHECK-64-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-64: cond.true:
// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
// CHECK-64-NEXT: br label [[COND_END:%.*]]
// CHECK-64: cond.false:
// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK-64-NEXT: br label [[COND_END]]
// CHECK-64: cond.end:
// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ]
// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4
// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
// CHECK-64-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK-64-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]]
// CHECK-64-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I6]], align 4
// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[CONV]], align 4
// CHECK-64-NEXT: [[CONV9:%.*]] = sext i32 [[TMP17]] to i64
// CHECK-64-NEXT: [[TMP18:%.*]] = load i64*, i64** [[PLOCAL_ADDR]], align 8
// CHECK-64-NEXT: store i64 [[CONV9]], i64* [[TMP18]], align 8
// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4
// CHECK-64-NEXT: store i32 [[TMP19]], i32* @_ZZ3fooiE6local1, align 4
// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-64: omp.body.continue:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK-64-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], 1
// CHECK-64-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-64: omp.loop.exit:
// CHECK-64-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4
// CHECK-64-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP22]])
// CHECK-64-NEXT: br label [[OMP_PRECOND_END]]
// CHECK-64: omp.precond.end:
// CHECK-64-NEXT: ret void
// CHECK-64-LABEL: define {{[^@]+}}@.omp_task_privates_map.
// CHECK-64-SAME: (%struct..kmp_privates.t* noalias noundef [[TMP0:%.*]], i64*** noalias noundef [[TMP1:%.*]], i32** noalias noundef [[TMP2:%.*]], [2 x i8*]** noalias noundef [[TMP3:%.*]], [2 x i8*]** noalias noundef [[TMP4:%.*]], [2 x i64]** noalias noundef [[TMP5:%.*]]) #[[ATTR7:[0-9]+]] {
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t*, align 8
// CHECK-64-NEXT: [[DOTADDR1:%.*]] = alloca i64***, align 8
// CHECK-64-NEXT: [[DOTADDR2:%.*]] = alloca i32**, align 8
// CHECK-64-NEXT: [[DOTADDR3:%.*]] = alloca [2 x i8*]**, align 8
// CHECK-64-NEXT: [[DOTADDR4:%.*]] = alloca [2 x i8*]**, align 8
// CHECK-64-NEXT: [[DOTADDR5:%.*]] = alloca [2 x i64]**, align 8
// CHECK-64-NEXT: store %struct..kmp_privates.t* [[TMP0]], %struct..kmp_privates.t** [[DOTADDR]], align 8
// CHECK-64-NEXT: store i64*** [[TMP1]], i64**** [[DOTADDR1]], align 8
// CHECK-64-NEXT: store i32** [[TMP2]], i32*** [[DOTADDR2]], align 8
// CHECK-64-NEXT: store [2 x i8*]** [[TMP3]], [2 x i8*]*** [[DOTADDR3]], align 8
// CHECK-64-NEXT: store [2 x i8*]** [[TMP4]], [2 x i8*]*** [[DOTADDR4]], align 8
// CHECK-64-NEXT: store [2 x i64]** [[TMP5]], [2 x i64]*** [[DOTADDR5]], align 8
// CHECK-64-NEXT: [[TMP6:%.*]] = load %struct..kmp_privates.t*, %struct..kmp_privates.t** [[DOTADDR]], align 8
// CHECK-64-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP6]], i32 0, i32 0
// CHECK-64-NEXT: [[TMP8:%.*]] = load i64***, i64**** [[DOTADDR1]], align 8
// CHECK-64-NEXT: store i64** [[TMP7]], i64*** [[TMP8]], align 8
// CHECK-64-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP6]], i32 0, i32 1
// CHECK-64-NEXT: [[TMP10:%.*]] = load [2 x i8*]**, [2 x i8*]*** [[DOTADDR3]], align 8
// CHECK-64-NEXT: store [2 x i8*]* [[TMP9]], [2 x i8*]** [[TMP10]], align 8
// CHECK-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP6]], i32 0, i32 2
// CHECK-64-NEXT: [[TMP12:%.*]] = load [2 x i8*]**, [2 x i8*]*** [[DOTADDR4]], align 8
// CHECK-64-NEXT: store [2 x i8*]* [[TMP11]], [2 x i8*]** [[TMP12]], align 8
// CHECK-64-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP6]], i32 0, i32 3
// CHECK-64-NEXT: [[TMP14:%.*]] = load [2 x i64]**, [2 x i64]*** [[DOTADDR5]], align 8
// CHECK-64-NEXT: store [2 x i64]* [[TMP13]], [2 x i64]** [[TMP14]], align 8
// CHECK-64-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP6]], i32 0, i32 4
// CHECK-64-NEXT: [[TMP16:%.*]] = load i32**, i32*** [[DOTADDR2]], align 8
// CHECK-64-NEXT: store i32* [[TMP15]], i32** [[TMP16]], align 8
// CHECK-64-NEXT: ret void
// CHECK-64-LABEL: define {{[^@]+}}@.omp_task_entry..6
// CHECK-64-SAME: (i32 noundef signext [[TMP0:%.*]], %struct.kmp_task_t_with_privates.1* noalias noundef [[TMP1:%.*]]) #[[ATTR4]] {
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
// CHECK-64-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8
// CHECK-64-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8
// CHECK-64-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8
// CHECK-64-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.0*, align 8
// CHECK-64-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca i64**, align 8
// CHECK-64-NEXT: [[DOTFIRSTPRIV_PTR_ADDR1_I:%.*]] = alloca i32*, align 8
// CHECK-64-NEXT: [[DOTFIRSTPRIV_PTR_ADDR2_I:%.*]] = alloca [2 x i8*]*, align 8
// CHECK-64-NEXT: [[DOTFIRSTPRIV_PTR_ADDR3_I:%.*]] = alloca [2 x i8*]*, align 8
// CHECK-64-NEXT: [[DOTFIRSTPRIV_PTR_ADDR4_I:%.*]] = alloca [2 x i64]*, align 8
// CHECK-64-NEXT: [[DOTCAPTURE_EXPR__I:%.*]] = alloca i64, align 8
// CHECK-64-NEXT: [[DOTCAPTURE_EXPR_5_I:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[GLOBAL_CASTED_I:%.*]] = alloca i64, align 8
// CHECK-64-NEXT: [[KERNEL_ARGS_I:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK-64-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.1*, align 8
// CHECK-64-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4
// CHECK-64-NEXT: store %struct.kmp_task_t_with_privates.1* [[TMP1]], %struct.kmp_task_t_with_privates.1** [[DOTADDR1]], align 8
// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4
// CHECK-64-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.1*, %struct.kmp_task_t_with_privates.1** [[DOTADDR1]], align 8
// CHECK-64-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_1:%.*]], %struct.kmp_task_t_with_privates.1* [[TMP3]], i32 0, i32 0
// CHECK-64-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2
// CHECK-64-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0
// CHECK-64-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
// CHECK-64-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.0*
// CHECK-64-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_1]], %struct.kmp_task_t_with_privates.1* [[TMP3]], i32 0, i32 1
// CHECK-64-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8*
// CHECK-64-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates.1* [[TMP3]] to i8*
// CHECK-64-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META17:![0-9]+]])
// CHECK-64-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META20:![0-9]+]])
// CHECK-64-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META22:![0-9]+]])
// CHECK-64-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META24:![0-9]+]])
// CHECK-64-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !26
// CHECK-64-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !26
// CHECK-64-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !26
// CHECK-64-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i64***, i32**, [2 x i8*]**, [2 x i8*]**, [2 x i64]**)* @.omp_task_privates_map. to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !26
// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !26
// CHECK-64-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !26
// CHECK-64-NEXT: [[TMP12:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !26
// CHECK-64-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !26
// CHECK-64-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !26
// CHECK-64-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i64***, i32**, [2 x i8*]**, [2 x i8*]**, [2 x i64]**)*
// CHECK-64-NEXT: call void [[TMP15]](i8* [[TMP14]], i64*** [[DOTFIRSTPRIV_PTR_ADDR_I]], i32** [[DOTFIRSTPRIV_PTR_ADDR1_I]], [2 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], [2 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR3_I]], [2 x i64]** [[DOTFIRSTPRIV_PTR_ADDR4_I]]) #[[ATTR3]]
// CHECK-64-NEXT: [[TMP16:%.*]] = load i64**, i64*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !26
// CHECK-64-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !26
// CHECK-64-NEXT: [[TMP18:%.*]] = load [2 x i8*]*, [2 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 8, !noalias !26
// CHECK-64-NEXT: [[TMP19:%.*]] = load [2 x i8*]*, [2 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 8, !noalias !26
// CHECK-64-NEXT: [[TMP20:%.*]] = load [2 x i64]*, [2 x i64]** [[DOTFIRSTPRIV_PTR_ADDR4_I]], align 8, !noalias !26
// CHECK-64-NEXT: [[TMP21:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP18]], i64 0, i64 0
// CHECK-64-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP19]], i64 0, i64 0
// CHECK-64-NEXT: [[TMP23:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[TMP20]], i64 0, i64 0
// CHECK-64-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP12]], i32 0, i32 2
// CHECK-64-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4
// CHECK-64-NEXT: [[TMP26:%.*]] = sext i32 [[TMP25]] to i64
// CHECK-64-NEXT: [[TMP27:%.*]] = load i64*, i64** [[TMP16]], align 8
// CHECK-64-NEXT: [[TMP28:%.*]] = load i64, i64* [[TMP27]], align 8
// CHECK-64-NEXT: store i64 [[TMP28]], i64* [[DOTCAPTURE_EXPR__I]], align 8, !noalias !26
// CHECK-64-NEXT: [[TMP29:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__I]], align 8, !noalias !26
// CHECK-64-NEXT: [[CONV_I:%.*]] = trunc i64 [[TMP29]] to i32
// CHECK-64-NEXT: [[SUB6_I:%.*]] = sub nsw i32 [[CONV_I]], 1
// CHECK-64-NEXT: store i32 [[SUB6_I]], i32* [[DOTCAPTURE_EXPR_5_I]], align 4, !noalias !26
// CHECK-64-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_5_I]], align 4, !noalias !26
// CHECK-64-NEXT: [[ADD_I:%.*]] = add nsw i32 [[TMP30]], 1
// CHECK-64-NEXT: [[TMP31:%.*]] = zext i32 [[ADD_I]] to i64
// CHECK-64-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 0
// CHECK-64-NEXT: store i32 2, i32* [[TMP32]], align 4, !noalias !26
// CHECK-64-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 1
// CHECK-64-NEXT: store i32 2, i32* [[TMP33]], align 4, !noalias !26
// CHECK-64-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 2
// CHECK-64-NEXT: store i8** [[TMP21]], i8*** [[TMP34]], align 8, !noalias !26
// CHECK-64-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 3
// CHECK-64-NEXT: store i8** [[TMP22]], i8*** [[TMP35]], align 8, !noalias !26
// CHECK-64-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 4
// CHECK-64-NEXT: store i64* [[TMP23]], i64** [[TMP36]], align 8, !noalias !26
// CHECK-64-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 5
// CHECK-64-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes, i32 0, i32 0), i64** [[TMP37]], align 8, !noalias !26
// CHECK-64-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 6
// CHECK-64-NEXT: store i8** null, i8*** [[TMP38]], align 8, !noalias !26
// CHECK-64-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 7
// CHECK-64-NEXT: store i8** null, i8*** [[TMP39]], align 8, !noalias !26
// CHECK-64-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 8
// CHECK-64-NEXT: store i64 [[TMP31]], i64* [[TMP40]], align 8, !noalias !26
// CHECK-64-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 9
// CHECK-64-NEXT: store i64 0, i64* [[TMP41]], align 8, !noalias !26
// CHECK-64-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 10
// CHECK-64-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP42]], align 4, !noalias !26
// CHECK-64-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 11
// CHECK-64-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP43]], align 4, !noalias !26
// CHECK-64-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 12
// CHECK-64-NEXT: store i32 0, i32* [[TMP44]], align 4, !noalias !26
// CHECK-64-NEXT: [[TMP45:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 [[TMP26]], i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l76.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]])
// CHECK-64-NEXT: [[TMP46:%.*]] = icmp ne i32 [[TMP45]], 0
// CHECK-64-NEXT: br i1 [[TMP46]], label [[OMP_OFFLOAD_FAILED_I:%.*]], label [[DOTOMP_OUTLINED__5_EXIT:%.*]]
// CHECK-64: omp_offload.failed.i:
// CHECK-64-NEXT: [[TMP47:%.*]] = load i64*, i64** [[TMP16]], align 8
// CHECK-64-NEXT: [[TMP48:%.*]] = load i32, i32* @global, align 4, !noalias !26
// CHECK-64-NEXT: [[CONV7_I:%.*]] = bitcast i64* [[GLOBAL_CASTED_I]] to i32*
// CHECK-64-NEXT: store i32 [[TMP48]], i32* [[CONV7_I]], align 4, !noalias !26
// CHECK-64-NEXT: [[TMP49:%.*]] = load i64, i64* [[GLOBAL_CASTED_I]], align 8, !noalias !26
// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l76(i64* [[TMP47]], i64 [[TMP49]]) #[[ATTR3]]
// CHECK-64-NEXT: br label [[DOTOMP_OUTLINED__5_EXIT]]
// CHECK-64: .omp_outlined..5.exit:
// CHECK-64-NEXT: ret i32 0
// CHECK-64-LABEL: define {{[^@]+}}@.omp_task_privates_map..8
// CHECK-64-SAME: (%struct..kmp_privates.t.3* noalias noundef [[TMP0:%.*]], i64*** noalias noundef [[TMP1:%.*]], i32** noalias noundef [[TMP2:%.*]]) #[[ATTR7]] {
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t.3*, align 8
// CHECK-64-NEXT: [[DOTADDR1:%.*]] = alloca i64***, align 8
// CHECK-64-NEXT: [[DOTADDR2:%.*]] = alloca i32**, align 8
// CHECK-64-NEXT: store %struct..kmp_privates.t.3* [[TMP0]], %struct..kmp_privates.t.3** [[DOTADDR]], align 8
// CHECK-64-NEXT: store i64*** [[TMP1]], i64**** [[DOTADDR1]], align 8
// CHECK-64-NEXT: store i32** [[TMP2]], i32*** [[DOTADDR2]], align 8
// CHECK-64-NEXT: [[TMP3:%.*]] = load %struct..kmp_privates.t.3*, %struct..kmp_privates.t.3** [[DOTADDR]], align 8
// CHECK-64-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3:%.*]], %struct..kmp_privates.t.3* [[TMP3]], i32 0, i32 0
// CHECK-64-NEXT: [[TMP5:%.*]] = load i64***, i64**** [[DOTADDR1]], align 8
// CHECK-64-NEXT: store i64** [[TMP4]], i64*** [[TMP5]], align 8
// CHECK-64-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], %struct..kmp_privates.t.3* [[TMP3]], i32 0, i32 1
// CHECK-64-NEXT: [[TMP7:%.*]] = load i32**, i32*** [[DOTADDR2]], align 8
// CHECK-64-NEXT: store i32* [[TMP6]], i32** [[TMP7]], align 8
// CHECK-64-NEXT: ret void
// CHECK-64-LABEL: define {{[^@]+}}@.omp_task_entry..9
// CHECK-64-SAME: (i32 noundef signext [[TMP0:%.*]], %struct.kmp_task_t_with_privates.2* noalias noundef [[TMP1:%.*]]) #[[ATTR4]] {
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
// CHECK-64-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8
// CHECK-64-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8
// CHECK-64-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8
// CHECK-64-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.0*, align 8
// CHECK-64-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca i64**, align 8
// CHECK-64-NEXT: [[DOTFIRSTPRIV_PTR_ADDR1_I:%.*]] = alloca i32*, align 8
// CHECK-64-NEXT: [[GLOBAL_CASTED_I:%.*]] = alloca i64, align 8
// CHECK-64-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.2*, align 8
// CHECK-64-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4
// CHECK-64-NEXT: store %struct.kmp_task_t_with_privates.2* [[TMP1]], %struct.kmp_task_t_with_privates.2** [[DOTADDR1]], align 8
// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4
// CHECK-64-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.2*, %struct.kmp_task_t_with_privates.2** [[DOTADDR1]], align 8
// CHECK-64-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2:%.*]], %struct.kmp_task_t_with_privates.2* [[TMP3]], i32 0, i32 0
// CHECK-64-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2
// CHECK-64-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0
// CHECK-64-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
// CHECK-64-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.0*
// CHECK-64-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2]], %struct.kmp_task_t_with_privates.2* [[TMP3]], i32 0, i32 1
// CHECK-64-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t.3* [[TMP9]] to i8*
// CHECK-64-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates.2* [[TMP3]] to i8*
// CHECK-64-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META27:![0-9]+]])
// CHECK-64-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META30:![0-9]+]])
// CHECK-64-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META32:![0-9]+]])
// CHECK-64-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META34:![0-9]+]])
// CHECK-64-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !36
// CHECK-64-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !36
// CHECK-64-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !36
// CHECK-64-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t.3*, i64***, i32**)* @.omp_task_privates_map..8 to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !36
// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !36
// CHECK-64-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !36
// CHECK-64-NEXT: [[TMP12:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !36
// CHECK-64-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !36
// CHECK-64-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !36
// CHECK-64-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i64***, i32**)*
// CHECK-64-NEXT: call void [[TMP15]](i8* [[TMP14]], i64*** [[DOTFIRSTPRIV_PTR_ADDR_I]], i32** [[DOTFIRSTPRIV_PTR_ADDR1_I]]) #[[ATTR3]]
// CHECK-64-NEXT: [[TMP16:%.*]] = load i64**, i64*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !36
// CHECK-64-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !36
// CHECK-64-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP12]], i32 0, i32 2
// CHECK-64-NEXT: [[TMP19:%.*]] = load i64*, i64** [[TMP16]], align 8
// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* @global, align 4, !noalias !36
// CHECK-64-NEXT: [[CONV_I:%.*]] = bitcast i64* [[GLOBAL_CASTED_I]] to i32*
// CHECK-64-NEXT: store i32 [[TMP20]], i32* [[CONV_I]], align 4, !noalias !36
// CHECK-64-NEXT: [[TMP21:%.*]] = load i64, i64* [[GLOBAL_CASTED_I]], align 8, !noalias !36
// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l76(i64* [[TMP19]], i64 [[TMP21]]) #[[ATTR3]]
// CHECK-64-NEXT: ret i32 0
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l83
// CHECK-64-SAME: (i64 noundef [[GLOBAL:%.*]]) #[[ATTR2]] {
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[GLOBAL_ADDR:%.*]] = alloca i64, align 8
// CHECK-64-NEXT: [[GLOBAL_CASTED:%.*]] = alloca i64, align 8
// CHECK-64-NEXT: store i64 [[GLOBAL]], i64* [[GLOBAL_ADDR]], align 8
// CHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[GLOBAL_ADDR]] to i32*
// CHECK-64-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4
// CHECK-64-NEXT: [[CONV1:%.*]] = bitcast i64* [[GLOBAL_CASTED]] to i32*
// CHECK-64-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4
// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[GLOBAL_CASTED]], align 8
// CHECK-64-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..10 to void (i32*, i32*, ...)*), i64 [[TMP1]])
// CHECK-64-NEXT: ret void
// CHECK-64-LABEL: define {{[^@]+}}@.omp_outlined..10
// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[GLOBAL:%.*]]) #[[ATTR2]] {
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK-64-NEXT: [[GLOBAL_ADDR:%.*]] = alloca i64, align 8
// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[I3:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[GLOBAL_CASTED:%.*]] = alloca i64, align 8
// CHECK-64-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK-64-NEXT: store i64 [[GLOBAL]], i64* [[GLOBAL_ADDR]], align 8
// CHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[GLOBAL_ADDR]] to i32*
// CHECK-64-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4
// CHECK-64-NEXT: store i32 [[TMP0]], i32* [[DOTCAPTURE_EXPR_]], align 4
// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
// CHECK-64-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 0
// CHECK-64-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK-64-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
// CHECK-64-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
// CHECK-64-NEXT: store i32 0, i32* [[I]], align 4
// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
// CHECK-64-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP2]]
// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK-64: omp.precond.then:
// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
// CHECK-64-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_COMB_UB]], align 4
// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK-64-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4
// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]]
// CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-64: cond.true:
// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
// CHECK-64-NEXT: br label [[COND_END:%.*]]
// CHECK-64: cond.false:
// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK-64-NEXT: br label [[COND_END]]
// CHECK-64: cond.end:
// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ [[TMP8]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ]
// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4
// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK-64-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK-64-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]]
// CHECK-64-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK-64-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64
// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK-64-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64
// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[CONV]], align 4
// CHECK-64-NEXT: [[CONV6:%.*]] = bitcast i64* [[GLOBAL_CASTED]] to i32*
// CHECK-64-NEXT: store i32 [[TMP17]], i32* [[CONV6]], align 4
// CHECK-64-NEXT: [[TMP18:%.*]] = load i64, i64* [[GLOBAL_CASTED]], align 8
// CHECK-64-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4
// CHECK-64-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP20]])
// CHECK-64-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK-64-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK-64-NEXT: call void @.omp_outlined..11(i32* [[TMP21]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP14]], i64 [[TMP16]], i64 [[TMP18]]) #[[ATTR3]]
// CHECK-64-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP20]])
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
// CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-64: omp.loop.exit:
// CHECK-64-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK-64-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4
// CHECK-64-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP25]])
// CHECK-64-NEXT: br label [[OMP_PRECOND_END]]
// CHECK-64: omp.precond.end:
// CHECK-64-NEXT: ret void
// CHECK-64-LABEL: define {{[^@]+}}@.omp_outlined..11
// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[GLOBAL:%.*]]) #[[ATTR2]] {
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
// CHECK-64-NEXT: [[GLOBAL_ADDR:%.*]] = alloca i64, align 8
// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[I5:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8
// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8
// CHECK-64-NEXT: store i64 [[GLOBAL]], i64* [[GLOBAL_ADDR]], align 8
// CHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[GLOBAL_ADDR]] to i32*
// CHECK-64-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4
// CHECK-64-NEXT: store i32 [[TMP0]], i32* [[DOTCAPTURE_EXPR_]], align 4
// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
// CHECK-64-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 0
// CHECK-64-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK-64-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
// CHECK-64-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
// CHECK-64-NEXT: store i32 0, i32* [[I]], align 4
// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
// CHECK-64-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP2]]
// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK-64: omp.precond.then:
// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4
// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
// CHECK-64-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_UB]], align 4
// CHECK-64-NEXT: [[TMP4:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8
// CHECK-64-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP4]] to i32
// CHECK-64-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8
// CHECK-64-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP5]] to i32
// CHECK-64-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_LB]], align 4
// CHECK-64-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4
// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK-64-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4
// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP7]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
// CHECK-64-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]]
// CHECK-64-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-64: cond.true:
// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
// CHECK-64-NEXT: br label [[COND_END:%.*]]
// CHECK-64: cond.false:
// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK-64-NEXT: br label [[COND_END]]
// CHECK-64: cond.end:
// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ]
// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4
// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
// CHECK-64-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK-64-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]]
// CHECK-64-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I5]], align 4
// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 4
// CHECK-64-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP16]], 1
// CHECK-64-NEXT: store i32 [[ADD8]], i32* [[CONV]], align 4
// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-64: omp.body.continue:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK-64-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP17]], 1
// CHECK-64-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-64: omp.loop.exit:
// CHECK-64-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4
// CHECK-64-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP19]])
// CHECK-64-NEXT: br label [[OMP_PRECOND_END]]
// CHECK-64: omp.precond.end:
// CHECK-64-NEXT: ret void
// CHECK-64-LABEL: define {{[^@]+}}@.omp_task_privates_map..13
// CHECK-64-SAME: (%struct..kmp_privates.t.6* noalias noundef [[TMP0:%.*]], i32** noalias noundef [[TMP1:%.*]]) #[[ATTR7]] {
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t.6*, align 8
// CHECK-64-NEXT: [[DOTADDR1:%.*]] = alloca i32**, align 8
// CHECK-64-NEXT: store %struct..kmp_privates.t.6* [[TMP0]], %struct..kmp_privates.t.6** [[DOTADDR]], align 8
// CHECK-64-NEXT: store i32** [[TMP1]], i32*** [[DOTADDR1]], align 8
// CHECK-64-NEXT: [[TMP2:%.*]] = load %struct..kmp_privates.t.6*, %struct..kmp_privates.t.6** [[DOTADDR]], align 8
// CHECK-64-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_6:%.*]], %struct..kmp_privates.t.6* [[TMP2]], i32 0, i32 0
// CHECK-64-NEXT: [[TMP4:%.*]] = load i32**, i32*** [[DOTADDR1]], align 8
// CHECK-64-NEXT: store i32* [[TMP3]], i32** [[TMP4]], align 8
// CHECK-64-NEXT: ret void
// CHECK-64-LABEL: define {{[^@]+}}@.omp_task_entry..14
// CHECK-64-SAME: (i32 noundef signext [[TMP0:%.*]], %struct.kmp_task_t_with_privates.5* noalias noundef [[TMP1:%.*]]) #[[ATTR4]] {
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
// CHECK-64-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8
// CHECK-64-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8
// CHECK-64-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8
// CHECK-64-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.4*, align 8
// CHECK-64-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca i32*, align 8
// CHECK-64-NEXT: [[GLOBAL_CASTED_I:%.*]] = alloca i64, align 8
// CHECK-64-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.5*, align 8
// CHECK-64-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4
// CHECK-64-NEXT: store %struct.kmp_task_t_with_privates.5* [[TMP1]], %struct.kmp_task_t_with_privates.5** [[DOTADDR1]], align 8
// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4
// CHECK-64-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.5*, %struct.kmp_task_t_with_privates.5** [[DOTADDR1]], align 8
// CHECK-64-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_5:%.*]], %struct.kmp_task_t_with_privates.5* [[TMP3]], i32 0, i32 0
// CHECK-64-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2
// CHECK-64-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0
// CHECK-64-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
// CHECK-64-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.4*
// CHECK-64-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_5]], %struct.kmp_task_t_with_privates.5* [[TMP3]], i32 0, i32 1
// CHECK-64-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t.6* [[TMP9]] to i8*
// CHECK-64-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates.5* [[TMP3]] to i8*
// CHECK-64-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META37:![0-9]+]])
// CHECK-64-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META40:![0-9]+]])
// CHECK-64-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META42:![0-9]+]])
// CHECK-64-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META44:![0-9]+]])
// CHECK-64-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !46
// CHECK-64-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !46
// CHECK-64-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !46
// CHECK-64-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t.6*, i32**)* @.omp_task_privates_map..13 to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !46
// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !46
// CHECK-64-NEXT: store %struct.anon.4* [[TMP8]], %struct.anon.4** [[__CONTEXT_ADDR_I]], align 8, !noalias !46
// CHECK-64-NEXT: [[TMP12:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR_I]], align 8, !noalias !46
// CHECK-64-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !46
// CHECK-64-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !46
// CHECK-64-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i32**)*
// CHECK-64-NEXT: call void [[TMP15]](i8* [[TMP14]], i32** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR3]]
// CHECK-64-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !46
// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4
// CHECK-64-NEXT: [[CONV_I:%.*]] = bitcast i64* [[GLOBAL_CASTED_I]] to i32*
// CHECK-64-NEXT: store i32 [[TMP17]], i32* [[CONV_I]], align 4, !noalias !46
// CHECK-64-NEXT: [[TMP18:%.*]] = load i64, i64* [[GLOBAL_CASTED_I]], align 8, !noalias !46
// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l83(i64 [[TMP18]]) #[[ATTR3]]
// CHECK-64-NEXT: ret i32 0
// CHECK-64-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
// CHECK-64-SAME: () #[[ATTR7]] {
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: call void @__tgt_register_requires(i64 1)
// CHECK-64-NEXT: ret void
// CHECK-32-LABEL: define {{[^@]+}}@_Z3fooi
// CHECK-32-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[A:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[AA:%.*]] = alloca i16, align 2
// CHECK-32-NEXT: [[B:%.*]] = alloca [10 x float], align 4
// CHECK-32-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4
// CHECK-32-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[C:%.*]] = alloca [5 x [10 x double]], align 8
// CHECK-32-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 4
// CHECK-32-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4
// CHECK-32-NEXT: [[DOTDEP_ARR_ADDR:%.*]] = alloca [4 x %struct.kmp_depend_info], align 4
// CHECK-32-NEXT: [[DEP_COUNTER_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[GLOBAL_CASTED:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x i8*], align 4
// CHECK-32-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x i8*], align 4
// CHECK-32-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x i8*], align 4
// CHECK-32-NEXT: [[AGG_CAPTURED4:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4
// CHECK-32-NEXT: [[DOTDEP_ARR_ADDR5:%.*]] = alloca [3 x %struct.kmp_depend_info], align 4
// CHECK-32-NEXT: [[DEP_COUNTER_ADDR6:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[AGG_CAPTURED7:%.*]] = alloca [[STRUCT_ANON_0]], align 4
// CHECK-32-NEXT: [[DOTDEP_ARR_ADDR8:%.*]] = alloca [3 x %struct.kmp_depend_info], align 4
// CHECK-32-NEXT: [[DEP_COUNTER_ADDR9:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[GLOBAL_CASTED10:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[AGG_CAPTURED11:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4
// CHECK-32-NEXT: [[DOTDEP_ARR_ADDR12:%.*]] = alloca [1 x %struct.kmp_depend_info], align 4
// CHECK-32-NEXT: [[DEP_COUNTER_ADDR13:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]])
// CHECK-32-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK-32-NEXT: store i32 0, i32* [[A]], align 4
// CHECK-32-NEXT: store i16 0, i16* [[AA]], align 2
// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK-32-NEXT: [[TMP2:%.*]] = call i8* @llvm.stacksave()
// CHECK-32-NEXT: store i8* [[TMP2]], i8** [[SAVED_STACK]], align 4
// CHECK-32-NEXT: [[VLA:%.*]] = alloca float, i32 [[TMP1]], align 4
// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4
// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK-32-NEXT: [[TMP4:%.*]] = mul nuw i32 5, [[TMP3]]
// CHECK-32-NEXT: [[VLA1:%.*]] = alloca double, i32 [[TMP4]], align 8
// CHECK-32-NEXT: store i32 [[TMP3]], i32* [[__VLA_EXPR1]], align 4
// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* @global, align 4
// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[A]], align 4
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP5]], [[TMP6]]
// CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTCAPTURE_EXPR_]], align 4
// CHECK-32-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
// CHECK-32-NEXT: store i32 [[TMP8]], i32* [[TMP7]], align 4
// CHECK-32-NEXT: [[TMP9:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 1, i32 20, i32 4, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*))
// CHECK-32-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to %struct.kmp_task_t_with_privates*
// CHECK-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP10]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP11]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP13:%.*]] = load i8*, i8** [[TMP12]], align 4
// CHECK-32-NEXT: [[TMP14:%.*]] = bitcast %struct.anon* [[AGG_CAPTURED]] to i8*
// CHECK-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP13]], i8* align 4 [[TMP14]], i32 4, i1 false)
// CHECK-32-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* [[DOTDEP_ARR_ADDR]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP16:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO:%.*]], %struct.kmp_depend_info* [[TMP15]], i32 0
// CHECK-32-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP16]], i32 0, i32 0
// CHECK-32-NEXT: store i32 ptrtoint (i32* @global to i32), i32* [[TMP17]], align 4
// CHECK-32-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP16]], i32 0, i32 1
// CHECK-32-NEXT: store i32 4, i32* [[TMP18]], align 4
// CHECK-32-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP16]], i32 0, i32 2
// CHECK-32-NEXT: store i8 1, i8* [[TMP19]], align 4
// CHECK-32-NEXT: [[TMP20:%.*]] = ptrtoint i32* [[A]] to i32
// CHECK-32-NEXT: [[TMP21:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP15]], i32 1
// CHECK-32-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP21]], i32 0, i32 0
// CHECK-32-NEXT: store i32 [[TMP20]], i32* [[TMP22]], align 4
// CHECK-32-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP21]], i32 0, i32 1
// CHECK-32-NEXT: store i32 4, i32* [[TMP23]], align 4
// CHECK-32-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP21]], i32 0, i32 2
// CHECK-32-NEXT: store i8 3, i8* [[TMP24]], align 4
// CHECK-32-NEXT: [[TMP25:%.*]] = ptrtoint [10 x float]* [[B]] to i32
// CHECK-32-NEXT: [[TMP26:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP15]], i32 2
// CHECK-32-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP26]], i32 0, i32 0
// CHECK-32-NEXT: store i32 [[TMP25]], i32* [[TMP27]], align 4
// CHECK-32-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP26]], i32 0, i32 1
// CHECK-32-NEXT: store i32 40, i32* [[TMP28]], align 4
// CHECK-32-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP26]], i32 0, i32 2
// CHECK-32-NEXT: store i8 3, i8* [[TMP29]], align 4
// CHECK-32-NEXT: [[TMP30:%.*]] = mul nsw i32 4, [[TMP3]]
// CHECK-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[VLA1]], i32 [[TMP30]]
// CHECK-32-NEXT: [[TMP31:%.*]] = mul nuw i32 [[TMP3]], 8
// CHECK-32-NEXT: [[TMP32:%.*]] = ptrtoint double* [[ARRAYIDX]] to i32
// CHECK-32-NEXT: [[TMP33:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP15]], i32 3
// CHECK-32-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP33]], i32 0, i32 0
// CHECK-32-NEXT: store i32 [[TMP32]], i32* [[TMP34]], align 4
// CHECK-32-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP33]], i32 0, i32 1
// CHECK-32-NEXT: store i32 [[TMP31]], i32* [[TMP35]], align 4
// CHECK-32-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP33]], i32 0, i32 2
// CHECK-32-NEXT: store i8 3, i8* [[TMP36]], align 4
// CHECK-32-NEXT: store i32 4, i32* [[DEP_COUNTER_ADDR]], align 4
// CHECK-32-NEXT: [[TMP37:%.*]] = bitcast %struct.kmp_depend_info* [[TMP15]] to i8*
// CHECK-32-NEXT: call void @__kmpc_omp_taskwait_deps_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 4, i8* [[TMP37]], i32 0, i8* null, i32 0)
// CHECK-32-NEXT: call void @__kmpc_omp_task_begin_if0(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i8* [[TMP9]])
// CHECK-32-NEXT: [[TMP38:%.*]] = call i32 @.omp_task_entry.(i32 [[TMP0]], %struct.kmp_task_t_with_privates* [[TMP10]]) #[[ATTR3:[0-9]+]]
// CHECK-32-NEXT: call void @__kmpc_omp_task_complete_if0(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i8* [[TMP9]])
// CHECK-32-NEXT: [[TMP39:%.*]] = load i32, i32* @global, align 4
// CHECK-32-NEXT: [[TMP40:%.*]] = load i32, i32* [[A]], align 4
// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP39]], [[TMP40]]
// CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTCAPTURE_EXPR_2]], align 4
// CHECK-32-NEXT: [[TMP41:%.*]] = load i32*, i32** @_ZZ3fooiE6plocal, align 4
// CHECK-32-NEXT: [[TMP42:%.*]] = load i32, i32* @global, align 4
// CHECK-32-NEXT: store i32 [[TMP42]], i32* [[GLOBAL_CASTED]], align 4
// CHECK-32-NEXT: [[TMP43:%.*]] = load i32, i32* [[GLOBAL_CASTED]], align 4
// CHECK-32-NEXT: [[TMP44:%.*]] = load i32, i32* [[A]], align 4
// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP44]], 0
// CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
// CHECK-32: omp_if.then:
// CHECK-32-NEXT: [[TMP45:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP46:%.*]] = bitcast i8** [[TMP45]] to i32**
// CHECK-32-NEXT: store i32* [[TMP41]], i32** [[TMP46]], align 4
// CHECK-32-NEXT: [[TMP47:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP48:%.*]] = bitcast i8** [[TMP47]] to i32**
// CHECK-32-NEXT: store i32* [[TMP41]], i32** [[TMP48]], align 4
// CHECK-32-NEXT: [[TMP49:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
// CHECK-32-NEXT: store i8* null, i8** [[TMP49]], align 4
// CHECK-32-NEXT: [[TMP50:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK-32-NEXT: [[TMP51:%.*]] = bitcast i8** [[TMP50]] to i32*
// CHECK-32-NEXT: store i32 [[TMP43]], i32* [[TMP51]], align 4
// CHECK-32-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK-32-NEXT: [[TMP53:%.*]] = bitcast i8** [[TMP52]] to i32*
// CHECK-32-NEXT: store i32 [[TMP43]], i32* [[TMP53]], align 4
// CHECK-32-NEXT: [[TMP54:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
// CHECK-32-NEXT: store i8* null, i8** [[TMP54]], align 4
// CHECK-32-NEXT: [[TMP55:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP56:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP57:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED4]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP58:%.*]] = load i32*, i32** @_ZZ3fooiE6plocal, align 4
// CHECK-32-NEXT: store i32* [[TMP58]], i32** [[TMP57]], align 4
// CHECK-32-NEXT: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED4]], i32 0, i32 1
// CHECK-32-NEXT: [[TMP60:%.*]] = load i32, i32* @global, align 4
// CHECK-32-NEXT: store i32 [[TMP60]], i32* [[TMP59]], align 4
// CHECK-32-NEXT: [[TMP61:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED4]], i32 0, i32 2
// CHECK-32-NEXT: [[TMP62:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4
// CHECK-32-NEXT: store i32 [[TMP62]], i32* [[TMP61]], align 4
// CHECK-32-NEXT: [[TMP63:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 1, i32 60, i32 12, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.1*)* @.omp_task_entry..6 to i32 (i32, i8*)*))
// CHECK-32-NEXT: [[TMP64:%.*]] = bitcast i8* [[TMP63]] to %struct.kmp_task_t_with_privates.1*
// CHECK-32-NEXT: [[TMP65:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_1:%.*]], %struct.kmp_task_t_with_privates.1* [[TMP64]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP66:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP65]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP67:%.*]] = load i8*, i8** [[TMP66]], align 4
// CHECK-32-NEXT: [[TMP68:%.*]] = bitcast %struct.anon.0* [[AGG_CAPTURED4]] to i8*
// CHECK-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP67]], i8* align 4 [[TMP68]], i32 12, i1 false)
// CHECK-32-NEXT: [[TMP69:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_1]], %struct.kmp_task_t_with_privates.1* [[TMP64]], i32 0, i32 1
// CHECK-32-NEXT: [[TMP70:%.*]] = bitcast i8* [[TMP67]] to %struct.anon.0*
// CHECK-32-NEXT: [[TMP71:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP69]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP72:%.*]] = bitcast [2 x i64]* [[TMP71]] to i8*
// CHECK-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP72]], i8* align 4 bitcast ([2 x i64]* @.offload_sizes to i8*), i32 16, i1 false)
// CHECK-32-NEXT: [[TMP73:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP69]], i32 0, i32 1
// CHECK-32-NEXT: [[TMP74:%.*]] = load i32*, i32** @_ZZ3fooiE6plocal, align 4
// CHECK-32-NEXT: store i32* [[TMP74]], i32** [[TMP73]], align 4
// CHECK-32-NEXT: [[TMP75:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP69]], i32 0, i32 2
// CHECK-32-NEXT: [[TMP76:%.*]] = load i32, i32* @global, align 4
// CHECK-32-NEXT: store i32 [[TMP76]], i32* [[TMP75]], align 4
// CHECK-32-NEXT: [[TMP77:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP69]], i32 0, i32 3
// CHECK-32-NEXT: [[TMP78:%.*]] = bitcast [2 x i8*]* [[TMP77]] to i8*
// CHECK-32-NEXT: [[TMP79:%.*]] = bitcast i8** [[TMP55]] to i8*
// CHECK-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP78]], i8* align 4 [[TMP79]], i32 8, i1 false)
// CHECK-32-NEXT: [[TMP80:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP69]], i32 0, i32 4
// CHECK-32-NEXT: [[TMP81:%.*]] = bitcast [2 x i8*]* [[TMP80]] to i8*
// CHECK-32-NEXT: [[TMP82:%.*]] = bitcast i8** [[TMP56]] to i8*
// CHECK-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP81]], i8* align 4 [[TMP82]], i32 8, i1 false)
// CHECK-32-NEXT: [[TMP83:%.*]] = getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* [[DOTDEP_ARR_ADDR5]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP84:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP83]], i32 0
// CHECK-32-NEXT: [[TMP85:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP84]], i32 0, i32 0
// CHECK-32-NEXT: store i32 ptrtoint (i32* @global to i32), i32* [[TMP85]], align 4
// CHECK-32-NEXT: [[TMP86:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP84]], i32 0, i32 1
// CHECK-32-NEXT: store i32 4, i32* [[TMP86]], align 4
// CHECK-32-NEXT: [[TMP87:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP84]], i32 0, i32 2
// CHECK-32-NEXT: store i8 3, i8* [[TMP87]], align 4
// CHECK-32-NEXT: [[TMP88:%.*]] = ptrtoint i32* [[A]] to i32
// CHECK-32-NEXT: [[TMP89:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP83]], i32 1
// CHECK-32-NEXT: [[TMP90:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP89]], i32 0, i32 0
// CHECK-32-NEXT: store i32 [[TMP88]], i32* [[TMP90]], align 4
// CHECK-32-NEXT: [[TMP91:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP89]], i32 0, i32 1
// CHECK-32-NEXT: store i32 4, i32* [[TMP91]], align 4
// CHECK-32-NEXT: [[TMP92:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP89]], i32 0, i32 2
// CHECK-32-NEXT: store i8 3, i8* [[TMP92]], align 4
// CHECK-32-NEXT: [[TMP93:%.*]] = mul nuw i32 [[TMP1]], 4
// CHECK-32-NEXT: [[TMP94:%.*]] = ptrtoint float* [[VLA]] to i32
// CHECK-32-NEXT: [[TMP95:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP83]], i32 2
// CHECK-32-NEXT: [[TMP96:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP95]], i32 0, i32 0
// CHECK-32-NEXT: store i32 [[TMP94]], i32* [[TMP96]], align 4
// CHECK-32-NEXT: [[TMP97:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP95]], i32 0, i32 1
// CHECK-32-NEXT: store i32 [[TMP93]], i32* [[TMP97]], align 4
// CHECK-32-NEXT: [[TMP98:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP95]], i32 0, i32 2
// CHECK-32-NEXT: store i8 3, i8* [[TMP98]], align 4
// CHECK-32-NEXT: store i32 3, i32* [[DEP_COUNTER_ADDR6]], align 4
// CHECK-32-NEXT: [[TMP99:%.*]] = bitcast %struct.kmp_depend_info* [[TMP83]] to i8*
// CHECK-32-NEXT: call void @__kmpc_omp_taskwait_deps_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 3, i8* [[TMP99]], i32 0, i8* null, i32 0)
// CHECK-32-NEXT: call void @__kmpc_omp_task_begin_if0(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i8* [[TMP63]])
// CHECK-32-NEXT: [[TMP100:%.*]] = call i32 @.omp_task_entry..6(i32 [[TMP0]], %struct.kmp_task_t_with_privates.1* [[TMP64]]) #[[ATTR3]]
// CHECK-32-NEXT: call void @__kmpc_omp_task_complete_if0(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i8* [[TMP63]])
// CHECK-32-NEXT: br label [[OMP_IF_END:%.*]]
// CHECK-32: omp_if.else:
// CHECK-32-NEXT: [[TMP101:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED7]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP102:%.*]] = load i32*, i32** @_ZZ3fooiE6plocal, align 4
// CHECK-32-NEXT: store i32* [[TMP102]], i32** [[TMP101]], align 4
// CHECK-32-NEXT: [[TMP103:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED7]], i32 0, i32 1
// CHECK-32-NEXT: [[TMP104:%.*]] = load i32, i32* @global, align 4
// CHECK-32-NEXT: store i32 [[TMP104]], i32* [[TMP103]], align 4
// CHECK-32-NEXT: [[TMP105:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED7]], i32 0, i32 2
// CHECK-32-NEXT: [[TMP106:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4
// CHECK-32-NEXT: store i32 [[TMP106]], i32* [[TMP105]], align 4
// CHECK-32-NEXT: [[TMP107:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 1, i32 28, i32 12, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.2*)* @.omp_task_entry..9 to i32 (i32, i8*)*))
// CHECK-32-NEXT: [[TMP108:%.*]] = bitcast i8* [[TMP107]] to %struct.kmp_task_t_with_privates.2*
// CHECK-32-NEXT: [[TMP109:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2:%.*]], %struct.kmp_task_t_with_privates.2* [[TMP108]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP110:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP109]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP111:%.*]] = load i8*, i8** [[TMP110]], align 4
// CHECK-32-NEXT: [[TMP112:%.*]] = bitcast %struct.anon.0* [[AGG_CAPTURED7]] to i8*
// CHECK-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP111]], i8* align 4 [[TMP112]], i32 12, i1 false)
// CHECK-32-NEXT: [[TMP113:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2]], %struct.kmp_task_t_with_privates.2* [[TMP108]], i32 0, i32 1
// CHECK-32-NEXT: [[TMP114:%.*]] = bitcast i8* [[TMP111]] to %struct.anon.0*
// CHECK-32-NEXT: [[TMP115:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3:%.*]], %struct..kmp_privates.t.3* [[TMP113]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP116:%.*]] = load i32*, i32** @_ZZ3fooiE6plocal, align 4
// CHECK-32-NEXT: store i32* [[TMP116]], i32** [[TMP115]], align 4
// CHECK-32-NEXT: [[TMP117:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], %struct..kmp_privates.t.3* [[TMP113]], i32 0, i32 1
// CHECK-32-NEXT: [[TMP118:%.*]] = load i32, i32* @global, align 4
// CHECK-32-NEXT: store i32 [[TMP118]], i32* [[TMP117]], align 4
// CHECK-32-NEXT: [[TMP119:%.*]] = getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* [[DOTDEP_ARR_ADDR8]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP120:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP119]], i32 0
// CHECK-32-NEXT: [[TMP121:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP120]], i32 0, i32 0
// CHECK-32-NEXT: store i32 ptrtoint (i32* @global to i32), i32* [[TMP121]], align 4
// CHECK-32-NEXT: [[TMP122:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP120]], i32 0, i32 1
// CHECK-32-NEXT: store i32 4, i32* [[TMP122]], align 4
// CHECK-32-NEXT: [[TMP123:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP120]], i32 0, i32 2
// CHECK-32-NEXT: store i8 3, i8* [[TMP123]], align 4
// CHECK-32-NEXT: [[TMP124:%.*]] = ptrtoint i32* [[A]] to i32
// CHECK-32-NEXT: [[TMP125:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP119]], i32 1
// CHECK-32-NEXT: [[TMP126:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP125]], i32 0, i32 0
// CHECK-32-NEXT: store i32 [[TMP124]], i32* [[TMP126]], align 4
// CHECK-32-NEXT: [[TMP127:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP125]], i32 0, i32 1
// CHECK-32-NEXT: store i32 4, i32* [[TMP127]], align 4
// CHECK-32-NEXT: [[TMP128:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP125]], i32 0, i32 2
// CHECK-32-NEXT: store i8 3, i8* [[TMP128]], align 4
// CHECK-32-NEXT: [[TMP129:%.*]] = mul nuw i32 [[TMP1]], 4
// CHECK-32-NEXT: [[TMP130:%.*]] = ptrtoint float* [[VLA]] to i32
// CHECK-32-NEXT: [[TMP131:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP119]], i32 2
// CHECK-32-NEXT: [[TMP132:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP131]], i32 0, i32 0
// CHECK-32-NEXT: store i32 [[TMP130]], i32* [[TMP132]], align 4
// CHECK-32-NEXT: [[TMP133:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP131]], i32 0, i32 1
// CHECK-32-NEXT: store i32 [[TMP129]], i32* [[TMP133]], align 4
// CHECK-32-NEXT: [[TMP134:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP131]], i32 0, i32 2
// CHECK-32-NEXT: store i8 3, i8* [[TMP134]], align 4
// CHECK-32-NEXT: store i32 3, i32* [[DEP_COUNTER_ADDR9]], align 4
// CHECK-32-NEXT: [[TMP135:%.*]] = bitcast %struct.kmp_depend_info* [[TMP119]] to i8*
// CHECK-32-NEXT: call void @__kmpc_omp_taskwait_deps_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 3, i8* [[TMP135]], i32 0, i8* null, i32 0)
// CHECK-32-NEXT: call void @__kmpc_omp_task_begin_if0(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i8* [[TMP107]])
// CHECK-32-NEXT: [[TMP136:%.*]] = call i32 @.omp_task_entry..9(i32 [[TMP0]], %struct.kmp_task_t_with_privates.2* [[TMP108]]) #[[ATTR3]]
// CHECK-32-NEXT: call void @__kmpc_omp_task_complete_if0(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i8* [[TMP107]])
// CHECK-32-NEXT: br label [[OMP_IF_END]]
// CHECK-32: omp_if.end:
// CHECK-32-NEXT: [[TMP137:%.*]] = load i32, i32* @global, align 4
// CHECK-32-NEXT: store i32 [[TMP137]], i32* [[GLOBAL_CASTED10]], align 4
// CHECK-32-NEXT: [[TMP138:%.*]] = load i32, i32* [[GLOBAL_CASTED10]], align 4
// CHECK-32-NEXT: [[TMP139:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[AGG_CAPTURED11]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP140:%.*]] = load i32, i32* @global, align 4
// CHECK-32-NEXT: store i32 [[TMP140]], i32* [[TMP139]], align 4
// CHECK-32-NEXT: [[TMP141:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 1, i32 24, i32 4, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.5*)* @.omp_task_entry..14 to i32 (i32, i8*)*))
// CHECK-32-NEXT: [[TMP142:%.*]] = bitcast i8* [[TMP141]] to %struct.kmp_task_t_with_privates.5*
// CHECK-32-NEXT: [[TMP143:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_5:%.*]], %struct.kmp_task_t_with_privates.5* [[TMP142]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP144:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP143]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP145:%.*]] = load i8*, i8** [[TMP144]], align 4
// CHECK-32-NEXT: [[TMP146:%.*]] = bitcast %struct.anon.4* [[AGG_CAPTURED11]] to i8*
// CHECK-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP145]], i8* align 4 [[TMP146]], i32 4, i1 false)
// CHECK-32-NEXT: [[TMP147:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_5]], %struct.kmp_task_t_with_privates.5* [[TMP142]], i32 0, i32 1
// CHECK-32-NEXT: [[TMP148:%.*]] = bitcast i8* [[TMP145]] to %struct.anon.4*
// CHECK-32-NEXT: [[TMP149:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_6:%.*]], %struct..kmp_privates.t.6* [[TMP147]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP150:%.*]] = load i32, i32* @global, align 4
// CHECK-32-NEXT: store i32 [[TMP150]], i32* [[TMP149]], align 4
// CHECK-32-NEXT: [[TMP151:%.*]] = getelementptr inbounds [1 x %struct.kmp_depend_info], [1 x %struct.kmp_depend_info]* [[DOTDEP_ARR_ADDR12]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP152:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP151]], i32 0
// CHECK-32-NEXT: [[TMP153:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP152]], i32 0, i32 0
// CHECK-32-NEXT: store i32 ptrtoint (i32* @global to i32), i32* [[TMP153]], align 4
// CHECK-32-NEXT: [[TMP154:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP152]], i32 0, i32 1
// CHECK-32-NEXT: store i32 4, i32* [[TMP154]], align 4
// CHECK-32-NEXT: [[TMP155:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP152]], i32 0, i32 2
// CHECK-32-NEXT: store i8 3, i8* [[TMP155]], align 4
// CHECK-32-NEXT: store i32 1, i32* [[DEP_COUNTER_ADDR13]], align 4
// CHECK-32-NEXT: [[TMP156:%.*]] = bitcast %struct.kmp_depend_info* [[TMP151]] to i8*
// CHECK-32-NEXT: call void @__kmpc_omp_taskwait_deps_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 1, i8* [[TMP156]], i32 0, i8* null, i32 0)
// CHECK-32-NEXT: call void @__kmpc_omp_task_begin_if0(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i8* [[TMP141]])
// CHECK-32-NEXT: [[TMP157:%.*]] = call i32 @.omp_task_entry..14(i32 [[TMP0]], %struct.kmp_task_t_with_privates.5* [[TMP142]]) #[[ATTR3]]
// CHECK-32-NEXT: call void @__kmpc_omp_task_complete_if0(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i8* [[TMP141]])
// CHECK-32-NEXT: [[TMP158:%.*]] = load i32, i32* [[A]], align 4
// CHECK-32-NEXT: [[TMP159:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
// CHECK-32-NEXT: call void @llvm.stackrestore(i8* [[TMP159]])
// CHECK-32-NEXT: ret i32 [[TMP158]]
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l66
// CHECK-32-SAME: () #[[ATTR2:[0-9]+]] {
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*))
// CHECK-32-NEXT: ret void
// CHECK-32-LABEL: define {{[^@]+}}@.omp_outlined.
// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2]] {
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32: cond.true:
// CHECK-32-NEXT: br label [[COND_END:%.*]]
// CHECK-32: cond.false:
// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK-32-NEXT: br label [[COND_END]]
// CHECK-32: cond.end:
// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4
// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK-32-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP7]], i32 [[TMP8]])
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]]
// CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-32: omp.loop.exit:
// CHECK-32-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP1]])
// CHECK-32-NEXT: ret void
// CHECK-32-LABEL: define {{[^@]+}}@.omp_outlined..1
// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR2]] {
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4
// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4
// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4
// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4
// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4
// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4
// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK-32-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4
// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9
// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32: cond.true:
// CHECK-32-NEXT: br label [[COND_END:%.*]]
// CHECK-32: cond.false:
// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK-32-NEXT: br label [[COND_END]]
// CHECK-32: cond.end:
// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ]
// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4
// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
// CHECK-32-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4
// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32: omp.body.continue:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-32: omp.loop.exit:
// CHECK-32-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]])
// CHECK-32-NEXT: ret void
// CHECK-32-LABEL: define {{[^@]+}}@.omp_task_entry.
// CHECK-32-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates* noalias noundef [[TMP1:%.*]]) #[[ATTR4:[0-9]+]] {
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 4
// CHECK-32-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 4
// CHECK-32-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 4
// CHECK-32-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 4
// CHECK-32-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 4
// CHECK-32-NEXT: [[KERNEL_ARGS_I:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK-32-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates*, align 4
// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4
// CHECK-32-NEXT: store %struct.kmp_task_t_with_privates* [[TMP1]], %struct.kmp_task_t_with_privates** [[DOTADDR1]], align 4
// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4
// CHECK-32-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates*, %struct.kmp_task_t_with_privates** [[DOTADDR1]], align 4
// CHECK-32-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2
// CHECK-32-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 4
// CHECK-32-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon*
// CHECK-32-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8*
// CHECK-32-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]])
// CHECK-32-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META11:![0-9]+]])
// CHECK-32-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META13:![0-9]+]])
// CHECK-32-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META15:![0-9]+]])
// CHECK-32-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !17
// CHECK-32-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 4, !noalias !17
// CHECK-32-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !17
// CHECK-32-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !17
// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 4, !noalias !17
// CHECK-32-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !17
// CHECK-32-NEXT: [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !17
// CHECK-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP10]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4
// CHECK-32-NEXT: [[TMP13:%.*]] = sext i32 [[TMP12]] to i64
// CHECK-32-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 0
// CHECK-32-NEXT: store i32 2, i32* [[TMP14]], align 4, !noalias !17
// CHECK-32-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 1
// CHECK-32-NEXT: store i32 0, i32* [[TMP15]], align 4, !noalias !17
// CHECK-32-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 2
// CHECK-32-NEXT: store i8** null, i8*** [[TMP16]], align 4, !noalias !17
// CHECK-32-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 3
// CHECK-32-NEXT: store i8** null, i8*** [[TMP17]], align 4, !noalias !17
// CHECK-32-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 4
// CHECK-32-NEXT: store i64* null, i64** [[TMP18]], align 4, !noalias !17
// CHECK-32-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 5
// CHECK-32-NEXT: store i64* null, i64** [[TMP19]], align 4, !noalias !17
// CHECK-32-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 6
// CHECK-32-NEXT: store i8** null, i8*** [[TMP20]], align 4, !noalias !17
// CHECK-32-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 7
// CHECK-32-NEXT: store i8** null, i8*** [[TMP21]], align 4, !noalias !17
// CHECK-32-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 8
// CHECK-32-NEXT: store i64 10, i64* [[TMP22]], align 8, !noalias !17
// CHECK-32-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 9
// CHECK-32-NEXT: store i64 0, i64* [[TMP23]], align 8, !noalias !17
// CHECK-32-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 10
// CHECK-32-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP24]], align 4, !noalias !17
// CHECK-32-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 11
// CHECK-32-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP25]], align 4, !noalias !17
// CHECK-32-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 12
// CHECK-32-NEXT: store i32 0, i32* [[TMP26]], align 4, !noalias !17
// CHECK-32-NEXT: [[TMP27:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 [[TMP13]], i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l66.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]])
// CHECK-32-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0
// CHECK-32-NEXT: br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED_I:%.*]], label [[DOTOMP_OUTLINED__2_EXIT:%.*]]
// CHECK-32: omp_offload.failed.i:
// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l66() #[[ATTR3]]
// CHECK-32-NEXT: br label [[DOTOMP_OUTLINED__2_EXIT]]
// CHECK-32: .omp_outlined..2.exit:
// CHECK-32-NEXT: ret i32 0
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l76
// CHECK-32-SAME: (i32* noundef [[PLOCAL:%.*]], i32 noundef [[GLOBAL:%.*]]) #[[ATTR2]] {
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[PLOCAL_ADDR:%.*]] = alloca i32*, align 4
// CHECK-32-NEXT: [[GLOBAL_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[GLOBAL_CASTED:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: store i32* [[PLOCAL]], i32** [[PLOCAL_ADDR]], align 4
// CHECK-32-NEXT: store i32 [[GLOBAL]], i32* [[GLOBAL_ADDR]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[PLOCAL_ADDR]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[GLOBAL_ADDR]], align 4
// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[GLOBAL_CASTED]], align 4
// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[GLOBAL_CASTED]], align 4
// CHECK-32-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* [[TMP0]], i32 [[TMP2]])
// CHECK-32-NEXT: ret void
// CHECK-32-LABEL: define {{[^@]+}}@.omp_outlined..3
// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef [[PLOCAL:%.*]], i32 noundef [[GLOBAL:%.*]]) #[[ATTR2]] {
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK-32-NEXT: [[PLOCAL_ADDR:%.*]] = alloca i32*, align 4
// CHECK-32-NEXT: [[GLOBAL_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[I3:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[GLOBAL_CASTED:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
// CHECK-32-NEXT: store i32* [[PLOCAL]], i32** [[PLOCAL_ADDR]], align 4
// CHECK-32-NEXT: store i32 [[GLOBAL]], i32* [[GLOBAL_ADDR]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[PLOCAL_ADDR]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4
// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
// CHECK-32-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
// CHECK-32-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK-32-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
// CHECK-32-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
// CHECK-32-NEXT: store i32 0, i32* [[I]], align 4
// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
// CHECK-32-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK-32: omp.precond.then:
// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4
// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK-32-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4
// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
// CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32: cond.true:
// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
// CHECK-32-NEXT: br label [[COND_END:%.*]]
// CHECK-32: cond.false:
// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK-32-NEXT: br label [[COND_END]]
// CHECK-32: cond.end:
// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ]
// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4
// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK-32-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK-32-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]]
// CHECK-32-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK-32-NEXT: [[TMP16:%.*]] = load i32*, i32** [[PLOCAL_ADDR]], align 4
// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[GLOBAL_ADDR]], align 4
// CHECK-32-NEXT: store i32 [[TMP17]], i32* [[GLOBAL_CASTED]], align 4
// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[GLOBAL_CASTED]], align 4
// CHECK-32-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32 [[TMP14]], i32 [[TMP15]], i32* [[TMP16]], i32 [[TMP18]])
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]]
// CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-32: omp.loop.exit:
// CHECK-32-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4
// CHECK-32-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP22]])
// CHECK-32-NEXT: br label [[OMP_PRECOND_END]]
// CHECK-32: omp.precond.end:
// CHECK-32-NEXT: ret void
// CHECK-32-LABEL: define {{[^@]+}}@.omp_outlined..4
// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef [[PLOCAL:%.*]], i32 noundef [[GLOBAL:%.*]]) #[[ATTR2]] {
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[PLOCAL_ADDR:%.*]] = alloca i32*, align 4
// CHECK-32-NEXT: [[GLOBAL_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[I3:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4
// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4
// CHECK-32-NEXT: store i32* [[PLOCAL]], i32** [[PLOCAL_ADDR]], align 4
// CHECK-32-NEXT: store i32 [[GLOBAL]], i32* [[GLOBAL_ADDR]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[PLOCAL_ADDR]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4
// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
// CHECK-32-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
// CHECK-32-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK-32-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
// CHECK-32-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
// CHECK-32-NEXT: store i32 0, i32* [[I]], align 4
// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
// CHECK-32-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK-32: omp.precond.then:
// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4
// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4
// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4
// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4
// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4
// CHECK-32-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4
// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK-32-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4
// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]]
// CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32: cond.true:
// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
// CHECK-32-NEXT: br label [[COND_END:%.*]]
// CHECK-32: cond.false:
// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK-32-NEXT: br label [[COND_END]]
// CHECK-32: cond.end:
// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ]
// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4
// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
// CHECK-32-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK-32-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]]
// CHECK-32-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I3]], align 4
// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[GLOBAL_ADDR]], align 4
// CHECK-32-NEXT: [[TMP18:%.*]] = load i32*, i32** [[PLOCAL_ADDR]], align 4
// CHECK-32-NEXT: store i32 [[TMP17]], i32* [[TMP18]], align 4
// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[GLOBAL_ADDR]], align 4
// CHECK-32-NEXT: store i32 [[TMP19]], i32* @_ZZ3fooiE6local1, align 4
// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32: omp.body.continue:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK-32-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1
// CHECK-32-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-32: omp.loop.exit:
// CHECK-32-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4
// CHECK-32-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP22]])
// CHECK-32-NEXT: br label [[OMP_PRECOND_END]]
// CHECK-32: omp.precond.end:
// CHECK-32-NEXT: ret void
// CHECK-32-LABEL: define {{[^@]+}}@.omp_task_privates_map.
// CHECK-32-SAME: (%struct..kmp_privates.t* noalias noundef [[TMP0:%.*]], i32*** noalias noundef [[TMP1:%.*]], i32** noalias noundef [[TMP2:%.*]], [2 x i8*]** noalias noundef [[TMP3:%.*]], [2 x i8*]** noalias noundef [[TMP4:%.*]], [2 x i64]** noalias noundef [[TMP5:%.*]]) #[[ATTR7:[0-9]+]] {
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t*, align 4
// CHECK-32-NEXT: [[DOTADDR1:%.*]] = alloca i32***, align 4
// CHECK-32-NEXT: [[DOTADDR2:%.*]] = alloca i32**, align 4
// CHECK-32-NEXT: [[DOTADDR3:%.*]] = alloca [2 x i8*]**, align 4
// CHECK-32-NEXT: [[DOTADDR4:%.*]] = alloca [2 x i8*]**, align 4
// CHECK-32-NEXT: [[DOTADDR5:%.*]] = alloca [2 x i64]**, align 4
// CHECK-32-NEXT: store %struct..kmp_privates.t* [[TMP0]], %struct..kmp_privates.t** [[DOTADDR]], align 4
// CHECK-32-NEXT: store i32*** [[TMP1]], i32**** [[DOTADDR1]], align 4
// CHECK-32-NEXT: store i32** [[TMP2]], i32*** [[DOTADDR2]], align 4
// CHECK-32-NEXT: store [2 x i8*]** [[TMP3]], [2 x i8*]*** [[DOTADDR3]], align 4
// CHECK-32-NEXT: store [2 x i8*]** [[TMP4]], [2 x i8*]*** [[DOTADDR4]], align 4
// CHECK-32-NEXT: store [2 x i64]** [[TMP5]], [2 x i64]*** [[DOTADDR5]], align 4
// CHECK-32-NEXT: [[TMP6:%.*]] = load %struct..kmp_privates.t*, %struct..kmp_privates.t** [[DOTADDR]], align 4
// CHECK-32-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP6]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP8:%.*]] = load [2 x i64]**, [2 x i64]*** [[DOTADDR5]], align 4
// CHECK-32-NEXT: store [2 x i64]* [[TMP7]], [2 x i64]** [[TMP8]], align 4
// CHECK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP6]], i32 0, i32 1
// CHECK-32-NEXT: [[TMP10:%.*]] = load i32***, i32**** [[DOTADDR1]], align 4
// CHECK-32-NEXT: store i32** [[TMP9]], i32*** [[TMP10]], align 4
// CHECK-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP6]], i32 0, i32 2
// CHECK-32-NEXT: [[TMP12:%.*]] = load i32**, i32*** [[DOTADDR2]], align 4
// CHECK-32-NEXT: store i32* [[TMP11]], i32** [[TMP12]], align 4
// CHECK-32-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP6]], i32 0, i32 3
// CHECK-32-NEXT: [[TMP14:%.*]] = load [2 x i8*]**, [2 x i8*]*** [[DOTADDR3]], align 4
// CHECK-32-NEXT: store [2 x i8*]* [[TMP13]], [2 x i8*]** [[TMP14]], align 4
// CHECK-32-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP6]], i32 0, i32 4
// CHECK-32-NEXT: [[TMP16:%.*]] = load [2 x i8*]**, [2 x i8*]*** [[DOTADDR4]], align 4
// CHECK-32-NEXT: store [2 x i8*]* [[TMP15]], [2 x i8*]** [[TMP16]], align 4
// CHECK-32-NEXT: ret void
// CHECK-32-LABEL: define {{[^@]+}}@.omp_task_entry..6
// CHECK-32-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.1* noalias noundef [[TMP1:%.*]]) #[[ATTR4]] {
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 4
// CHECK-32-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 4
// CHECK-32-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 4
// CHECK-32-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 4
// CHECK-32-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.0*, align 4
// CHECK-32-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca i32**, align 4
// CHECK-32-NEXT: [[DOTFIRSTPRIV_PTR_ADDR1_I:%.*]] = alloca i32*, align 4
// CHECK-32-NEXT: [[DOTFIRSTPRIV_PTR_ADDR2_I:%.*]] = alloca [2 x i8*]*, align 4
// CHECK-32-NEXT: [[DOTFIRSTPRIV_PTR_ADDR3_I:%.*]] = alloca [2 x i8*]*, align 4
// CHECK-32-NEXT: [[DOTFIRSTPRIV_PTR_ADDR4_I:%.*]] = alloca [2 x i64]*, align 4
// CHECK-32-NEXT: [[DOTCAPTURE_EXPR__I:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTCAPTURE_EXPR_5_I:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[GLOBAL_CASTED_I:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[KERNEL_ARGS_I:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK-32-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.1*, align 4
// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4
// CHECK-32-NEXT: store %struct.kmp_task_t_with_privates.1* [[TMP1]], %struct.kmp_task_t_with_privates.1** [[DOTADDR1]], align 4
// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4
// CHECK-32-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.1*, %struct.kmp_task_t_with_privates.1** [[DOTADDR1]], align 4
// CHECK-32-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_1:%.*]], %struct.kmp_task_t_with_privates.1* [[TMP3]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2
// CHECK-32-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 4
// CHECK-32-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.0*
// CHECK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_1]], %struct.kmp_task_t_with_privates.1* [[TMP3]], i32 0, i32 1
// CHECK-32-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8*
// CHECK-32-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates.1* [[TMP3]] to i8*
// CHECK-32-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META18:![0-9]+]])
// CHECK-32-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META21:![0-9]+]])
// CHECK-32-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META23:![0-9]+]])
// CHECK-32-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META25:![0-9]+]])
// CHECK-32-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !27
// CHECK-32-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 4, !noalias !27
// CHECK-32-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !27
// CHECK-32-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i32***, i32**, [2 x i8*]**, [2 x i8*]**, [2 x i64]**)* @.omp_task_privates_map. to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !27
// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 4, !noalias !27
// CHECK-32-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 4, !noalias !27
// CHECK-32-NEXT: [[TMP12:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 4, !noalias !27
// CHECK-32-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !27
// CHECK-32-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !27
// CHECK-32-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i32***, i32**, [2 x i8*]**, [2 x i8*]**, [2 x i64]**)*
// CHECK-32-NEXT: call void [[TMP15]](i8* [[TMP14]], i32*** [[DOTFIRSTPRIV_PTR_ADDR_I]], i32** [[DOTFIRSTPRIV_PTR_ADDR1_I]], [2 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], [2 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR3_I]], [2 x i64]** [[DOTFIRSTPRIV_PTR_ADDR4_I]]) #[[ATTR3]]
// CHECK-32-NEXT: [[TMP16:%.*]] = load i32**, i32*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 4, !noalias !27
// CHECK-32-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 4, !noalias !27
// CHECK-32-NEXT: [[TMP18:%.*]] = load [2 x i8*]*, [2 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 4, !noalias !27
// CHECK-32-NEXT: [[TMP19:%.*]] = load [2 x i8*]*, [2 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 4, !noalias !27
// CHECK-32-NEXT: [[TMP20:%.*]] = load [2 x i64]*, [2 x i64]** [[DOTFIRSTPRIV_PTR_ADDR4_I]], align 4, !noalias !27
// CHECK-32-NEXT: [[TMP21:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP18]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP19]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP23:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[TMP20]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP12]], i32 0, i32 2
// CHECK-32-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4
// CHECK-32-NEXT: [[TMP26:%.*]] = sext i32 [[TMP25]] to i64
// CHECK-32-NEXT: [[TMP27:%.*]] = load i32*, i32** [[TMP16]], align 4
// CHECK-32-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4
// CHECK-32-NEXT: store i32 [[TMP28]], i32* [[DOTCAPTURE_EXPR__I]], align 4, !noalias !27
// CHECK-32-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__I]], align 4, !noalias !27
// CHECK-32-NEXT: [[SUB6_I:%.*]] = sub nsw i32 [[TMP29]], 1
// CHECK-32-NEXT: store i32 [[SUB6_I]], i32* [[DOTCAPTURE_EXPR_5_I]], align 4, !noalias !27
// CHECK-32-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_5_I]], align 4, !noalias !27
// CHECK-32-NEXT: [[ADD_I:%.*]] = add nsw i32 [[TMP30]], 1
// CHECK-32-NEXT: [[TMP31:%.*]] = zext i32 [[ADD_I]] to i64
// CHECK-32-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 0
// CHECK-32-NEXT: store i32 2, i32* [[TMP32]], align 4, !noalias !27
// CHECK-32-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 1
// CHECK-32-NEXT: store i32 2, i32* [[TMP33]], align 4, !noalias !27
// CHECK-32-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 2
// CHECK-32-NEXT: store i8** [[TMP21]], i8*** [[TMP34]], align 4, !noalias !27
// CHECK-32-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 3
// CHECK-32-NEXT: store i8** [[TMP22]], i8*** [[TMP35]], align 4, !noalias !27
// CHECK-32-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 4
// CHECK-32-NEXT: store i64* [[TMP23]], i64** [[TMP36]], align 4, !noalias !27
// CHECK-32-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 5
// CHECK-32-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes, i32 0, i32 0), i64** [[TMP37]], align 4, !noalias !27
// CHECK-32-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 6
// CHECK-32-NEXT: store i8** null, i8*** [[TMP38]], align 4, !noalias !27
// CHECK-32-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 7
// CHECK-32-NEXT: store i8** null, i8*** [[TMP39]], align 4, !noalias !27
// CHECK-32-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 8
// CHECK-32-NEXT: store i64 [[TMP31]], i64* [[TMP40]], align 8, !noalias !27
// CHECK-32-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 9
// CHECK-32-NEXT: store i64 0, i64* [[TMP41]], align 8, !noalias !27
// CHECK-32-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 10
// CHECK-32-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP42]], align 4, !noalias !27
// CHECK-32-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 11
// CHECK-32-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP43]], align 4, !noalias !27
// CHECK-32-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 12
// CHECK-32-NEXT: store i32 0, i32* [[TMP44]], align 4, !noalias !27
// CHECK-32-NEXT: [[TMP45:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 [[TMP26]], i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l76.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]])
// CHECK-32-NEXT: [[TMP46:%.*]] = icmp ne i32 [[TMP45]], 0
// CHECK-32-NEXT: br i1 [[TMP46]], label [[OMP_OFFLOAD_FAILED_I:%.*]], label [[DOTOMP_OUTLINED__5_EXIT:%.*]]
// CHECK-32: omp_offload.failed.i:
// CHECK-32-NEXT: [[TMP47:%.*]] = load i32*, i32** [[TMP16]], align 4
// CHECK-32-NEXT: [[TMP48:%.*]] = load i32, i32* @global, align 4, !noalias !27
// CHECK-32-NEXT: store i32 [[TMP48]], i32* [[GLOBAL_CASTED_I]], align 4, !noalias !27
// CHECK-32-NEXT: [[TMP49:%.*]] = load i32, i32* [[GLOBAL_CASTED_I]], align 4, !noalias !27
// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l76(i32* [[TMP47]], i32 [[TMP49]]) #[[ATTR3]]
// CHECK-32-NEXT: br label [[DOTOMP_OUTLINED__5_EXIT]]
// CHECK-32: .omp_outlined..5.exit:
// CHECK-32-NEXT: ret i32 0
// CHECK-32-LABEL: define {{[^@]+}}@.omp_task_privates_map..8
// CHECK-32-SAME: (%struct..kmp_privates.t.3* noalias noundef [[TMP0:%.*]], i32*** noalias noundef [[TMP1:%.*]], i32** noalias noundef [[TMP2:%.*]]) #[[ATTR7]] {
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t.3*, align 4
// CHECK-32-NEXT: [[DOTADDR1:%.*]] = alloca i32***, align 4
// CHECK-32-NEXT: [[DOTADDR2:%.*]] = alloca i32**, align 4
// CHECK-32-NEXT: store %struct..kmp_privates.t.3* [[TMP0]], %struct..kmp_privates.t.3** [[DOTADDR]], align 4
// CHECK-32-NEXT: store i32*** [[TMP1]], i32**** [[DOTADDR1]], align 4
// CHECK-32-NEXT: store i32** [[TMP2]], i32*** [[DOTADDR2]], align 4
// CHECK-32-NEXT: [[TMP3:%.*]] = load %struct..kmp_privates.t.3*, %struct..kmp_privates.t.3** [[DOTADDR]], align 4
// CHECK-32-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3:%.*]], %struct..kmp_privates.t.3* [[TMP3]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP5:%.*]] = load i32***, i32**** [[DOTADDR1]], align 4
// CHECK-32-NEXT: store i32** [[TMP4]], i32*** [[TMP5]], align 4
// CHECK-32-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], %struct..kmp_privates.t.3* [[TMP3]], i32 0, i32 1
// CHECK-32-NEXT: [[TMP7:%.*]] = load i32**, i32*** [[DOTADDR2]], align 4
// CHECK-32-NEXT: store i32* [[TMP6]], i32** [[TMP7]], align 4
// CHECK-32-NEXT: ret void
// CHECK-32-LABEL: define {{[^@]+}}@.omp_task_entry..9
// CHECK-32-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.2* noalias noundef [[TMP1:%.*]]) #[[ATTR4]] {
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 4
// CHECK-32-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 4
// CHECK-32-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 4
// CHECK-32-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 4
// CHECK-32-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.0*, align 4
// CHECK-32-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca i32**, align 4
// CHECK-32-NEXT: [[DOTFIRSTPRIV_PTR_ADDR1_I:%.*]] = alloca i32*, align 4
// CHECK-32-NEXT: [[GLOBAL_CASTED_I:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.2*, align 4
// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4
// CHECK-32-NEXT: store %struct.kmp_task_t_with_privates.2* [[TMP1]], %struct.kmp_task_t_with_privates.2** [[DOTADDR1]], align 4
// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4
// CHECK-32-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.2*, %struct.kmp_task_t_with_privates.2** [[DOTADDR1]], align 4
// CHECK-32-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2:%.*]], %struct.kmp_task_t_with_privates.2* [[TMP3]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2
// CHECK-32-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 4
// CHECK-32-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.0*
// CHECK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2]], %struct.kmp_task_t_with_privates.2* [[TMP3]], i32 0, i32 1
// CHECK-32-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t.3* [[TMP9]] to i8*
// CHECK-32-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates.2* [[TMP3]] to i8*
// CHECK-32-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META28:![0-9]+]])
// CHECK-32-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META31:![0-9]+]])
// CHECK-32-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META33:![0-9]+]])
// CHECK-32-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META35:![0-9]+]])
// CHECK-32-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !37
// CHECK-32-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 4, !noalias !37
// CHECK-32-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !37
// CHECK-32-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t.3*, i32***, i32**)* @.omp_task_privates_map..8 to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !37
// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 4, !noalias !37
// CHECK-32-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 4, !noalias !37
// CHECK-32-NEXT: [[TMP12:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 4, !noalias !37
// CHECK-32-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !37
// CHECK-32-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !37
// CHECK-32-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i32***, i32**)*
// CHECK-32-NEXT: call void [[TMP15]](i8* [[TMP14]], i32*** [[DOTFIRSTPRIV_PTR_ADDR_I]], i32** [[DOTFIRSTPRIV_PTR_ADDR1_I]]) #[[ATTR3]]
// CHECK-32-NEXT: [[TMP16:%.*]] = load i32**, i32*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 4, !noalias !37
// CHECK-32-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 4, !noalias !37
// CHECK-32-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP12]], i32 0, i32 2
// CHECK-32-NEXT: [[TMP19:%.*]] = load i32*, i32** [[TMP16]], align 4
// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* @global, align 4, !noalias !37
// CHECK-32-NEXT: store i32 [[TMP20]], i32* [[GLOBAL_CASTED_I]], align 4, !noalias !37
// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[GLOBAL_CASTED_I]], align 4, !noalias !37
// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l76(i32* [[TMP19]], i32 [[TMP21]]) #[[ATTR3]]
// CHECK-32-NEXT: ret i32 0
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l83
// CHECK-32-SAME: (i32 noundef [[GLOBAL:%.*]]) #[[ATTR2]] {
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[GLOBAL_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[GLOBAL_CASTED:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: store i32 [[GLOBAL]], i32* [[GLOBAL_ADDR]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[GLOBAL_ADDR]], align 4
// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[GLOBAL_CASTED]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[GLOBAL_CASTED]], align 4
// CHECK-32-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32)* @.omp_outlined..10 to void (i32*, i32*, ...)*), i32 [[TMP1]])
// CHECK-32-NEXT: ret void
// CHECK-32-LABEL: define {{[^@]+}}@.omp_outlined..10
// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[GLOBAL:%.*]]) #[[ATTR2]] {
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK-32-NEXT: [[GLOBAL_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[I3:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[GLOBAL_CASTED:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
// CHECK-32-NEXT: store i32 [[GLOBAL]], i32* [[GLOBAL_ADDR]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[GLOBAL_ADDR]], align 4
// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTCAPTURE_EXPR_]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
// CHECK-32-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 0
// CHECK-32-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK-32-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
// CHECK-32-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
// CHECK-32-NEXT: store i32 0, i32* [[I]], align 4
// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
// CHECK-32-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP2]]
// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK-32: omp.precond.then:
// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
// CHECK-32-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_COMB_UB]], align 4
// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK-32-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4
// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]]
// CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32: cond.true:
// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
// CHECK-32-NEXT: br label [[COND_END:%.*]]
// CHECK-32: cond.false:
// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK-32-NEXT: br label [[COND_END]]
// CHECK-32: cond.end:
// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ [[TMP8]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ]
// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4
// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK-32-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK-32-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]]
// CHECK-32-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[GLOBAL_ADDR]], align 4
// CHECK-32-NEXT: store i32 [[TMP15]], i32* [[GLOBAL_CASTED]], align 4
// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[GLOBAL_CASTED]], align 4
// CHECK-32-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4
// CHECK-32-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP18]])
// CHECK-32-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK-32-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
// CHECK-32-NEXT: call void @.omp_outlined..11(i32* [[TMP19]], i32* [[DOTBOUND_ZERO_ADDR]], i32 [[TMP13]], i32 [[TMP14]], i32 [[TMP16]]) #[[ATTR3]]
// CHECK-32-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP18]])
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
// CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-32: omp.loop.exit:
// CHECK-32-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK-32-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4
// CHECK-32-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP23]])
// CHECK-32-NEXT: br label [[OMP_PRECOND_END]]
// CHECK-32: omp.precond.end:
// CHECK-32-NEXT: ret void
// CHECK-32-LABEL: define {{[^@]+}}@.omp_outlined..11
// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[GLOBAL:%.*]]) #[[ATTR2]] {
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[GLOBAL_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[I3:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4
// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4
// CHECK-32-NEXT: store i32 [[GLOBAL]], i32* [[GLOBAL_ADDR]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[GLOBAL_ADDR]], align 4
// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTCAPTURE_EXPR_]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
// CHECK-32-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 0
// CHECK-32-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK-32-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
// CHECK-32-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
// CHECK-32-NEXT: store i32 0, i32* [[I]], align 4
// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
// CHECK-32-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP2]]
// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK-32: omp.precond.then:
// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4
// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
// CHECK-32-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_UB]], align 4
// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4
// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4
// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_LB]], align 4
// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4
// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK-32-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4
// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP7]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]]
// CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32: cond.true:
// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
// CHECK-32-NEXT: br label [[COND_END:%.*]]
// CHECK-32: cond.false:
// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK-32-NEXT: br label [[COND_END]]
// CHECK-32: cond.end:
// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ]
// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4
// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
// CHECK-32-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK-32-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]]
// CHECK-32-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I3]], align 4
// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[GLOBAL_ADDR]], align 4
// CHECK-32-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1
// CHECK-32-NEXT: store i32 [[ADD6]], i32* [[GLOBAL_ADDR]], align 4
// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32: omp.body.continue:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK-32-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], 1
// CHECK-32-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-32: omp.loop.exit:
// CHECK-32-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4
// CHECK-32-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP19]])
// CHECK-32-NEXT: br label [[OMP_PRECOND_END]]
// CHECK-32: omp.precond.end:
// CHECK-32-NEXT: ret void
// CHECK-32-LABEL: define {{[^@]+}}@.omp_task_privates_map..13
// CHECK-32-SAME: (%struct..kmp_privates.t.6* noalias noundef [[TMP0:%.*]], i32** noalias noundef [[TMP1:%.*]]) #[[ATTR7]] {
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t.6*, align 4
// CHECK-32-NEXT: [[DOTADDR1:%.*]] = alloca i32**, align 4
// CHECK-32-NEXT: store %struct..kmp_privates.t.6* [[TMP0]], %struct..kmp_privates.t.6** [[DOTADDR]], align 4
// CHECK-32-NEXT: store i32** [[TMP1]], i32*** [[DOTADDR1]], align 4
// CHECK-32-NEXT: [[TMP2:%.*]] = load %struct..kmp_privates.t.6*, %struct..kmp_privates.t.6** [[DOTADDR]], align 4
// CHECK-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_6:%.*]], %struct..kmp_privates.t.6* [[TMP2]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP4:%.*]] = load i32**, i32*** [[DOTADDR1]], align 4
// CHECK-32-NEXT: store i32* [[TMP3]], i32** [[TMP4]], align 4
// CHECK-32-NEXT: ret void
// CHECK-32-LABEL: define {{[^@]+}}@.omp_task_entry..14
// CHECK-32-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.5* noalias noundef [[TMP1:%.*]]) #[[ATTR4]] {
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 4
// CHECK-32-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 4
// CHECK-32-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 4
// CHECK-32-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 4
// CHECK-32-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.4*, align 4
// CHECK-32-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca i32*, align 4
// CHECK-32-NEXT: [[GLOBAL_CASTED_I:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.5*, align 4
// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4
// CHECK-32-NEXT: store %struct.kmp_task_t_with_privates.5* [[TMP1]], %struct.kmp_task_t_with_privates.5** [[DOTADDR1]], align 4
// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4
// CHECK-32-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.5*, %struct.kmp_task_t_with_privates.5** [[DOTADDR1]], align 4
// CHECK-32-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_5:%.*]], %struct.kmp_task_t_with_privates.5* [[TMP3]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2
// CHECK-32-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 4
// CHECK-32-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.4*
// CHECK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_5]], %struct.kmp_task_t_with_privates.5* [[TMP3]], i32 0, i32 1
// CHECK-32-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t.6* [[TMP9]] to i8*
// CHECK-32-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates.5* [[TMP3]] to i8*
// CHECK-32-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META38:![0-9]+]])
// CHECK-32-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META41:![0-9]+]])
// CHECK-32-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META43:![0-9]+]])
// CHECK-32-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META45:![0-9]+]])
// CHECK-32-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !47
// CHECK-32-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 4, !noalias !47
// CHECK-32-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !47
// CHECK-32-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t.6*, i32**)* @.omp_task_privates_map..13 to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !47
// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 4, !noalias !47
// CHECK-32-NEXT: store %struct.anon.4* [[TMP8]], %struct.anon.4** [[__CONTEXT_ADDR_I]], align 4, !noalias !47
// CHECK-32-NEXT: [[TMP12:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR_I]], align 4, !noalias !47
// CHECK-32-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !47
// CHECK-32-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !47
// CHECK-32-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i32**)*
// CHECK-32-NEXT: call void [[TMP15]](i8* [[TMP14]], i32** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR3]]
// CHECK-32-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 4, !noalias !47
// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4
// CHECK-32-NEXT: store i32 [[TMP17]], i32* [[GLOBAL_CASTED_I]], align 4, !noalias !47
// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[GLOBAL_CASTED_I]], align 4, !noalias !47
// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l83(i32 [[TMP18]]) #[[ATTR3]]
// CHECK-32-NEXT: ret i32 0
// CHECK-32-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
// CHECK-32-SAME: () #[[ATTR7]] {
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: call void @__tgt_register_requires(i64 1)
// CHECK-32-NEXT: ret void
// TCHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l66
// TCHECK-64-SAME: () #[[ATTR0:[0-9]+]] {
// TCHECK-64-NEXT: entry:
// TCHECK-64-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*))
// TCHECK-64-NEXT: ret void
// TCHECK-64-LABEL: define {{[^@]+}}@.omp_outlined.
// TCHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
// TCHECK-64-NEXT: entry:
// TCHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// TCHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// TCHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// TCHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
// TCHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
// TCHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
// TCHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// TCHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// TCHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
// TCHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// TCHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// TCHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// TCHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4
// TCHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// TCHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// TCHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// TCHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
// TCHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
// TCHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// TCHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// TCHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// TCHECK-64: cond.true:
// TCHECK-64-NEXT: br label [[COND_END:%.*]]
// TCHECK-64: cond.false:
// TCHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// TCHECK-64-NEXT: br label [[COND_END]]
// TCHECK-64: cond.end:
// TCHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
// TCHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4
// TCHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// TCHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4
// TCHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// TCHECK-64: omp.inner.for.cond:
// TCHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// TCHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// TCHECK-64-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// TCHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// TCHECK-64: omp.inner.for.body:
// TCHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// TCHECK-64-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
// TCHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// TCHECK-64-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// TCHECK-64-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]])
// TCHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// TCHECK-64: omp.inner.for.inc:
// TCHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// TCHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// TCHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
// TCHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
// TCHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]]
// TCHECK-64: omp.inner.for.end:
// TCHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// TCHECK-64: omp.loop.exit:
// TCHECK-64-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP1]])
// TCHECK-64-NEXT: ret void
// TCHECK-64-LABEL: define {{[^@]+}}@.omp_outlined..1
// TCHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR0]] {
// TCHECK-64-NEXT: entry:
// TCHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// TCHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// TCHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
// TCHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
// TCHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// TCHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
// TCHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
// TCHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
// TCHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// TCHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// TCHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
// TCHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// TCHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// TCHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8
// TCHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8
// TCHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4
// TCHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4
// TCHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8
// TCHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32
// TCHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8
// TCHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32
// TCHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4
// TCHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4
// TCHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// TCHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// TCHECK-64-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// TCHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4
// TCHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
// TCHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// TCHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9
// TCHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// TCHECK-64: cond.true:
// TCHECK-64-NEXT: br label [[COND_END:%.*]]
// TCHECK-64: cond.false:
// TCHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// TCHECK-64-NEXT: br label [[COND_END]]
// TCHECK-64: cond.end:
// TCHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ]
// TCHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4
// TCHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
// TCHECK-64-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4
// TCHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// TCHECK-64: omp.inner.for.cond:
// TCHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// TCHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// TCHECK-64-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// TCHECK-64-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// TCHECK-64: omp.inner.for.body:
// TCHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// TCHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// TCHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// TCHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4
// TCHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// TCHECK-64: omp.body.continue:
// TCHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// TCHECK-64: omp.inner.for.inc:
// TCHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// TCHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// TCHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4
// TCHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]]
// TCHECK-64: omp.inner.for.end:
// TCHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// TCHECK-64: omp.loop.exit:
// TCHECK-64-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]])
// TCHECK-64-NEXT: ret void
// TCHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l76
// TCHECK-64-SAME: (i64* noundef [[PLOCAL:%.*]], i64 noundef [[GLOBAL:%.*]]) #[[ATTR0]] {
// TCHECK-64-NEXT: entry:
// TCHECK-64-NEXT: [[PLOCAL_ADDR:%.*]] = alloca i64*, align 8
// TCHECK-64-NEXT: [[GLOBAL_ADDR:%.*]] = alloca i64, align 8
// TCHECK-64-NEXT: [[GLOBAL_CASTED:%.*]] = alloca i64, align 8
// TCHECK-64-NEXT: store i64* [[PLOCAL]], i64** [[PLOCAL_ADDR]], align 8
// TCHECK-64-NEXT: store i64 [[GLOBAL]], i64* [[GLOBAL_ADDR]], align 8
// TCHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[GLOBAL_ADDR]] to i32*
// TCHECK-64-NEXT: [[TMP0:%.*]] = load i64*, i64** [[PLOCAL_ADDR]], align 8
// TCHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4
// TCHECK-64-NEXT: [[CONV1:%.*]] = bitcast i64* [[GLOBAL_CASTED]] to i32*
// TCHECK-64-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4
// TCHECK-64-NEXT: [[TMP2:%.*]] = load i64, i64* [[GLOBAL_CASTED]], align 8
// TCHECK-64-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64*, i64)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i64* [[TMP0]], i64 [[TMP2]])
// TCHECK-64-NEXT: ret void
// TCHECK-64-LABEL: define {{[^@]+}}@.omp_outlined..2
// TCHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64* noundef [[PLOCAL:%.*]], i64 noundef [[GLOBAL:%.*]]) #[[ATTR0]] {
// TCHECK-64-NEXT: entry:
// TCHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// TCHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// TCHECK-64-NEXT: [[PLOCAL_ADDR:%.*]] = alloca i64*, align 8
// TCHECK-64-NEXT: [[GLOBAL_ADDR:%.*]] = alloca i64, align 8
// TCHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// TCHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
// TCHECK-64-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i64, align 8
// TCHECK-64-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
// TCHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
// TCHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
// TCHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
// TCHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// TCHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// TCHECK-64-NEXT: [[I4:%.*]] = alloca i32, align 4
// TCHECK-64-NEXT: [[GLOBAL_CASTED:%.*]] = alloca i64, align 8
// TCHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// TCHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// TCHECK-64-NEXT: store i64* [[PLOCAL]], i64** [[PLOCAL_ADDR]], align 8
// TCHECK-64-NEXT: store i64 [[GLOBAL]], i64* [[GLOBAL_ADDR]], align 8
// TCHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[GLOBAL_ADDR]] to i32*
// TCHECK-64-NEXT: [[TMP0:%.*]] = load i64*, i64** [[PLOCAL_ADDR]], align 8
// TCHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[TMP0]], align 8
// TCHECK-64-NEXT: store i64 [[TMP1]], i64* [[DOTCAPTURE_EXPR_]], align 8
// TCHECK-64-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_]], align 8
// TCHECK-64-NEXT: [[SUB:%.*]] = sub nsw i64 [[TMP2]], 0
// TCHECK-64-NEXT: [[DIV:%.*]] = sdiv i64 [[SUB]], 1
// TCHECK-64-NEXT: [[CONV2:%.*]] = trunc i64 [[DIV]] to i32
// TCHECK-64-NEXT: [[SUB3:%.*]] = sub nsw i32 [[CONV2]], 1
// TCHECK-64-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_1]], align 4
// TCHECK-64-NEXT: store i32 0, i32* [[I]], align 4
// TCHECK-64-NEXT: [[TMP3:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_]], align 8
// TCHECK-64-NEXT: [[CMP:%.*]] = icmp slt i64 0, [[TMP3]]
// TCHECK-64-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// TCHECK-64: omp.precond.then:
// TCHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// TCHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
// TCHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4
// TCHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// TCHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// TCHECK-64-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// TCHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4
// TCHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
// TCHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// TCHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
// TCHECK-64-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
// TCHECK-64-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// TCHECK-64: cond.true:
// TCHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
// TCHECK-64-NEXT: br label [[COND_END:%.*]]
// TCHECK-64: cond.false:
// TCHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// TCHECK-64-NEXT: br label [[COND_END]]
// TCHECK-64: cond.end:
// TCHECK-64-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ]
// TCHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4
// TCHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// TCHECK-64-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4
// TCHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// TCHECK-64: omp.inner.for.cond:
// TCHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// TCHECK-64-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// TCHECK-64-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]]
// TCHECK-64-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// TCHECK-64: omp.inner.for.body:
// TCHECK-64-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// TCHECK-64-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64
// TCHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// TCHECK-64-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64
// TCHECK-64-NEXT: [[TMP18:%.*]] = load i64*, i64** [[PLOCAL_ADDR]], align 8
// TCHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4
// TCHECK-64-NEXT: [[CONV7:%.*]] = bitcast i64* [[GLOBAL_CASTED]] to i32*
// TCHECK-64-NEXT: store i32 [[TMP19]], i32* [[CONV7]], align 4
// TCHECK-64-NEXT: [[TMP20:%.*]] = load i64, i64* [[GLOBAL_CASTED]], align 8
// TCHECK-64-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64*, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP15]], i64 [[TMP17]], i64* [[TMP18]], i64 [[TMP20]])
// TCHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// TCHECK-64: omp.inner.for.inc:
// TCHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// TCHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// TCHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]]
// TCHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
// TCHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]]
// TCHECK-64: omp.inner.for.end:
// TCHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// TCHECK-64: omp.loop.exit:
// TCHECK-64-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// TCHECK-64-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4
// TCHECK-64-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP24]])
// TCHECK-64-NEXT: br label [[OMP_PRECOND_END]]
// TCHECK-64: omp.precond.end:
// TCHECK-64-NEXT: ret void
// TCHECK-64-LABEL: define {{[^@]+}}@.omp_outlined..3
// TCHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64* noundef [[PLOCAL:%.*]], i64 noundef [[GLOBAL:%.*]]) #[[ATTR0]] {
// TCHECK-64-NEXT: entry:
// TCHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// TCHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// TCHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
// TCHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
// TCHECK-64-NEXT: [[PLOCAL_ADDR:%.*]] = alloca i64*, align 8
// TCHECK-64-NEXT: [[GLOBAL_ADDR:%.*]] = alloca i64, align 8
// TCHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// TCHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
// TCHECK-64-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i64, align 8
// TCHECK-64-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
// TCHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
// TCHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
// TCHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
// TCHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// TCHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// TCHECK-64-NEXT: [[I6:%.*]] = alloca i32, align 4
// TCHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// TCHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// TCHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8
// TCHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8
// TCHECK-64-NEXT: store i64* [[PLOCAL]], i64** [[PLOCAL_ADDR]], align 8
// TCHECK-64-NEXT: store i64 [[GLOBAL]], i64* [[GLOBAL_ADDR]], align 8
// TCHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[GLOBAL_ADDR]] to i32*
// TCHECK-64-NEXT: [[TMP0:%.*]] = load i64*, i64** [[PLOCAL_ADDR]], align 8
// TCHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[TMP0]], align 8
// TCHECK-64-NEXT: store i64 [[TMP1]], i64* [[DOTCAPTURE_EXPR_]], align 8
// TCHECK-64-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_]], align 8
// TCHECK-64-NEXT: [[SUB:%.*]] = sub nsw i64 [[TMP2]], 0
// TCHECK-64-NEXT: [[DIV:%.*]] = sdiv i64 [[SUB]], 1
// TCHECK-64-NEXT: [[CONV2:%.*]] = trunc i64 [[DIV]] to i32
// TCHECK-64-NEXT: [[SUB3:%.*]] = sub nsw i32 [[CONV2]], 1
// TCHECK-64-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_1]], align 4
// TCHECK-64-NEXT: store i32 0, i32* [[I]], align 4
// TCHECK-64-NEXT: [[TMP3:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_]], align 8
// TCHECK-64-NEXT: [[CMP:%.*]] = icmp slt i64 0, [[TMP3]]
// TCHECK-64-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// TCHECK-64: omp.precond.then:
// TCHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4
// TCHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
// TCHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4
// TCHECK-64-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8
// TCHECK-64-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP5]] to i32
// TCHECK-64-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8
// TCHECK-64-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP6]] to i32
// TCHECK-64-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_LB]], align 4
// TCHECK-64-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4
// TCHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// TCHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// TCHECK-64-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// TCHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4
// TCHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
// TCHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// TCHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
// TCHECK-64-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]]
// TCHECK-64-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// TCHECK-64: cond.true:
// TCHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
// TCHECK-64-NEXT: br label [[COND_END:%.*]]
// TCHECK-64: cond.false:
// TCHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// TCHECK-64-NEXT: br label [[COND_END]]
// TCHECK-64: cond.end:
// TCHECK-64-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ]
// TCHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4
// TCHECK-64-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
// TCHECK-64-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4
// TCHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// TCHECK-64: omp.inner.for.cond:
// TCHECK-64-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// TCHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// TCHECK-64-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]]
// TCHECK-64-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// TCHECK-64: omp.inner.for.body:
// TCHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// TCHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1
// TCHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// TCHECK-64-NEXT: store i32 [[ADD]], i32* [[I6]], align 4
// TCHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[CONV]], align 4
// TCHECK-64-NEXT: [[CONV9:%.*]] = sext i32 [[TMP17]] to i64
// TCHECK-64-NEXT: [[TMP18:%.*]] = load i64*, i64** [[PLOCAL_ADDR]], align 8
// TCHECK-64-NEXT: store i64 [[CONV9]], i64* [[TMP18]], align 8
// TCHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4
// TCHECK-64-NEXT: store i32 [[TMP19]], i32* @_ZZ3fooiE6local1, align 4
// TCHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// TCHECK-64: omp.body.continue:
// TCHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// TCHECK-64: omp.inner.for.inc:
// TCHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// TCHECK-64-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], 1
// TCHECK-64-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4
// TCHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]]
// TCHECK-64: omp.inner.for.end:
// TCHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// TCHECK-64: omp.loop.exit:
// TCHECK-64-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// TCHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4
// TCHECK-64-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP22]])
// TCHECK-64-NEXT: br label [[OMP_PRECOND_END]]
// TCHECK-64: omp.precond.end:
// TCHECK-64-NEXT: ret void
// TCHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l66
// TCHECK-32-SAME: () #[[ATTR0:[0-9]+]] {
// TCHECK-32-NEXT: entry:
// TCHECK-32-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*))
// TCHECK-32-NEXT: ret void
// TCHECK-32-LABEL: define {{[^@]+}}@.omp_outlined.
// TCHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
// TCHECK-32-NEXT: entry:
// TCHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
// TCHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
// TCHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// TCHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
// TCHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
// TCHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
// TCHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// TCHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// TCHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
// TCHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
// TCHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
// TCHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// TCHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4
// TCHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// TCHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// TCHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
// TCHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
// TCHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
// TCHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// TCHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// TCHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// TCHECK-32: cond.true:
// TCHECK-32-NEXT: br label [[COND_END:%.*]]
// TCHECK-32: cond.false:
// TCHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// TCHECK-32-NEXT: br label [[COND_END]]
// TCHECK-32: cond.end:
// TCHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
// TCHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4
// TCHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// TCHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4
// TCHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// TCHECK-32: omp.inner.for.cond:
// TCHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// TCHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// TCHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// TCHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// TCHECK-32: omp.inner.for.body:
// TCHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// TCHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// TCHECK-32-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP7]], i32 [[TMP8]])
// TCHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// TCHECK-32: omp.inner.for.inc:
// TCHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// TCHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// TCHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]]
// TCHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
// TCHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]]
// TCHECK-32: omp.inner.for.end:
// TCHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// TCHECK-32: omp.loop.exit:
// TCHECK-32-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP1]])
// TCHECK-32-NEXT: ret void
// TCHECK-32-LABEL: define {{[^@]+}}@.omp_outlined..1
// TCHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR0]] {
// TCHECK-32-NEXT: entry:
// TCHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
// TCHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
// TCHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
// TCHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
// TCHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// TCHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
// TCHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
// TCHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
// TCHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// TCHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// TCHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
// TCHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
// TCHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
// TCHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4
// TCHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4
// TCHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4
// TCHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4
// TCHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4
// TCHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4
// TCHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4
// TCHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4
// TCHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// TCHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// TCHECK-32-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
// TCHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4
// TCHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
// TCHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// TCHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9
// TCHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// TCHECK-32: cond.true:
// TCHECK-32-NEXT: br label [[COND_END:%.*]]
// TCHECK-32: cond.false:
// TCHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// TCHECK-32-NEXT: br label [[COND_END]]
// TCHECK-32: cond.end:
// TCHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ]
// TCHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4
// TCHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
// TCHECK-32-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4
// TCHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// TCHECK-32: omp.inner.for.cond:
// TCHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// TCHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// TCHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// TCHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// TCHECK-32: omp.inner.for.body:
// TCHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// TCHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// TCHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// TCHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4
// TCHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// TCHECK-32: omp.body.continue:
// TCHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// TCHECK-32: omp.inner.for.inc:
// TCHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// TCHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1
// TCHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4
// TCHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]]
// TCHECK-32: omp.inner.for.end:
// TCHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// TCHECK-32: omp.loop.exit:
// TCHECK-32-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]])
// TCHECK-32-NEXT: ret void
// TCHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l76
// TCHECK-32-SAME: (i32* noundef [[PLOCAL:%.*]], i32 noundef [[GLOBAL:%.*]]) #[[ATTR0]] {
// TCHECK-32-NEXT: entry:
// TCHECK-32-NEXT: [[PLOCAL_ADDR:%.*]] = alloca i32*, align 4
// TCHECK-32-NEXT: [[GLOBAL_ADDR:%.*]] = alloca i32, align 4
// TCHECK-32-NEXT: [[GLOBAL_CASTED:%.*]] = alloca i32, align 4
// TCHECK-32-NEXT: store i32* [[PLOCAL]], i32** [[PLOCAL_ADDR]], align 4
// TCHECK-32-NEXT: store i32 [[GLOBAL]], i32* [[GLOBAL_ADDR]], align 4
// TCHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[PLOCAL_ADDR]], align 4
// TCHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[GLOBAL_ADDR]], align 4
// TCHECK-32-NEXT: store i32 [[TMP1]], i32* [[GLOBAL_CASTED]], align 4
// TCHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[GLOBAL_CASTED]], align 4
// TCHECK-32-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32* [[TMP0]], i32 [[TMP2]])
// TCHECK-32-NEXT: ret void
// TCHECK-32-LABEL: define {{[^@]+}}@.omp_outlined..2
// TCHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef [[PLOCAL:%.*]], i32 noundef [[GLOBAL:%.*]]) #[[ATTR0]] {
// TCHECK-32-NEXT: entry:
// TCHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
// TCHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
// TCHECK-32-NEXT: [[PLOCAL_ADDR:%.*]] = alloca i32*, align 4
// TCHECK-32-NEXT: [[GLOBAL_ADDR:%.*]] = alloca i32, align 4
// TCHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// TCHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
// TCHECK-32-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
// TCHECK-32-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
// TCHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
// TCHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
// TCHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
// TCHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// TCHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// TCHECK-32-NEXT: [[I3:%.*]] = alloca i32, align 4
// TCHECK-32-NEXT: [[GLOBAL_CASTED:%.*]] = alloca i32, align 4
// TCHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
// TCHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
// TCHECK-32-NEXT: store i32* [[PLOCAL]], i32** [[PLOCAL_ADDR]], align 4
// TCHECK-32-NEXT: store i32 [[GLOBAL]], i32* [[GLOBAL_ADDR]], align 4
// TCHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[PLOCAL_ADDR]], align 4
// TCHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
// TCHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4
// TCHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
// TCHECK-32-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
// TCHECK-32-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// TCHECK-32-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
// TCHECK-32-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
// TCHECK-32-NEXT: store i32 0, i32* [[I]], align 4
// TCHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
// TCHECK-32-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
// TCHECK-32-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// TCHECK-32: omp.precond.then:
// TCHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// TCHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
// TCHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4
// TCHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// TCHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// TCHECK-32-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
// TCHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4
// TCHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
// TCHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// TCHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
// TCHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
// TCHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// TCHECK-32: cond.true:
// TCHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
// TCHECK-32-NEXT: br label [[COND_END:%.*]]
// TCHECK-32: cond.false:
// TCHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// TCHECK-32-NEXT: br label [[COND_END]]
// TCHECK-32: cond.end:
// TCHECK-32-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ]
// TCHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4
// TCHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// TCHECK-32-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4
// TCHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// TCHECK-32: omp.inner.for.cond:
// TCHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// TCHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// TCHECK-32-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]]
// TCHECK-32-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// TCHECK-32: omp.inner.for.body:
// TCHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// TCHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// TCHECK-32-NEXT: [[TMP16:%.*]] = load i32*, i32** [[PLOCAL_ADDR]], align 4
// TCHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[GLOBAL_ADDR]], align 4
// TCHECK-32-NEXT: store i32 [[TMP17]], i32* [[GLOBAL_CASTED]], align 4
// TCHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[GLOBAL_CASTED]], align 4
// TCHECK-32-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP14]], i32 [[TMP15]], i32* [[TMP16]], i32 [[TMP18]])
// TCHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// TCHECK-32: omp.inner.for.inc:
// TCHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// TCHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// TCHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]]
// TCHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
// TCHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]]
// TCHECK-32: omp.inner.for.end:
// TCHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// TCHECK-32: omp.loop.exit:
// TCHECK-32-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
// TCHECK-32-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4
// TCHECK-32-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP22]])
// TCHECK-32-NEXT: br label [[OMP_PRECOND_END]]
// TCHECK-32: omp.precond.end:
// TCHECK-32-NEXT: ret void
// TCHECK-32-LABEL: define {{[^@]+}}@.omp_outlined..3
// TCHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef [[PLOCAL:%.*]], i32 noundef [[GLOBAL:%.*]]) #[[ATTR0]] {
// TCHECK-32-NEXT: entry:
// TCHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
// TCHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
// TCHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
// TCHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
// TCHECK-32-NEXT: [[PLOCAL_ADDR:%.*]] = alloca i32*, align 4
// TCHECK-32-NEXT: [[GLOBAL_ADDR:%.*]] = alloca i32, align 4
// TCHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// TCHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
// TCHECK-32-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
// TCHECK-32-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
// TCHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
// TCHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
// TCHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
// TCHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// TCHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// TCHECK-32-NEXT: [[I3:%.*]] = alloca i32, align 4
// TCHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
// TCHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
// TCHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4
// TCHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4
// TCHECK-32-NEXT: store i32* [[PLOCAL]], i32** [[PLOCAL_ADDR]], align 4
// TCHECK-32-NEXT: store i32 [[GLOBAL]], i32* [[GLOBAL_ADDR]], align 4
// TCHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[PLOCAL_ADDR]], align 4
// TCHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
// TCHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4
// TCHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
// TCHECK-32-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
// TCHECK-32-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// TCHECK-32-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
// TCHECK-32-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
// TCHECK-32-NEXT: store i32 0, i32* [[I]], align 4
// TCHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
// TCHECK-32-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
// TCHECK-32-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// TCHECK-32: omp.precond.then:
// TCHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4
// TCHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
// TCHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4
// TCHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4
// TCHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4
// TCHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4
// TCHECK-32-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4
// TCHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// TCHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// TCHECK-32-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
// TCHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4
// TCHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
// TCHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// TCHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
// TCHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]]
// TCHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// TCHECK-32: cond.true:
// TCHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
// TCHECK-32-NEXT: br label [[COND_END:%.*]]
// TCHECK-32: cond.false:
// TCHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// TCHECK-32-NEXT: br label [[COND_END]]
// TCHECK-32: cond.end:
// TCHECK-32-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ]
// TCHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4
// TCHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
// TCHECK-32-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4
// TCHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// TCHECK-32: omp.inner.for.cond:
// TCHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// TCHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// TCHECK-32-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]]
// TCHECK-32-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// TCHECK-32: omp.inner.for.body:
// TCHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// TCHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1
// TCHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// TCHECK-32-NEXT: store i32 [[ADD]], i32* [[I3]], align 4
// TCHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[GLOBAL_ADDR]], align 4
// TCHECK-32-NEXT: [[TMP18:%.*]] = load i32*, i32** [[PLOCAL_ADDR]], align 4
// TCHECK-32-NEXT: store i32 [[TMP17]], i32* [[TMP18]], align 4
// TCHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[GLOBAL_ADDR]], align 4
// TCHECK-32-NEXT: store i32 [[TMP19]], i32* @_ZZ3fooiE6local1, align 4
// TCHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// TCHECK-32: omp.body.continue:
// TCHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// TCHECK-32: omp.inner.for.inc:
// TCHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// TCHECK-32-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1
// TCHECK-32-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4
// TCHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]]
// TCHECK-32: omp.inner.for.end:
// TCHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// TCHECK-32: omp.loop.exit:
// TCHECK-32-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
// TCHECK-32-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4
// TCHECK-32-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP22]])
// TCHECK-32-NEXT: br label [[OMP_PRECOND_END]]
// TCHECK-32: omp.precond.end:
// TCHECK-32-NEXT: ret void
// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l66
// CHECK-SAME: () #[[ATTR2:[0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l66.omp_outlined)
// CHECK-NEXT: ret void
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
// SIMD-ONLY0: {{.*}}
// SIMD-ONLY1: {{.*}}
// TCHECK: {{.*}}