| // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 |
| // REQUIRES: x86-registered-target |
| // REQUIRES: amdgpu-registered-target |
| |
| // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -x hip %s -o - | FileCheck --check-prefixes=CHECK %s |
| // RUN: %clang_cc1 -triple spirv64-amd-amdhsa -fcuda-is-device -emit-llvm -x hip %s -o - | FileCheck --check-prefixes=CHECK-SPIRV %s |
| // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -x hip %s -disable-O0-optnone -o - | opt -S -O2 | FileCheck %s --check-prefixes=OPT |
| // RUN: %clang_cc1 -triple spirv64-amd-amdhsa -fcuda-is-device -emit-llvm -x hip %s -disable-O0-optnone -o - | opt -S -O2 | FileCheck %s --check-prefixes=OPT-SPIRV |
| // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -x hip %s -o - | FileCheck -check-prefix=HOST %s |
| |
| #include "Inputs/cuda.h" |
| |
| // Coerced struct from `struct S` without all generic pointers lowered into |
| // global ones. |
| |
| // On the host-side compilation, generic pointer won't be coerced. |
| |
| // CHECK-LABEL: define dso_local amdgpu_kernel void @_Z7kernel1Pi( |
| // CHECK-SAME: ptr addrspace(1) noundef [[X_COERCE:%.*]]) #[[ATTR0:[0-9]+]] { |
| // CHECK-NEXT: [[ENTRY:.*:]] |
| // CHECK-NEXT: [[X:%.*]] = alloca ptr, align 8, addrspace(5) |
| // CHECK-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X]] to ptr |
| // CHECK-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X_ADDR]] to ptr |
| // CHECK-NEXT: store ptr addrspace(1) [[X_COERCE]], ptr [[X_ASCAST]], align 8 |
| // CHECK-NEXT: [[X1:%.*]] = load ptr, ptr [[X_ASCAST]], align 8 |
| // CHECK-NEXT: store ptr [[X1]], ptr [[X_ADDR_ASCAST]], align 8 |
| // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR_ASCAST]], align 8 |
| // CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 0 |
| // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 |
| // CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 |
| // CHECK-NEXT: store i32 [[INC]], ptr [[ARRAYIDX]], align 4 |
| // CHECK-NEXT: ret void |
| // |
| // CHECK-SPIRV-LABEL: define spir_kernel void @_Z7kernel1Pi( |
| // CHECK-SPIRV-SAME: ptr addrspace(1) noundef [[X_COERCE:%.*]]) addrspace(4) #[[ATTR0:[0-9]+]] !max_work_group_size [[META5:![0-9]+]] { |
| // CHECK-SPIRV-NEXT: [[ENTRY:.*:]] |
| // CHECK-SPIRV-NEXT: [[X:%.*]] = alloca ptr addrspace(4), align 8 |
| // CHECK-SPIRV-NEXT: [[X_ADDR:%.*]] = alloca ptr addrspace(4), align 8 |
| // CHECK-SPIRV-NEXT: [[X_ASCAST:%.*]] = addrspacecast ptr [[X]] to ptr addrspace(4) |
| // CHECK-SPIRV-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr [[X_ADDR]] to ptr addrspace(4) |
| // CHECK-SPIRV-NEXT: store ptr addrspace(1) [[X_COERCE]], ptr addrspace(4) [[X_ASCAST]], align 8 |
| // CHECK-SPIRV-NEXT: [[X1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[X_ASCAST]], align 8 |
| // CHECK-SPIRV-NEXT: store ptr addrspace(4) [[X1]], ptr addrspace(4) [[X_ADDR_ASCAST]], align 8 |
| // CHECK-SPIRV-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[X_ADDR_ASCAST]], align 8 |
| // CHECK-SPIRV-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP0]], i64 0 |
| // CHECK-SPIRV-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX]], align 4 |
| // CHECK-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 |
| // CHECK-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[ARRAYIDX]], align 4 |
| // CHECK-SPIRV-NEXT: ret void |
| // |
| // OPT-LABEL: define dso_local amdgpu_kernel void @_Z7kernel1Pi( |
| // OPT-SAME: ptr addrspace(1) noundef captures(none) [[X_COERCE:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { |
| // OPT-NEXT: [[ENTRY:.*:]] |
| // OPT-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(1) [[X_COERCE]], align 4 |
| // OPT-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 |
| // OPT-NEXT: store i32 [[INC]], ptr addrspace(1) [[X_COERCE]], align 4 |
| // OPT-NEXT: ret void |
| // |
| // OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel1Pi( |
| // OPT-SPIRV-SAME: ptr addrspace(1) noundef [[X_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0:[0-9]+]] !max_work_group_size [[META5:![0-9]+]] { |
| // OPT-SPIRV-NEXT: [[ENTRY:.*:]] |
| // OPT-SPIRV-NEXT: [[TMP0:%.*]] = ptrtoint ptr addrspace(1) [[X_COERCE]] to i64 |
| // OPT-SPIRV-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) |
| // OPT-SPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4 |
| // OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1 |
| // OPT-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[TMP1]], align 4 |
| // OPT-SPIRV-NEXT: ret void |
| // |
| // HOST-LABEL: define dso_local void @_Z22__device_stub__kernel1Pi( |
| // HOST-SAME: ptr noundef [[X:%.*]]) #[[ATTR0:[0-9]+]] { |
| // HOST-NEXT: [[ENTRY:.*:]] |
| // HOST-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8 |
| // HOST-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8 |
| // HOST-NEXT: [[TMP0:%.*]] = call i32 @hipSetupArgument(ptr [[X_ADDR]], i64 8, i64 0) |
| // HOST-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0 |
| // HOST-NEXT: br i1 [[TMP1]], label %[[SETUP_NEXT:.*]], label %[[SETUP_END:.*]] |
| // HOST: [[SETUP_NEXT]]: |
| // HOST-NEXT: [[TMP2:%.*]] = call i32 @hipLaunchByPtr(ptr @_Z7kernel1Pi) |
| // HOST-NEXT: br label %[[SETUP_END]] |
| // HOST: [[SETUP_END]]: |
| // HOST-NEXT: ret void |
| // |
| __global__ void kernel1(int *x) { |
| x[0]++; |
| } |
| |
| // CHECK-LABEL: define dso_local amdgpu_kernel void @_Z7kernel2Ri( |
| // CHECK-SAME: ptr addrspace(1) noundef nonnull align 4 dereferenceable(4) [[X_COERCE:%.*]]) #[[ATTR0]] { |
| // CHECK-NEXT: [[ENTRY:.*:]] |
| // CHECK-NEXT: [[X:%.*]] = alloca ptr, align 8, addrspace(5) |
| // CHECK-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X]] to ptr |
| // CHECK-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X_ADDR]] to ptr |
| // CHECK-NEXT: store ptr addrspace(1) [[X_COERCE]], ptr [[X_ASCAST]], align 8 |
| // CHECK-NEXT: [[X1:%.*]] = load ptr, ptr [[X_ASCAST]], align 8 |
| // CHECK-NEXT: store ptr [[X1]], ptr [[X_ADDR_ASCAST]], align 8 |
| // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR_ASCAST]], align 8 |
| // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 |
| // CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 |
| // CHECK-NEXT: store i32 [[INC]], ptr [[TMP0]], align 4 |
| // CHECK-NEXT: ret void |
| // |
| // CHECK-SPIRV-LABEL: define spir_kernel void @_Z7kernel2Ri( |
| // CHECK-SPIRV-SAME: ptr addrspace(1) noundef align 4 dereferenceable(4) [[X_COERCE:%.*]]) addrspace(4) #[[ATTR0]] !max_work_group_size [[META5]] { |
| // CHECK-SPIRV-NEXT: [[ENTRY:.*:]] |
| // CHECK-SPIRV-NEXT: [[X:%.*]] = alloca ptr addrspace(4), align 8 |
| // CHECK-SPIRV-NEXT: [[X_ADDR:%.*]] = alloca ptr addrspace(4), align 8 |
| // CHECK-SPIRV-NEXT: [[X_ASCAST:%.*]] = addrspacecast ptr [[X]] to ptr addrspace(4) |
| // CHECK-SPIRV-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr [[X_ADDR]] to ptr addrspace(4) |
| // CHECK-SPIRV-NEXT: store ptr addrspace(1) [[X_COERCE]], ptr addrspace(4) [[X_ASCAST]], align 8 |
| // CHECK-SPIRV-NEXT: [[X1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[X_ASCAST]], align 8 |
| // CHECK-SPIRV-NEXT: store ptr addrspace(4) [[X1]], ptr addrspace(4) [[X_ADDR_ASCAST]], align 8 |
| // CHECK-SPIRV-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[X_ADDR_ASCAST]], align 8 |
| // CHECK-SPIRV-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[TMP0]], align 4 |
| // CHECK-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 |
| // CHECK-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[TMP0]], align 4 |
| // CHECK-SPIRV-NEXT: ret void |
| // |
| // OPT-LABEL: define dso_local amdgpu_kernel void @_Z7kernel2Ri( |
| // OPT-SAME: ptr addrspace(1) noundef nonnull align 4 captures(none) dereferenceable(4) [[X_COERCE:%.*]]) local_unnamed_addr #[[ATTR0]] { |
| // OPT-NEXT: [[ENTRY:.*:]] |
| // OPT-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(1) [[X_COERCE]], align 4 |
| // OPT-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 |
| // OPT-NEXT: store i32 [[INC]], ptr addrspace(1) [[X_COERCE]], align 4 |
| // OPT-NEXT: ret void |
| // |
| // OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel2Ri( |
| // OPT-SPIRV-SAME: ptr addrspace(1) noundef align 4 dereferenceable(4) [[X_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] !max_work_group_size [[META5]] { |
| // OPT-SPIRV-NEXT: [[ENTRY:.*:]] |
| // OPT-SPIRV-NEXT: [[TMP0:%.*]] = ptrtoint ptr addrspace(1) [[X_COERCE]] to i64 |
| // OPT-SPIRV-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) |
| // OPT-SPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4 |
| // OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1 |
| // OPT-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[TMP1]], align 4 |
| // OPT-SPIRV-NEXT: ret void |
| // |
| // HOST-LABEL: define dso_local void @_Z22__device_stub__kernel2Ri( |
| // HOST-SAME: ptr noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] { |
| // HOST-NEXT: [[ENTRY:.*:]] |
| // HOST-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8 |
| // HOST-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8 |
| // HOST-NEXT: [[TMP0:%.*]] = call i32 @hipSetupArgument(ptr [[X_ADDR]], i64 8, i64 0) |
| // HOST-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0 |
| // HOST-NEXT: br i1 [[TMP1]], label %[[SETUP_NEXT:.*]], label %[[SETUP_END:.*]] |
| // HOST: [[SETUP_NEXT]]: |
| // HOST-NEXT: [[TMP2:%.*]] = call i32 @hipLaunchByPtr(ptr @_Z7kernel2Ri) |
| // HOST-NEXT: br label %[[SETUP_END]] |
| // HOST: [[SETUP_END]]: |
| // HOST-NEXT: ret void |
| // |
| __global__ void kernel2(int &x) { |
| x++; |
| } |
| |
| // CHECK-LABEL: define dso_local amdgpu_kernel void @_Z7kernel3PU3AS2iPU3AS1i( |
| // CHECK-SAME: ptr addrspace(2) noundef [[X:%.*]], ptr addrspace(1) noundef [[Y:%.*]]) #[[ATTR0]] { |
| // CHECK-NEXT: [[ENTRY:.*:]] |
| // CHECK-NEXT: [[X_ADDR:%.*]] = alloca ptr addrspace(2), align 8, addrspace(5) |
| // CHECK-NEXT: [[Y_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) |
| // CHECK-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X_ADDR]] to ptr |
| // CHECK-NEXT: [[Y_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[Y_ADDR]] to ptr |
| // CHECK-NEXT: store ptr addrspace(2) [[X]], ptr [[X_ADDR_ASCAST]], align 8 |
| // CHECK-NEXT: store ptr addrspace(1) [[Y]], ptr [[Y_ADDR_ASCAST]], align 8 |
| // CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(2), ptr [[X_ADDR_ASCAST]], align 8 |
| // CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr addrspace(2) [[TMP0]], i64 0 |
| // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(2) [[ARRAYIDX]], align 4 |
| // CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[Y_ADDR_ASCAST]], align 8 |
| // CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP2]], i64 0 |
| // CHECK-NEXT: store i32 [[TMP1]], ptr addrspace(1) [[ARRAYIDX1]], align 4 |
| // CHECK-NEXT: ret void |
| // |
| // CHECK-SPIRV-LABEL: define spir_kernel void @_Z7kernel3PU3AS2iPU3AS1i( |
| // CHECK-SPIRV-SAME: ptr addrspace(2) noundef [[X:%.*]], ptr addrspace(1) noundef [[Y:%.*]]) addrspace(4) #[[ATTR0]] !max_work_group_size [[META5]] { |
| // CHECK-SPIRV-NEXT: [[ENTRY:.*:]] |
| // CHECK-SPIRV-NEXT: [[X_ADDR:%.*]] = alloca ptr addrspace(2), align 8 |
| // CHECK-SPIRV-NEXT: [[Y_ADDR:%.*]] = alloca ptr addrspace(1), align 8 |
| // CHECK-SPIRV-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr [[X_ADDR]] to ptr addrspace(4) |
| // CHECK-SPIRV-NEXT: [[Y_ADDR_ASCAST:%.*]] = addrspacecast ptr [[Y_ADDR]] to ptr addrspace(4) |
| // CHECK-SPIRV-NEXT: store ptr addrspace(2) [[X]], ptr addrspace(4) [[X_ADDR_ASCAST]], align 8 |
| // CHECK-SPIRV-NEXT: store ptr addrspace(1) [[Y]], ptr addrspace(4) [[Y_ADDR_ASCAST]], align 8 |
| // CHECK-SPIRV-NEXT: [[TMP0:%.*]] = load ptr addrspace(2), ptr addrspace(4) [[X_ADDR_ASCAST]], align 8 |
| // CHECK-SPIRV-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr addrspace(2) [[TMP0]], i64 0 |
| // CHECK-SPIRV-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(2) [[ARRAYIDX]], align 4 |
| // CHECK-SPIRV-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[Y_ADDR_ASCAST]], align 8 |
| // CHECK-SPIRV-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP2]], i64 0 |
| // CHECK-SPIRV-NEXT: store i32 [[TMP1]], ptr addrspace(1) [[ARRAYIDX1]], align 4 |
| // CHECK-SPIRV-NEXT: ret void |
| // |
| // OPT-LABEL: define dso_local amdgpu_kernel void @_Z7kernel3PU3AS2iPU3AS1i( |
| // OPT-SAME: ptr addrspace(2) noundef readonly captures(none) [[X:%.*]], ptr addrspace(1) noundef writeonly captures(none) initializes((0, 4)) [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { |
| // OPT-NEXT: [[ENTRY:.*:]] |
| // OPT-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(2) [[X]], align 4 |
| // OPT-NEXT: store i32 [[TMP0]], ptr addrspace(1) [[Y]], align 4 |
| // OPT-NEXT: ret void |
| // |
| // OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel3PU3AS2iPU3AS1i( |
| // OPT-SPIRV-SAME: ptr addrspace(2) noundef readonly captures(none) [[X:%.*]], ptr addrspace(1) noundef writeonly captures(none) initializes((0, 4)) [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR1:[0-9]+]] !max_work_group_size [[META5]] { |
| // OPT-SPIRV-NEXT: [[ENTRY:.*:]] |
| // OPT-SPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(2) [[X]], align 4 |
| // OPT-SPIRV-NEXT: store i32 [[TMP0]], ptr addrspace(1) [[Y]], align 4 |
| // OPT-SPIRV-NEXT: ret void |
| // |
| // HOST-LABEL: define dso_local void @_Z22__device_stub__kernel3PU3AS2iPU3AS1i( |
| // HOST-SAME: ptr addrspace(2) noundef [[X:%.*]], ptr addrspace(1) noundef [[Y:%.*]]) #[[ATTR0]] { |
| // HOST-NEXT: [[ENTRY:.*:]] |
| // HOST-NEXT: [[X_ADDR:%.*]] = alloca ptr addrspace(2), align 8 |
| // HOST-NEXT: [[Y_ADDR:%.*]] = alloca ptr addrspace(1), align 8 |
| // HOST-NEXT: store ptr addrspace(2) [[X]], ptr [[X_ADDR]], align 8 |
| // HOST-NEXT: store ptr addrspace(1) [[Y]], ptr [[Y_ADDR]], align 8 |
| // HOST-NEXT: [[TMP0:%.*]] = call i32 @hipSetupArgument(ptr [[X_ADDR]], i64 8, i64 0) |
| // HOST-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0 |
| // HOST-NEXT: br i1 [[TMP1]], label %[[SETUP_NEXT:.*]], label %[[SETUP_END:.*]] |
| // HOST: [[SETUP_NEXT]]: |
| // HOST-NEXT: [[TMP2:%.*]] = call i32 @hipSetupArgument(ptr [[Y_ADDR]], i64 8, i64 8) |
| // HOST-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 0 |
| // HOST-NEXT: br i1 [[TMP3]], label %[[SETUP_NEXT1:.*]], label %[[SETUP_END]] |
| // HOST: [[SETUP_NEXT1]]: |
| // HOST-NEXT: [[TMP4:%.*]] = call i32 @hipLaunchByPtr(ptr @_Z7kernel3PU3AS2iPU3AS1i) |
| // HOST-NEXT: br label %[[SETUP_END]] |
| // HOST: [[SETUP_END]]: |
| // HOST-NEXT: ret void |
| // |
| __global__ void kernel3(__attribute__((address_space(2))) int *x, |
| __attribute__((address_space(1))) int *y) { |
| y[0] = x[0]; |
| } |
| |
| // CHECK-LABEL: define dso_local void @_Z4funcPi( |
| // CHECK-SAME: ptr noundef [[X:%.*]]) #[[ATTR1:[0-9]+]] { |
| // CHECK-NEXT: [[ENTRY:.*:]] |
| // CHECK-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // CHECK-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X_ADDR]] to ptr |
| // CHECK-NEXT: store ptr [[X]], ptr [[X_ADDR_ASCAST]], align 8 |
| // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR_ASCAST]], align 8 |
| // CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 0 |
| // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 |
| // CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 |
| // CHECK-NEXT: store i32 [[INC]], ptr [[ARRAYIDX]], align 4 |
| // CHECK-NEXT: ret void |
| // |
| // CHECK-SPIRV-LABEL: define spir_func void @_Z4funcPi( |
| // CHECK-SPIRV-SAME: ptr addrspace(4) noundef [[X:%.*]]) addrspace(4) #[[ATTR1:[0-9]+]] { |
| // CHECK-SPIRV-NEXT: [[ENTRY:.*:]] |
| // CHECK-SPIRV-NEXT: [[X_ADDR:%.*]] = alloca ptr addrspace(4), align 8 |
| // CHECK-SPIRV-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr [[X_ADDR]] to ptr addrspace(4) |
| // CHECK-SPIRV-NEXT: store ptr addrspace(4) [[X]], ptr addrspace(4) [[X_ADDR_ASCAST]], align 8 |
| // CHECK-SPIRV-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[X_ADDR_ASCAST]], align 8 |
| // CHECK-SPIRV-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP0]], i64 0 |
| // CHECK-SPIRV-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX]], align 4 |
| // CHECK-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 |
| // CHECK-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[ARRAYIDX]], align 4 |
| // CHECK-SPIRV-NEXT: ret void |
| // |
| // OPT-LABEL: define dso_local void @_Z4funcPi( |
| // OPT-SAME: ptr noundef captures(none) [[X:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] { |
| // OPT-NEXT: [[ENTRY:.*:]] |
| // OPT-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4 |
| // OPT-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 |
| // OPT-NEXT: store i32 [[INC]], ptr [[X]], align 4 |
| // OPT-NEXT: ret void |
| // |
| // OPT-SPIRV-LABEL: define spir_func void @_Z4funcPi( |
| // OPT-SPIRV-SAME: ptr addrspace(4) noundef captures(none) [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR2:[0-9]+]] { |
| // OPT-SPIRV-NEXT: [[ENTRY:.*:]] |
| // OPT-SPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) [[X]], align 4 |
| // OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 |
| // OPT-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[X]], align 4 |
| // OPT-SPIRV-NEXT: ret void |
| // |
| __device__ void func(int *x) { |
| x[0]++; |
| } |
| |
| struct S { |
| int *x; |
| float *y; |
| }; |
| // `by-val` struct is passed by-indirect-alias (a mix of by-ref and indirect |
| // by-val). However, the enhanced address inferring pass should be able to |
| // assume they are global pointers. |
| // For SPIR-V, since byref is not supported at the moment, we pass it as direct. |
| |
| // CHECK-LABEL: define dso_local amdgpu_kernel void @_Z7kernel41S( |
| // CHECK-SAME: ptr addrspace(4) noundef byref([[STRUCT_S:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] { |
| // CHECK-NEXT: [[ENTRY:.*:]] |
| // CHECK-NEXT: [[COERCE:%.*]] = alloca [[STRUCT_S]], align 8, addrspace(5) |
| // CHECK-NEXT: [[S:%.*]] = addrspacecast ptr addrspace(5) [[COERCE]] to ptr |
| // CHECK-NEXT: call void @llvm.memcpy.p0.p4.i64(ptr align 8 [[S]], ptr addrspace(4) align 8 [[TMP0]], i64 16, i1 false) |
| // CHECK-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr [[S]], i32 0, i32 0 |
| // CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[X]], align 8 |
| // CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 0 |
| // CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 |
| // CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1 |
| // CHECK-NEXT: store i32 [[INC]], ptr [[ARRAYIDX]], align 4 |
| // CHECK-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr [[S]], i32 0, i32 1 |
| // CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[Y]], align 8 |
| // CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i64 0 |
| // CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[ARRAYIDX1]], align 4 |
| // CHECK-NEXT: [[ADD:%.*]] = fadd contract float [[TMP4]], 1.000000e+00 |
| // CHECK-NEXT: store float [[ADD]], ptr [[ARRAYIDX1]], align 4 |
| // CHECK-NEXT: ret void |
| // |
| // CHECK-SPIRV-LABEL: define spir_kernel void @_Z7kernel41S( |
| // CHECK-SPIRV-SAME: [[STRUCT_S:%.*]] [[S_COERCE:%.*]]) addrspace(4) #[[ATTR0]] !max_work_group_size [[META5]] { |
| // CHECK-SPIRV-NEXT: [[ENTRY:.*:]] |
| // CHECK-SPIRV-NEXT: [[S:%.*]] = alloca [[STRUCT_S]], align 8 |
| // CHECK-SPIRV-NEXT: [[S1:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(4) |
| // CHECK-SPIRV-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr addrspace(4) [[S1]], i32 0, i32 0 |
| // CHECK-SPIRV-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_S]] [[S_COERCE]], 0 |
| // CHECK-SPIRV-NEXT: store ptr addrspace(4) [[TMP1]], ptr addrspace(4) [[TMP0]], align 8 |
| // CHECK-SPIRV-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr addrspace(4) [[S1]], i32 0, i32 1 |
| // CHECK-SPIRV-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_S]] [[S_COERCE]], 1 |
| // CHECK-SPIRV-NEXT: store ptr addrspace(4) [[TMP3]], ptr addrspace(4) [[TMP2]], align 8 |
| // CHECK-SPIRV-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr addrspace(4) [[S1]], i32 0, i32 0 |
| // CHECK-SPIRV-NEXT: [[TMP4:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[X]], align 8 |
| // CHECK-SPIRV-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP4]], i64 0 |
| // CHECK-SPIRV-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX]], align 4 |
| // CHECK-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP5]], 1 |
| // CHECK-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[ARRAYIDX]], align 4 |
| // CHECK-SPIRV-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr addrspace(4) [[S1]], i32 0, i32 1 |
| // CHECK-SPIRV-NEXT: [[TMP6:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[Y]], align 8 |
| // CHECK-SPIRV-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr addrspace(4) [[TMP6]], i64 0 |
| // CHECK-SPIRV-NEXT: [[TMP7:%.*]] = load float, ptr addrspace(4) [[ARRAYIDX2]], align 4 |
| // CHECK-SPIRV-NEXT: [[ADD:%.*]] = fadd contract float [[TMP7]], 1.000000e+00 |
| // CHECK-SPIRV-NEXT: store float [[ADD]], ptr addrspace(4) [[ARRAYIDX2]], align 4 |
| // CHECK-SPIRV-NEXT: ret void |
| // |
| // OPT-LABEL: define dso_local amdgpu_kernel void @_Z7kernel41S( |
| // OPT-SAME: ptr addrspace(4) noundef readonly byref([[STRUCT_S:%.*]]) align 8 captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] { |
| // OPT-NEXT: [[ENTRY:.*:]] |
| // OPT-NEXT: [[COERCE_SROA_0_0_COPYLOAD:%.*]] = load ptr, ptr addrspace(4) [[TMP0]], align 8, !amdgpu.noclobber [[META4:![0-9]+]] |
| // OPT-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[COERCE_SROA_0_0_COPYLOAD]] to ptr addrspace(1) |
| // OPT-NEXT: [[COERCE_SROA_2_0__SROA_IDX:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP0]], i64 8 |
| // OPT-NEXT: [[COERCE_SROA_2_0_COPYLOAD:%.*]] = load ptr, ptr addrspace(4) [[COERCE_SROA_2_0__SROA_IDX]], align 8, !amdgpu.noclobber [[META4]] |
| // OPT-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[COERCE_SROA_2_0_COPYLOAD]] to ptr addrspace(1) |
| // OPT-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) [[TMP1]], align 4, !amdgpu.noclobber [[META4]] |
| // OPT-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 |
| // OPT-NEXT: store i32 [[INC]], ptr addrspace(1) [[TMP1]], align 4 |
| // OPT-NEXT: [[TMP4:%.*]] = load float, ptr addrspace(1) [[TMP2]], align 4 |
| // OPT-NEXT: [[ADD:%.*]] = fadd contract float [[TMP4]], 1.000000e+00 |
| // OPT-NEXT: store float [[ADD]], ptr addrspace(1) [[TMP2]], align 4 |
| // OPT-NEXT: ret void |
| // |
| // OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel41S( |
| // OPT-SPIRV-SAME: [[STRUCT_S:%.*]] [[S_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] !max_work_group_size [[META5]] { |
| // OPT-SPIRV-NEXT: [[ENTRY:.*:]] |
| // OPT-SPIRV-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_S]] [[S_COERCE]], 0 |
| // OPT-SPIRV-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_S]] [[S_COERCE]], 1 |
| // OPT-SPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP0]], align 4 |
| // OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1 |
| // OPT-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[TMP0]], align 4 |
| // OPT-SPIRV-NEXT: [[TMP3:%.*]] = load float, ptr addrspace(4) [[TMP1]], align 4 |
| // OPT-SPIRV-NEXT: [[ADD:%.*]] = fadd contract float [[TMP3]], 1.000000e+00 |
| // OPT-SPIRV-NEXT: store float [[ADD]], ptr addrspace(4) [[TMP1]], align 4 |
| // OPT-SPIRV-NEXT: ret void |
| // |
| // HOST-LABEL: define dso_local void @_Z22__device_stub__kernel41S( |
| // HOST-SAME: ptr [[S_COERCE0:%.*]], ptr [[S_COERCE1:%.*]]) #[[ATTR0]] { |
| // HOST-NEXT: [[ENTRY:.*:]] |
| // HOST-NEXT: [[S:%.*]] = alloca [[STRUCT_S:%.*]], align 8 |
| // HOST-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw { ptr, ptr }, ptr [[S]], i32 0, i32 0 |
| // HOST-NEXT: store ptr [[S_COERCE0]], ptr [[TMP0]], align 8 |
| // HOST-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw { ptr, ptr }, ptr [[S]], i32 0, i32 1 |
| // HOST-NEXT: store ptr [[S_COERCE1]], ptr [[TMP1]], align 8 |
| // HOST-NEXT: [[TMP2:%.*]] = call i32 @hipSetupArgument(ptr [[S]], i64 16, i64 0) |
| // HOST-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 0 |
| // HOST-NEXT: br i1 [[TMP3]], label %[[SETUP_NEXT:.*]], label %[[SETUP_END:.*]] |
| // HOST: [[SETUP_NEXT]]: |
| // HOST-NEXT: [[TMP4:%.*]] = call i32 @hipLaunchByPtr(ptr @_Z7kernel41S) |
| // HOST-NEXT: br label %[[SETUP_END]] |
| // HOST: [[SETUP_END]]: |
| // HOST-NEXT: ret void |
| // |
| __global__ void kernel4(struct S s) { |
| s.x[0]++; |
| s.y[0] += 1.f; |
| } |
| |
| // If a pointer to struct is passed, only the pointer itself is coerced into the global one. |
| |
| // CHECK-LABEL: define dso_local amdgpu_kernel void @_Z7kernel5P1S( |
| // CHECK-SAME: ptr addrspace(1) noundef [[S_COERCE:%.*]]) #[[ATTR0]] { |
| // CHECK-NEXT: [[ENTRY:.*:]] |
| // CHECK-NEXT: [[S:%.*]] = alloca ptr, align 8, addrspace(5) |
| // CHECK-NEXT: [[S_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // CHECK-NEXT: [[S_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[S]] to ptr |
| // CHECK-NEXT: [[S_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[S_ADDR]] to ptr |
| // CHECK-NEXT: store ptr addrspace(1) [[S_COERCE]], ptr [[S_ASCAST]], align 8 |
| // CHECK-NEXT: [[S1:%.*]] = load ptr, ptr [[S_ASCAST]], align 8 |
| // CHECK-NEXT: store ptr [[S1]], ptr [[S_ADDR_ASCAST]], align 8 |
| // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[S_ADDR_ASCAST]], align 8 |
| // CHECK-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_S:%.*]], ptr [[TMP0]], i32 0, i32 0 |
| // CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[X]], align 8 |
| // CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 0 |
| // CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 |
| // CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1 |
| // CHECK-NEXT: store i32 [[INC]], ptr [[ARRAYIDX]], align 4 |
| // CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[S_ADDR_ASCAST]], align 8 |
| // CHECK-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr [[TMP3]], i32 0, i32 1 |
| // CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[Y]], align 8 |
| // CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 0 |
| // CHECK-NEXT: [[TMP5:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 |
| // CHECK-NEXT: [[ADD:%.*]] = fadd contract float [[TMP5]], 1.000000e+00 |
| // CHECK-NEXT: store float [[ADD]], ptr [[ARRAYIDX2]], align 4 |
| // CHECK-NEXT: ret void |
| // |
| // CHECK-SPIRV-LABEL: define spir_kernel void @_Z7kernel5P1S( |
| // CHECK-SPIRV-SAME: ptr addrspace(1) noundef [[S_COERCE:%.*]]) addrspace(4) #[[ATTR0]] !max_work_group_size [[META5]] { |
| // CHECK-SPIRV-NEXT: [[ENTRY:.*:]] |
| // CHECK-SPIRV-NEXT: [[S:%.*]] = alloca ptr addrspace(4), align 8 |
| // CHECK-SPIRV-NEXT: [[S_ADDR:%.*]] = alloca ptr addrspace(4), align 8 |
| // CHECK-SPIRV-NEXT: [[S_ASCAST:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(4) |
| // CHECK-SPIRV-NEXT: [[S_ADDR_ASCAST:%.*]] = addrspacecast ptr [[S_ADDR]] to ptr addrspace(4) |
| // CHECK-SPIRV-NEXT: store ptr addrspace(1) [[S_COERCE]], ptr addrspace(4) [[S_ASCAST]], align 8 |
| // CHECK-SPIRV-NEXT: [[S1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[S_ASCAST]], align 8 |
| // CHECK-SPIRV-NEXT: store ptr addrspace(4) [[S1]], ptr addrspace(4) [[S_ADDR_ASCAST]], align 8 |
| // CHECK-SPIRV-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[S_ADDR_ASCAST]], align 8 |
| // CHECK-SPIRV-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_S:%.*]], ptr addrspace(4) [[TMP0]], i32 0, i32 0 |
| // CHECK-SPIRV-NEXT: [[TMP1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[X]], align 8 |
| // CHECK-SPIRV-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP1]], i64 0 |
| // CHECK-SPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX]], align 4 |
| // CHECK-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1 |
| // CHECK-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[ARRAYIDX]], align 4 |
| // CHECK-SPIRV-NEXT: [[TMP3:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[S_ADDR_ASCAST]], align 8 |
| // CHECK-SPIRV-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr addrspace(4) [[TMP3]], i32 0, i32 1 |
| // CHECK-SPIRV-NEXT: [[TMP4:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[Y]], align 8 |
| // CHECK-SPIRV-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr addrspace(4) [[TMP4]], i64 0 |
| // CHECK-SPIRV-NEXT: [[TMP5:%.*]] = load float, ptr addrspace(4) [[ARRAYIDX2]], align 4 |
| // CHECK-SPIRV-NEXT: [[ADD:%.*]] = fadd contract float [[TMP5]], 1.000000e+00 |
| // CHECK-SPIRV-NEXT: store float [[ADD]], ptr addrspace(4) [[ARRAYIDX2]], align 4 |
| // CHECK-SPIRV-NEXT: ret void |
| // |
| // OPT-LABEL: define dso_local amdgpu_kernel void @_Z7kernel5P1S( |
| // OPT-SAME: ptr addrspace(1) noundef readonly captures(none) [[S_COERCE:%.*]]) local_unnamed_addr #[[ATTR2]] { |
| // OPT-NEXT: [[ENTRY:.*:]] |
| // OPT-NEXT: [[TMP0:%.*]] = load ptr, ptr addrspace(1) [[S_COERCE]], align 8 |
| // OPT-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 |
| // OPT-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 |
| // OPT-NEXT: store i32 [[INC]], ptr [[TMP0]], align 4 |
| // OPT-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(1) [[S_COERCE]], i64 8 |
| // OPT-NEXT: [[TMP2:%.*]] = load ptr, ptr addrspace(1) [[Y]], align 8 |
| // OPT-NEXT: [[TMP3:%.*]] = load float, ptr [[TMP2]], align 4 |
| // OPT-NEXT: [[ADD:%.*]] = fadd contract float [[TMP3]], 1.000000e+00 |
| // OPT-NEXT: store float [[ADD]], ptr [[TMP2]], align 4 |
| // OPT-NEXT: ret void |
| // |
| // OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel5P1S( |
| // OPT-SPIRV-SAME: ptr addrspace(1) noundef [[S_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] !max_work_group_size [[META5]] { |
| // OPT-SPIRV-NEXT: [[ENTRY:.*:]] |
| // OPT-SPIRV-NEXT: [[TMP0:%.*]] = ptrtoint ptr addrspace(1) [[S_COERCE]] to i64 |
| // OPT-SPIRV-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) |
| // OPT-SPIRV-NEXT: [[TMP2:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[TMP1]], align 8 |
| // OPT-SPIRV-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[TMP2]], align 4 |
| // OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 |
| // OPT-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[TMP2]], align 4 |
| // OPT-SPIRV-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP1]], i64 8 |
| // OPT-SPIRV-NEXT: [[TMP4:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[Y]], align 8 |
| // OPT-SPIRV-NEXT: [[TMP5:%.*]] = load float, ptr addrspace(4) [[TMP4]], align 4 |
| // OPT-SPIRV-NEXT: [[ADD:%.*]] = fadd contract float [[TMP5]], 1.000000e+00 |
| // OPT-SPIRV-NEXT: store float [[ADD]], ptr addrspace(4) [[TMP4]], align 4 |
| // OPT-SPIRV-NEXT: ret void |
| // |
| // HOST-LABEL: define dso_local void @_Z22__device_stub__kernel5P1S( |
| // HOST-SAME: ptr noundef [[S:%.*]]) #[[ATTR0]] { |
| // HOST-NEXT: [[ENTRY:.*:]] |
| // HOST-NEXT: [[S_ADDR:%.*]] = alloca ptr, align 8 |
| // HOST-NEXT: store ptr [[S]], ptr [[S_ADDR]], align 8 |
| // HOST-NEXT: [[TMP0:%.*]] = call i32 @hipSetupArgument(ptr [[S_ADDR]], i64 8, i64 0) |
| // HOST-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0 |
| // HOST-NEXT: br i1 [[TMP1]], label %[[SETUP_NEXT:.*]], label %[[SETUP_END:.*]] |
| // HOST: [[SETUP_NEXT]]: |
| // HOST-NEXT: [[TMP2:%.*]] = call i32 @hipLaunchByPtr(ptr @_Z7kernel5P1S) |
| // HOST-NEXT: br label %[[SETUP_END]] |
| // HOST: [[SETUP_END]]: |
| // HOST-NEXT: ret void |
| // |
| __global__ void kernel5(struct S *s) { |
| s->x[0]++; |
| s->y[0] += 1.f; |
| } |
| |
| struct T { |
| float *x[2]; |
| }; |
| // `by-val` array is passed by-indirect-alias (a mix of by-ref and indirect |
| // by-val). However, the enhanced address inferring pass should be able to |
| // assume they are global pointers. |
| // For SPIR-V, since byref is not supported at the moment, we pass it as direct. |
| |
| // CHECK-LABEL: define dso_local amdgpu_kernel void @_Z7kernel61T( |
| // CHECK-SAME: ptr addrspace(4) noundef byref([[STRUCT_T:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] { |
| // CHECK-NEXT: [[ENTRY:.*:]] |
| // CHECK-NEXT: [[COERCE:%.*]] = alloca [[STRUCT_T]], align 8, addrspace(5) |
| // CHECK-NEXT: [[T:%.*]] = addrspacecast ptr addrspace(5) [[COERCE]] to ptr |
| // CHECK-NEXT: call void @llvm.memcpy.p0.p4.i64(ptr align 8 [[T]], ptr addrspace(4) align 8 [[TMP0]], i64 16, i1 false) |
| // CHECK-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_T]], ptr [[T]], i32 0, i32 0 |
| // CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x ptr], ptr [[X]], i64 0, i64 0 |
| // CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 |
| // CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 0 |
| // CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[ARRAYIDX1]], align 4 |
| // CHECK-NEXT: [[ADD:%.*]] = fadd contract float [[TMP2]], 1.000000e+00 |
| // CHECK-NEXT: store float [[ADD]], ptr [[ARRAYIDX1]], align 4 |
| // CHECK-NEXT: [[X2:%.*]] = getelementptr inbounds nuw [[STRUCT_T]], ptr [[T]], i32 0, i32 0 |
| // CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[X2]], i64 0, i64 1 |
| // CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[ARRAYIDX3]], align 8 |
| // CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i64 0 |
| // CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[ARRAYIDX4]], align 4 |
| // CHECK-NEXT: [[ADD5:%.*]] = fadd contract float [[TMP4]], 2.000000e+00 |
| // CHECK-NEXT: store float [[ADD5]], ptr [[ARRAYIDX4]], align 4 |
| // CHECK-NEXT: ret void |
| // |
| // CHECK-SPIRV-LABEL: define spir_kernel void @_Z7kernel61T( |
| // CHECK-SPIRV-SAME: [[STRUCT_T:%.*]] [[T_COERCE:%.*]]) addrspace(4) #[[ATTR0]] !max_work_group_size [[META5]] { |
| // CHECK-SPIRV-NEXT: [[ENTRY:.*:]] |
| // CHECK-SPIRV-NEXT: [[T:%.*]] = alloca [[STRUCT_T]], align 8 |
| // CHECK-SPIRV-NEXT: [[T1:%.*]] = addrspacecast ptr [[T]] to ptr addrspace(4) |
| // CHECK-SPIRV-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_T]], ptr addrspace(4) [[T1]], i32 0, i32 0 |
| // CHECK-SPIRV-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_T]] [[T_COERCE]], 0 |
| // CHECK-SPIRV-NEXT: store [2 x ptr addrspace(4)] [[TMP1]], ptr addrspace(4) [[TMP0]], align 8 |
| // CHECK-SPIRV-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_T]], ptr addrspace(4) [[T1]], i32 0, i32 0 |
| // CHECK-SPIRV-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x ptr addrspace(4)], ptr addrspace(4) [[X]], i64 0, i64 0 |
| // CHECK-SPIRV-NEXT: [[TMP2:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[ARRAYIDX]], align 8 |
| // CHECK-SPIRV-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr addrspace(4) [[TMP2]], i64 0 |
| // CHECK-SPIRV-NEXT: [[TMP3:%.*]] = load float, ptr addrspace(4) [[ARRAYIDX2]], align 4 |
| // CHECK-SPIRV-NEXT: [[ADD:%.*]] = fadd contract float [[TMP3]], 1.000000e+00 |
| // CHECK-SPIRV-NEXT: store float [[ADD]], ptr addrspace(4) [[ARRAYIDX2]], align 4 |
| // CHECK-SPIRV-NEXT: [[X3:%.*]] = getelementptr inbounds nuw [[STRUCT_T]], ptr addrspace(4) [[T1]], i32 0, i32 0 |
| // CHECK-SPIRV-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x ptr addrspace(4)], ptr addrspace(4) [[X3]], i64 0, i64 1 |
| // CHECK-SPIRV-NEXT: [[TMP4:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[ARRAYIDX4]], align 8 |
| // CHECK-SPIRV-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr addrspace(4) [[TMP4]], i64 0 |
| // CHECK-SPIRV-NEXT: [[TMP5:%.*]] = load float, ptr addrspace(4) [[ARRAYIDX5]], align 4 |
| // CHECK-SPIRV-NEXT: [[ADD6:%.*]] = fadd contract float [[TMP5]], 2.000000e+00 |
| // CHECK-SPIRV-NEXT: store float [[ADD6]], ptr addrspace(4) [[ARRAYIDX5]], align 4 |
| // CHECK-SPIRV-NEXT: ret void |
| // |
| // OPT-LABEL: define dso_local amdgpu_kernel void @_Z7kernel61T( |
| // OPT-SAME: ptr addrspace(4) noundef readonly byref([[STRUCT_T:%.*]]) align 8 captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR2]] { |
| // OPT-NEXT: [[ENTRY:.*:]] |
| // OPT-NEXT: [[COERCE_SROA_0_0_COPYLOAD:%.*]] = load ptr, ptr addrspace(4) [[TMP0]], align 8, !amdgpu.noclobber [[META4]] |
| // OPT-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[COERCE_SROA_0_0_COPYLOAD]] to ptr addrspace(1) |
| // OPT-NEXT: [[COERCE_SROA_2_0__SROA_IDX:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP0]], i64 8 |
| // OPT-NEXT: [[COERCE_SROA_2_0_COPYLOAD:%.*]] = load ptr, ptr addrspace(4) [[COERCE_SROA_2_0__SROA_IDX]], align 8, !amdgpu.noclobber [[META4]] |
| // OPT-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[COERCE_SROA_2_0_COPYLOAD]] to ptr addrspace(1) |
| // OPT-NEXT: [[TMP3:%.*]] = load float, ptr addrspace(1) [[TMP1]], align 4, !amdgpu.noclobber [[META4]] |
| // OPT-NEXT: [[ADD:%.*]] = fadd contract float [[TMP3]], 1.000000e+00 |
| // OPT-NEXT: store float [[ADD]], ptr addrspace(1) [[TMP1]], align 4 |
| // OPT-NEXT: [[TMP4:%.*]] = load float, ptr addrspace(1) [[TMP2]], align 4 |
| // OPT-NEXT: [[ADD5:%.*]] = fadd contract float [[TMP4]], 2.000000e+00 |
| // OPT-NEXT: store float [[ADD5]], ptr addrspace(1) [[TMP2]], align 4 |
| // OPT-NEXT: ret void |
| // |
| // OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel61T( |
| // OPT-SPIRV-SAME: [[STRUCT_T:%.*]] [[T_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] !max_work_group_size [[META5]] { |
| // OPT-SPIRV-NEXT: [[ENTRY:.*:]] |
| // OPT-SPIRV-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_T]] [[T_COERCE]], 0 |
| // OPT-SPIRV-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x ptr addrspace(4)] [[TMP0]], 0 |
| // OPT-SPIRV-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x ptr addrspace(4)] [[TMP0]], 1 |
| // OPT-SPIRV-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(4) [[DOTFCA_0_EXTRACT]], align 4 |
| // OPT-SPIRV-NEXT: [[ADD:%.*]] = fadd contract float [[TMP1]], 1.000000e+00 |
| // OPT-SPIRV-NEXT: store float [[ADD]], ptr addrspace(4) [[DOTFCA_0_EXTRACT]], align 4 |
| // OPT-SPIRV-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(4) [[DOTFCA_1_EXTRACT]], align 4 |
| // OPT-SPIRV-NEXT: [[ADD6:%.*]] = fadd contract float [[TMP2]], 2.000000e+00 |
| // OPT-SPIRV-NEXT: store float [[ADD6]], ptr addrspace(4) [[DOTFCA_1_EXTRACT]], align 4 |
| // OPT-SPIRV-NEXT: ret void |
| // |
| // HOST-LABEL: define dso_local void @_Z22__device_stub__kernel61T( |
| // HOST-SAME: ptr [[T_COERCE0:%.*]], ptr [[T_COERCE1:%.*]]) #[[ATTR0]] { |
| // HOST-NEXT: [[ENTRY:.*:]] |
| // HOST-NEXT: [[T:%.*]] = alloca [[STRUCT_T:%.*]], align 8 |
| // HOST-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw { ptr, ptr }, ptr [[T]], i32 0, i32 0 |
| // HOST-NEXT: store ptr [[T_COERCE0]], ptr [[TMP0]], align 8 |
| // HOST-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw { ptr, ptr }, ptr [[T]], i32 0, i32 1 |
| // HOST-NEXT: store ptr [[T_COERCE1]], ptr [[TMP1]], align 8 |
| // HOST-NEXT: [[TMP2:%.*]] = call i32 @hipSetupArgument(ptr [[T]], i64 16, i64 0) |
| // HOST-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 0 |
| // HOST-NEXT: br i1 [[TMP3]], label %[[SETUP_NEXT:.*]], label %[[SETUP_END:.*]] |
| // HOST: [[SETUP_NEXT]]: |
| // HOST-NEXT: [[TMP4:%.*]] = call i32 @hipLaunchByPtr(ptr @_Z7kernel61T) |
| // HOST-NEXT: br label %[[SETUP_END]] |
| // HOST: [[SETUP_END]]: |
| // HOST-NEXT: ret void |
| // |
| __global__ void kernel6(struct T t) { |
| t.x[0][0] += 1.f; |
| t.x[1][0] += 2.f; |
| } |
| |
| // Check that coerced pointers retain the noalias attribute when qualified with __restrict. |
| |
| // CHECK-LABEL: define dso_local amdgpu_kernel void @_Z7kernel7Pi( |
| // CHECK-SAME: ptr addrspace(1) noalias noundef [[X_COERCE:%.*]]) #[[ATTR0]] { |
| // CHECK-NEXT: [[ENTRY:.*:]] |
| // CHECK-NEXT: [[X:%.*]] = alloca ptr, align 8, addrspace(5) |
| // CHECK-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X]] to ptr |
| // CHECK-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X_ADDR]] to ptr |
| // CHECK-NEXT: store ptr addrspace(1) [[X_COERCE]], ptr [[X_ASCAST]], align 8 |
| // CHECK-NEXT: [[X1:%.*]] = load ptr, ptr [[X_ASCAST]], align 8 |
| // CHECK-NEXT: store ptr [[X1]], ptr [[X_ADDR_ASCAST]], align 8 |
| // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR_ASCAST]], align 8 |
| // CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 0 |
| // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 |
| // CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 |
| // CHECK-NEXT: store i32 [[INC]], ptr [[ARRAYIDX]], align 4 |
| // CHECK-NEXT: ret void |
| // |
| // CHECK-SPIRV-LABEL: define spir_kernel void @_Z7kernel7Pi( |
| // CHECK-SPIRV-SAME: ptr addrspace(1) noalias noundef [[X_COERCE:%.*]]) addrspace(4) #[[ATTR0]] !max_work_group_size [[META5]] { |
| // CHECK-SPIRV-NEXT: [[ENTRY:.*:]] |
| // CHECK-SPIRV-NEXT: [[X:%.*]] = alloca ptr addrspace(4), align 8 |
| // CHECK-SPIRV-NEXT: [[X_ADDR:%.*]] = alloca ptr addrspace(4), align 8 |
| // CHECK-SPIRV-NEXT: [[X_ASCAST:%.*]] = addrspacecast ptr [[X]] to ptr addrspace(4) |
| // CHECK-SPIRV-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr [[X_ADDR]] to ptr addrspace(4) |
| // CHECK-SPIRV-NEXT: store ptr addrspace(1) [[X_COERCE]], ptr addrspace(4) [[X_ASCAST]], align 8 |
| // CHECK-SPIRV-NEXT: [[X1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[X_ASCAST]], align 8 |
| // CHECK-SPIRV-NEXT: store ptr addrspace(4) [[X1]], ptr addrspace(4) [[X_ADDR_ASCAST]], align 8 |
| // CHECK-SPIRV-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[X_ADDR_ASCAST]], align 8 |
| // CHECK-SPIRV-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP0]], i64 0 |
| // CHECK-SPIRV-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX]], align 4 |
| // CHECK-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 |
| // CHECK-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[ARRAYIDX]], align 4 |
| // CHECK-SPIRV-NEXT: ret void |
| // |
| // OPT-LABEL: define dso_local amdgpu_kernel void @_Z7kernel7Pi( |
| // OPT-SAME: ptr addrspace(1) noalias noundef captures(none) [[X_COERCE:%.*]]) local_unnamed_addr #[[ATTR0]] { |
| // OPT-NEXT: [[ENTRY:.*:]] |
| // OPT-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(1) [[X_COERCE]], align 4 |
| // OPT-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 |
| // OPT-NEXT: store i32 [[INC]], ptr addrspace(1) [[X_COERCE]], align 4 |
| // OPT-NEXT: ret void |
| // |
| // OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel7Pi( |
| // OPT-SPIRV-SAME: ptr addrspace(1) noalias noundef [[X_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] !max_work_group_size [[META5]] { |
| // OPT-SPIRV-NEXT: [[ENTRY:.*:]] |
| // OPT-SPIRV-NEXT: [[TMP0:%.*]] = ptrtoint ptr addrspace(1) [[X_COERCE]] to i64 |
| // OPT-SPIRV-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) |
| // OPT-SPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4 |
| // OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1 |
| // OPT-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[TMP1]], align 4 |
| // OPT-SPIRV-NEXT: ret void |
| // |
| // HOST-LABEL: define dso_local void @_Z22__device_stub__kernel7Pi( |
| // HOST-SAME: ptr noalias noundef [[X:%.*]]) #[[ATTR0]] { |
| // HOST-NEXT: [[ENTRY:.*:]] |
| // HOST-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8 |
| // HOST-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8 |
| // HOST-NEXT: [[TMP0:%.*]] = call i32 @hipSetupArgument(ptr [[X_ADDR]], i64 8, i64 0) |
| // HOST-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0 |
| // HOST-NEXT: br i1 [[TMP1]], label %[[SETUP_NEXT:.*]], label %[[SETUP_END:.*]] |
| // HOST: [[SETUP_NEXT]]: |
| // HOST-NEXT: [[TMP2:%.*]] = call i32 @hipLaunchByPtr(ptr @_Z7kernel7Pi) |
| // HOST-NEXT: br label %[[SETUP_END]] |
| // HOST: [[SETUP_END]]: |
| // HOST-NEXT: ret void |
| // |
| __global__ void kernel7(int *__restrict x) { |
| x[0]++; |
| } |
| |
| // Single element struct. |
| struct SS { |
| float *x; |
| }; |
| // CHECK-LABEL: define dso_local amdgpu_kernel void @_Z7kernel82SS( |
| // CHECK-SAME: ptr addrspace(1) [[A_COERCE:%.*]]) #[[ATTR0]] { |
| // CHECK-NEXT: [[ENTRY:.*:]] |
| // CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_SS:%.*]], align 8, addrspace(5) |
| // CHECK-NEXT: [[A1:%.*]] = addrspacecast ptr addrspace(5) [[A]] to ptr |
| // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_SS]], ptr [[A1]], i32 0, i32 0 |
| // CHECK-NEXT: store ptr addrspace(1) [[A_COERCE]], ptr [[COERCE_DIVE]], align 8 |
| // CHECK-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_SS]], ptr [[A1]], i32 0, i32 0 |
| // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X]], align 8 |
| // CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[TMP0]], align 4 |
| // CHECK-NEXT: [[ADD:%.*]] = fadd contract float [[TMP1]], 3.000000e+00 |
| // CHECK-NEXT: store float [[ADD]], ptr [[TMP0]], align 4 |
| // CHECK-NEXT: ret void |
| // |
| // CHECK-SPIRV-LABEL: define spir_kernel void @_Z7kernel82SS( |
| // CHECK-SPIRV-SAME: [[STRUCT_SS:%.*]] [[A_COERCE:%.*]]) addrspace(4) #[[ATTR0]] !max_work_group_size [[META5]] { |
| // CHECK-SPIRV-NEXT: [[ENTRY:.*:]] |
| // CHECK-SPIRV-NEXT: [[A:%.*]] = alloca [[STRUCT_SS]], align 8 |
| // CHECK-SPIRV-NEXT: [[A1:%.*]] = addrspacecast ptr [[A]] to ptr addrspace(4) |
| // CHECK-SPIRV-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_SS]], ptr addrspace(4) [[A1]], i32 0, i32 0 |
| // CHECK-SPIRV-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_SS]] [[A_COERCE]], 0 |
| // CHECK-SPIRV-NEXT: store ptr addrspace(4) [[TMP1]], ptr addrspace(4) [[TMP0]], align 8 |
| // CHECK-SPIRV-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_SS]], ptr addrspace(4) [[A1]], i32 0, i32 0 |
| // CHECK-SPIRV-NEXT: [[TMP2:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[X]], align 8 |
| // CHECK-SPIRV-NEXT: [[TMP3:%.*]] = load float, ptr addrspace(4) [[TMP2]], align 4 |
| // CHECK-SPIRV-NEXT: [[ADD:%.*]] = fadd contract float [[TMP3]], 3.000000e+00 |
| // CHECK-SPIRV-NEXT: store float [[ADD]], ptr addrspace(4) [[TMP2]], align 4 |
| // CHECK-SPIRV-NEXT: ret void |
| // |
| // OPT-LABEL: define dso_local amdgpu_kernel void @_Z7kernel82SS( |
| // OPT-SAME: ptr addrspace(1) captures(none) [[A_COERCE:%.*]]) local_unnamed_addr #[[ATTR0]] { |
| // OPT-NEXT: [[ENTRY:.*:]] |
| // OPT-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(1) [[A_COERCE]], align 4 |
| // OPT-NEXT: [[ADD:%.*]] = fadd contract float [[TMP0]], 3.000000e+00 |
| // OPT-NEXT: store float [[ADD]], ptr addrspace(1) [[A_COERCE]], align 4 |
| // OPT-NEXT: ret void |
| // |
| // OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel82SS( |
| // OPT-SPIRV-SAME: [[STRUCT_SS:%.*]] [[A_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] !max_work_group_size [[META5]] { |
| // OPT-SPIRV-NEXT: [[ENTRY:.*:]] |
| // OPT-SPIRV-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_SS]] [[A_COERCE]], 0 |
| // OPT-SPIRV-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(4) [[TMP0]], align 4 |
| // OPT-SPIRV-NEXT: [[ADD:%.*]] = fadd contract float [[TMP1]], 3.000000e+00 |
| // OPT-SPIRV-NEXT: store float [[ADD]], ptr addrspace(4) [[TMP0]], align 4 |
| // OPT-SPIRV-NEXT: ret void |
| // |
| // HOST-LABEL: define dso_local void @_Z22__device_stub__kernel82SS( |
| // HOST-SAME: ptr [[A_COERCE:%.*]]) #[[ATTR0]] { |
| // HOST-NEXT: [[ENTRY:.*:]] |
| // HOST-NEXT: [[A:%.*]] = alloca [[STRUCT_SS:%.*]], align 8 |
| // HOST-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_SS]], ptr [[A]], i32 0, i32 0 |
| // HOST-NEXT: store ptr [[A_COERCE]], ptr [[COERCE_DIVE]], align 8 |
| // HOST-NEXT: [[TMP0:%.*]] = call i32 @hipSetupArgument(ptr [[A]], i64 8, i64 0) |
| // HOST-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0 |
| // HOST-NEXT: br i1 [[TMP1]], label %[[SETUP_NEXT:.*]], label %[[SETUP_END:.*]] |
| // HOST: [[SETUP_NEXT]]: |
| // HOST-NEXT: [[TMP2:%.*]] = call i32 @hipLaunchByPtr(ptr @_Z7kernel82SS) |
| // HOST-NEXT: br label %[[SETUP_END]] |
| // HOST: [[SETUP_END]]: |
| // HOST-NEXT: ret void |
| // |
| __global__ void kernel8(struct SS a) { |
| *a.x += 3.f; |
| } |
| //. |
| // CHECK-SPIRV: [[META5]] = !{i32 1024, i32 1, i32 1} |
| //. |
| // OPT: [[META4]] = !{} |
| //. |
| // OPT-SPIRV: [[META5]] = !{i32 1024, i32 1, i32 1} |
| //. |