| // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 |
| // RUN: %clang_cc1 -triple spirv64-amd-amdhsa -aux-triple x86_64-pc-windows-msvc -x hip -emit-llvm -fcuda-is-device -o - %s | FileCheck %s |
| |
| // Unlike OpenCL, HIP depends on the C++ interpretation of "unsigned long", which |
| // is 64 bits long on Linux and 32 bits long on Windows. The return type of the |
| // ballot intrinsic needs to be a 64 bit integer on both platforms. This test |
| // cross-compiles to Windows to confirm that the return type is indeed 64 bits |
| // on Windows. |
| |
| #define __device__ __attribute__((device)) /* Spells the `__device__` qualifier as the `device` attribute so the function below can be marked as device code. */ |
| |
| // CHECK-LABEL: define spir_func noundef i64 @_Z3fooi( |
| // CHECK-SAME: i32 noundef [[P:%.*]]) addrspace(4) #[[ATTR0:[0-9]+]] { |
| // CHECK-NEXT: entry: |
| // CHECK-NEXT: [[RETVAL:%.*]] = alloca i64, align 8 |
| // CHECK-NEXT: [[P_ADDR:%.*]] = alloca i32, align 4 |
| // CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr [[RETVAL]] to ptr addrspace(4) |
| // CHECK-NEXT: [[P_ADDR_ASCAST:%.*]] = addrspacecast ptr [[P_ADDR]] to ptr addrspace(4) |
| // CHECK-NEXT: store i32 [[P]], ptr addrspace(4) [[P_ADDR_ASCAST]], align 4 |
| // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) [[P_ADDR_ASCAST]], align 4 |
| // CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 |
| // CHECK-NEXT: [[TMP1:%.*]] = call addrspace(4) i64 @llvm.amdgcn.ballot.i64(i1 [[TOBOOL]]) |
| // CHECK-NEXT: ret i64 [[TMP1]] |
| // |
| __device__ unsigned long long foo(int p) { /* Device function under test: returns the wave64 ballot of `p`; the checks above require the result to be i64. */ |
| return __builtin_amdgcn_ballot_w64(p); /* `p` is converted to a boolean predicate (the `icmp ne ..., 0` in the checks) before reaching llvm.amdgcn.ballot.i64. */ |
| } |