[Clang] Fix GPU match any truncating 64-bit lane mask
Summary:
This is a lane mask, needs all 64 bits for those wave64 targets. At some
point we should introduce __lanemask_t for this.
diff --git a/clang/lib/Headers/gpuintrin.h b/clang/lib/Headers/gpuintrin.h
index 0fb3916..d308cc9 100644
--- a/clang/lib/Headers/gpuintrin.h
+++ b/clang/lib/Headers/gpuintrin.h
@@ -261,7 +261,7 @@
// Returns a bitmask marking all lanes that have the same value of __x.
_DEFAULT_FN_ATTRS static __inline__ uint64_t
__gpu_match_any_u32_impl(uint64_t __lane_mask, uint32_t __x) {
- uint32_t __match_mask = 0;
+ uint64_t __match_mask = 0;
bool __done = 0;
while (__gpu_ballot(__lane_mask, !__done)) {