[test-suite, CUDA] Run test kernel with just one thread.
For some reason this test is flaky on sm_60+, and the intermittent failures are
unrelated to what the test is checking. Launching the kernel with a single block
of a single thread (<<<1, 1>>> instead of <<<32, 32>>>) should reduce the
failure rate.
diff --git a/External/CUDA/new.cu b/External/CUDA/new.cu
index 23fc530..95ef760 100644
--- a/External/CUDA/new.cu
+++ b/External/CUDA/new.cu
@@ -58,7 +58,7 @@
}
int main() {
- kernel<<<32, 32>>>();
+ kernel<<<1, 1>>>();
cudaError_t err = cudaDeviceSynchronize();
if (err != cudaSuccess) {
printf("CUDA error %d\n", (int)err);