[AMDGPU] Change llvm.amdgcn.image.bvh.intersect.ray to take vec3 args

The ray_origin, ray_dir and ray_inv_dir arguments should all be vec3 to
match how the hardware instruction works.

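The API of the corresponding OpenCL builtins is unchanged: they still
take these arguments as vec4, and Clang drops the ignored fourth element
when emitting the call to the intrinsic.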

Differential Revision: https://reviews.llvm.org/D115032

GitOrigin-RevId: 2774bad1124215571ab154afcb5478c78cf46344
diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp
index 0a98b5b..44a24d2 100644
--- a/lib/CodeGen/CGBuiltin.cpp
+++ b/lib/CodeGen/CGBuiltin.cpp
@@ -16592,6 +16592,15 @@
     llvm::Value *RayInverseDir = EmitScalarExpr(E->getArg(4));
     llvm::Value *TextureDescr = EmitScalarExpr(E->getArg(5));
 
+    // The builtins take these arguments as vec4 where the last element is
+    // ignored. The intrinsic takes them as vec3.
+    RayOrigin = Builder.CreateShuffleVector(RayOrigin, RayOrigin,
+                                            ArrayRef<int>{0, 1, 2});
+    RayDir =
+        Builder.CreateShuffleVector(RayDir, RayDir, ArrayRef<int>{0, 1, 2});
+    RayInverseDir = Builder.CreateShuffleVector(RayInverseDir, RayInverseDir,
+                                                ArrayRef<int>{0, 1, 2});
+
     Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_image_bvh_intersect_ray,
                                    {NodePtr->getType(), RayDir->getType()});
     return Builder.CreateCall(F, {NodePtr, RayExtent, RayOrigin, RayDir,
diff --git a/test/CodeGenOpenCL/builtins-amdgcn-raytracing.cl b/test/CodeGenOpenCL/builtins-amdgcn-raytracing.cl
index 805d17a..3c90c9a 100644
--- a/test/CodeGenOpenCL/builtins-amdgcn-raytracing.cl
+++ b/test/CodeGenOpenCL/builtins-amdgcn-raytracing.cl
@@ -19,7 +19,7 @@
 typedef half half4 __attribute__((ext_vector_type(4)));
 typedef uint uint4 __attribute__((ext_vector_type(4)));
 
-// CHECK: call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i32.v4f32
+// CHECK: call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i32.v3f32
 // ISA: image_bvh_intersect_ray
 void test_image_bvh_intersect_ray(global uint4* out, uint node_ptr,
   float ray_extent, float4 ray_origin, float4 ray_dir, float4 ray_inv_dir,
@@ -29,7 +29,7 @@
            ray_origin, ray_dir, ray_inv_dir, texture_descr);
 }
 
-// CHECK: call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i32.v4f16
+// CHECK: call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i32.v3f16
 // ISA: image_bvh_intersect_ray
 void test_image_bvh_intersect_ray_h(global uint4* out, uint node_ptr,
   float ray_extent, float4 ray_origin, half4 ray_dir, half4 ray_inv_dir,
@@ -39,7 +39,7 @@
            ray_origin, ray_dir, ray_inv_dir, texture_descr);
 }
 
-// CHECK: call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i64.v4f32
+// CHECK: call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i64.v3f32
 // ISA: image_bvh_intersect_ray
 void test_image_bvh_intersect_ray_l(global uint4* out, ulong node_ptr,
   float ray_extent, float4 ray_origin, float4 ray_dir, float4 ray_inv_dir,
@@ -49,7 +49,7 @@
            ray_origin, ray_dir, ray_inv_dir, texture_descr);
 }
 
-// CHECK: call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i64.v4f16
+// CHECK: call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i64.v3f16
 // ISA: image_bvh_intersect_ray
 void test_image_bvh_intersect_ray_lh(global uint4* out, ulong node_ptr,
   float ray_extent, float4 ray_origin, half4 ray_dir, half4 ray_inv_dir,