| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
; gfx1100 runs: both instruction selectors are expected to fail, and the
; diagnostic text is matched with the ERR prefix.
| ; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 < %s 2>&1 | FileCheck -check-prefix=ERR %s |
| ; RUN: not llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 < %s 2>&1 | FileCheck -check-prefix=ERR %s |
; gfx1200 runs: generated code is checked separately for -global-isel=0 and
; -global-isel=1. The full triple is given with -mtriple (as in the gfx1100
; runs) so the result does not depend on the host's default triple.
| ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12-SDAG %s |
| ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12-GISEL %s |
| |
; Intrinsic under test. The functions in this file pass its operands in this
; order: node_ptr (i64), ray_extent (float), an i8 constant, ray_origin
; (<3 x float>), ray_dir (<3 x float>), offsets (<2 x i32>) and tdescr
; (<4 x i32>). It returns a struct holding a <10 x i32> value and two
; <3 x float> values.
| declare {<10 x i32>, <3 x float>, <3 x float>} @llvm.amdgcn.image.bvh.dual.intersect.ray(i64, float, i8, <3 x float>, <3 x float>, <2 x i32>, <4 x i32>) |
| |
| ; ERR: in function image_bvh_dual_intersect_ray{{.*}}intrinsic not supported on subtarget |
; Calls llvm.amdgcn.image.bvh.dual.intersect.ray with its i8 operand set to 0.
; The ray origin and direction vectors are built from scalar arguments, the
; <10 x i32> result is returned as <10 x float>, and the two <3 x float>
; results are stored through %origin and %dir.
; In the expected gfx1200 output for both selectors, the constant 0 is moved
; into v3 immediately before the image_bvh_dual_intersect_ray instruction.
| define amdgpu_ps <10 x float> @image_bvh_dual_intersect_ray(i64 %node_ptr, float %ray_extent, float %ray_origin_x, float %ray_origin_y, float %ray_origin_z, float %ray_dir_x, float %ray_dir_y, float %ray_dir_z, <2 x i32> %offsets, <4 x i32> inreg %tdescr, ptr addrspace(1) %origin, ptr addrspace(1) %dir) { |
| ; GFX12-SDAG-LABEL: image_bvh_dual_intersect_ray: |
| ; GFX12-SDAG: ; %bb.0: ; %main_body |
| ; GFX12-SDAG-NEXT: v_dual_mov_b32 v22, v8 :: v_dual_mov_b32 v21, v7 |
| ; GFX12-SDAG-NEXT: v_dual_mov_b32 v20, v6 :: v_dual_mov_b32 v19, v5 |
| ; GFX12-SDAG-NEXT: v_dual_mov_b32 v18, v4 :: v_dual_mov_b32 v17, v3 |
| ; GFX12-SDAG-NEXT: v_mov_b32_e32 v3, 0 |
| ; GFX12-SDAG-NEXT: image_bvh_dual_intersect_ray v[0:9], [v[0:1], v[2:3], v[17:19], v[20:22], v[9:10]], s[0:3] |
| ; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-SDAG-NEXT: global_store_b96 v[11:12], v[17:19], off |
| ; GFX12-SDAG-NEXT: global_store_b96 v[13:14], v[20:22], off |
| ; GFX12-SDAG-NEXT: ; return to shader part epilog |
| ; |
| ; GFX12-GISEL-LABEL: image_bvh_dual_intersect_ray: |
| ; GFX12-GISEL: ; %bb.0: ; %main_body |
| ; GFX12-GISEL-NEXT: v_dual_mov_b32 v15, v3 :: v_dual_mov_b32 v16, v4 |
| ; GFX12-GISEL-NEXT: v_dual_mov_b32 v17, v5 :: v_dual_mov_b32 v18, v6 |
| ; GFX12-GISEL-NEXT: v_dual_mov_b32 v19, v7 :: v_dual_mov_b32 v20, v8 |
| ; GFX12-GISEL-NEXT: v_mov_b32_e32 v3, 0 |
| ; GFX12-GISEL-NEXT: image_bvh_dual_intersect_ray v[0:9], [v[0:1], v[2:3], v[15:17], v[18:20], v[9:10]], s[0:3] |
| ; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-GISEL-NEXT: global_store_b96 v[11:12], v[15:17], off |
| ; GFX12-GISEL-NEXT: global_store_b96 v[13:14], v[18:20], off |
| ; GFX12-GISEL-NEXT: ; return to shader part epilog |
| main_body: |
; Build the <3 x float> ray origin from its x/y/z scalar arguments.
| %ray_origin0 = insertelement <3 x float> poison, float %ray_origin_x, i32 0 |
| %ray_origin1 = insertelement <3 x float> %ray_origin0, float %ray_origin_y, i32 1 |
| %ray_origin = insertelement <3 x float> %ray_origin1, float %ray_origin_z, i32 2 |
; Build the <3 x float> ray direction the same way.
| %ray_dir0 = insertelement <3 x float> poison, float %ray_dir_x, i32 0 |
| %ray_dir1 = insertelement <3 x float> %ray_dir0, float %ray_dir_y, i32 1 |
| %ray_dir = insertelement <3 x float> %ray_dir1, float %ray_dir_z, i32 2 |
; The third operand is the constant i8 0; %tdescr is the inreg argument.
| %v = call {<10 x i32>, <3 x float>, <3 x float>} @llvm.amdgcn.image.bvh.dual.intersect.ray(i64 %node_ptr, float %ray_extent, i8 0, <3 x float> %ray_origin, <3 x float> %ray_dir, <2 x i32> %offsets, <4 x i32> %tdescr) |
; Struct element 0 is bitcast to <10 x float> and becomes the return value.
| %a = extractvalue {<10 x i32>, <3 x float>, <3 x float>} %v, 0 |
| %r = bitcast <10 x i32> %a to <10 x float> |
; Struct elements 1 and 2 are written to the addrspace(1) pointers %origin and %dir.
| %o = extractvalue {<10 x i32>, <3 x float>, <3 x float>} %v, 1 |
| store <3 x float> %o, ptr addrspace(1) %origin |
| %d = extractvalue {<10 x i32>, <3 x float>, <3 x float>} %v, 2 |
| store <3 x float> %d, ptr addrspace(1) %dir |
| ret <10 x float> %r |
| } |
| |
; Calls llvm.amdgcn.image.bvh.dual.intersect.ray with its i8 operand set to 1.
; The ray origin and direction vectors are built from scalar arguments, the
; <10 x i32> result is returned as <10 x float>, and the two <3 x float>
; results are stored through %origin and %dir.
; In the expected gfx1200 output for both selectors, the constant 1 is moved
; into v3 immediately before the image_bvh_dual_intersect_ray instruction.
| define amdgpu_ps <10 x float> @image_bvh_dual_intersect_ray_1(i64 %node_ptr, float %ray_extent, float %ray_origin_x, float %ray_origin_y, float %ray_origin_z, float %ray_dir_x, float %ray_dir_y, float %ray_dir_z, <2 x i32> %offsets, <4 x i32> inreg %tdescr, ptr addrspace(1) %origin, ptr addrspace(1) %dir) { |
| ; GFX12-SDAG-LABEL: image_bvh_dual_intersect_ray_1: |
| ; GFX12-SDAG: ; %bb.0: ; %main_body |
| ; GFX12-SDAG-NEXT: v_dual_mov_b32 v22, v8 :: v_dual_mov_b32 v21, v7 |
| ; GFX12-SDAG-NEXT: v_dual_mov_b32 v20, v6 :: v_dual_mov_b32 v19, v5 |
| ; GFX12-SDAG-NEXT: v_dual_mov_b32 v18, v4 :: v_dual_mov_b32 v17, v3 |
| ; GFX12-SDAG-NEXT: v_mov_b32_e32 v3, 1 |
| ; GFX12-SDAG-NEXT: image_bvh_dual_intersect_ray v[0:9], [v[0:1], v[2:3], v[17:19], v[20:22], v[9:10]], s[0:3] |
| ; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-SDAG-NEXT: global_store_b96 v[11:12], v[17:19], off |
| ; GFX12-SDAG-NEXT: global_store_b96 v[13:14], v[20:22], off |
| ; GFX12-SDAG-NEXT: ; return to shader part epilog |
| ; |
| ; GFX12-GISEL-LABEL: image_bvh_dual_intersect_ray_1: |
| ; GFX12-GISEL: ; %bb.0: ; %main_body |
| ; GFX12-GISEL-NEXT: v_dual_mov_b32 v15, v3 :: v_dual_mov_b32 v16, v4 |
| ; GFX12-GISEL-NEXT: v_dual_mov_b32 v17, v5 :: v_dual_mov_b32 v18, v6 |
| ; GFX12-GISEL-NEXT: v_dual_mov_b32 v19, v7 :: v_dual_mov_b32 v20, v8 |
| ; GFX12-GISEL-NEXT: v_mov_b32_e32 v3, 1 |
| ; GFX12-GISEL-NEXT: image_bvh_dual_intersect_ray v[0:9], [v[0:1], v[2:3], v[15:17], v[18:20], v[9:10]], s[0:3] |
| ; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-GISEL-NEXT: global_store_b96 v[11:12], v[15:17], off |
| ; GFX12-GISEL-NEXT: global_store_b96 v[13:14], v[18:20], off |
| ; GFX12-GISEL-NEXT: ; return to shader part epilog |
| main_body: |
; Build the <3 x float> ray origin from its x/y/z scalar arguments.
| %ray_origin0 = insertelement <3 x float> poison, float %ray_origin_x, i32 0 |
| %ray_origin1 = insertelement <3 x float> %ray_origin0, float %ray_origin_y, i32 1 |
| %ray_origin = insertelement <3 x float> %ray_origin1, float %ray_origin_z, i32 2 |
; Build the <3 x float> ray direction the same way.
| %ray_dir0 = insertelement <3 x float> poison, float %ray_dir_x, i32 0 |
| %ray_dir1 = insertelement <3 x float> %ray_dir0, float %ray_dir_y, i32 1 |
| %ray_dir = insertelement <3 x float> %ray_dir1, float %ray_dir_z, i32 2 |
; The third operand is the constant i8 1; %tdescr is the inreg argument.
| %v = call {<10 x i32>, <3 x float>, <3 x float>} @llvm.amdgcn.image.bvh.dual.intersect.ray(i64 %node_ptr, float %ray_extent, i8 1, <3 x float> %ray_origin, <3 x float> %ray_dir, <2 x i32> %offsets, <4 x i32> %tdescr) |
; Struct element 0 is bitcast to <10 x float> and becomes the return value.
| %a = extractvalue {<10 x i32>, <3 x float>, <3 x float>} %v, 0 |
| %r = bitcast <10 x i32> %a to <10 x float> |
; Struct elements 1 and 2 are written to the addrspace(1) pointers %origin and %dir.
| %o = extractvalue {<10 x i32>, <3 x float>, <3 x float>} %v, 1 |
| store <3 x float> %o, ptr addrspace(1) %origin |
| %d = extractvalue {<10 x i32>, <3 x float>, <3 x float>} %v, 2 |
| store <3 x float> %d, ptr addrspace(1) %dir |
| ret <10 x float> %r |
| } |