| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
| ; RUN: llc -O0 < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx78 | FileCheck %s -check-prefixes=NOPTRCONV |
| ; RUN: llc -O0 < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx78 --nvptx-short-ptr | FileCheck %s -check-prefixes=PTRCONV |
| ; RUN: %if ptxas-sm_90 && ptxas-isa-7.8 %{ llc -O0 < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx78 | %ptxas-verify -arch=sm_90 %} |
| ; RUN: %if ptxas-sm_90 && ptxas-isa-7.8 %{ llc -O0 < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx78 --nvptx-short-ptr | %ptxas-verify -arch=sm_90 %} |
| |
| ; Cast a pointer from addrspace(7) to the generic address space and load |
| ; through the result. The expected lowering is a single 64-bit |
| ; cvta.shared::cluster followed by a generic ld. With --nvptx-short-ptr the |
| ; addrspace(7) parameter arrives as a 32-bit value and is widened with |
| ; cvt.u64.u32 before the cvta. |
| define i32 @conv_shared_cluster_to_generic(ptr addrspace(7) %ptr) { |
| ; NOPTRCONV-LABEL: conv_shared_cluster_to_generic( |
| ; NOPTRCONV: { |
| ; NOPTRCONV-NEXT: .reg .b32 %r<2>; |
| ; NOPTRCONV-NEXT: .reg .b64 %rd<3>; |
| ; NOPTRCONV-EMPTY: |
| ; NOPTRCONV-NEXT: // %bb.0: |
| ; NOPTRCONV-NEXT: ld.param.b64 %rd1, [conv_shared_cluster_to_generic_param_0]; |
| ; NOPTRCONV-NEXT: cvta.shared::cluster.u64 %rd2, %rd1; |
| ; NOPTRCONV-NEXT: ld.b32 %r1, [%rd2]; |
| ; NOPTRCONV-NEXT: st.param.b32 [func_retval0], %r1; |
| ; NOPTRCONV-NEXT: ret; |
| ; |
| ; PTRCONV-LABEL: conv_shared_cluster_to_generic( |
| ; PTRCONV: { |
| ; PTRCONV-NEXT: .reg .b32 %r<3>; |
| ; PTRCONV-NEXT: .reg .b64 %rd<3>; |
| ; PTRCONV-EMPTY: |
| ; PTRCONV-NEXT: // %bb.0: |
| ; PTRCONV-NEXT: ld.param.b32 %r1, [conv_shared_cluster_to_generic_param_0]; |
| ; PTRCONV-NEXT: cvt.u64.u32 %rd1, %r1; |
| ; PTRCONV-NEXT: cvta.shared::cluster.u64 %rd2, %rd1; |
| ; PTRCONV-NEXT: ld.b32 %r2, [%rd2]; |
| ; PTRCONV-NEXT: st.param.b32 [func_retval0], %r2; |
| ; PTRCONV-NEXT: ret; |
| %genptr = addrspacecast ptr addrspace(7) %ptr to ptr |
| %val = load i32, ptr %genptr |
| ret i32 %val |
| } |
| |
| ; Cast a generic pointer to addrspace(7) and load through the result. The |
| ; expected lowering is a single 64-bit cvta.to.shared::cluster followed by |
| ; ld.shared::cluster. With --nvptx-short-ptr the generic parameter is still |
| ; 64-bit; the converted address is narrowed with cvt.u32.u64 and the load |
| ; uses the 32-bit register. |
| define i32 @conv_generic_to_shared_cluster(ptr %ptr) { |
| ; NOPTRCONV-LABEL: conv_generic_to_shared_cluster( |
| ; NOPTRCONV: { |
| ; NOPTRCONV-NEXT: .reg .b32 %r<2>; |
| ; NOPTRCONV-NEXT: .reg .b64 %rd<3>; |
| ; NOPTRCONV-EMPTY: |
| ; NOPTRCONV-NEXT: // %bb.0: |
| ; NOPTRCONV-NEXT: ld.param.b64 %rd1, [conv_generic_to_shared_cluster_param_0]; |
| ; NOPTRCONV-NEXT: cvta.to.shared::cluster.u64 %rd2, %rd1; |
| ; NOPTRCONV-NEXT: ld.shared::cluster.b32 %r1, [%rd2]; |
| ; NOPTRCONV-NEXT: st.param.b32 [func_retval0], %r1; |
| ; NOPTRCONV-NEXT: ret; |
| ; |
| ; PTRCONV-LABEL: conv_generic_to_shared_cluster( |
| ; PTRCONV: { |
| ; PTRCONV-NEXT: .reg .b32 %r<3>; |
| ; PTRCONV-NEXT: .reg .b64 %rd<3>; |
| ; PTRCONV-EMPTY: |
| ; PTRCONV-NEXT: // %bb.0: |
| ; PTRCONV-NEXT: ld.param.b64 %rd1, [conv_generic_to_shared_cluster_param_0]; |
| ; PTRCONV-NEXT: cvta.to.shared::cluster.u64 %rd2, %rd1; |
| ; PTRCONV-NEXT: cvt.u32.u64 %r1, %rd2; |
| ; PTRCONV-NEXT: ld.shared::cluster.b32 %r2, [%r1]; |
| ; PTRCONV-NEXT: st.param.b32 [func_retval0], %r2; |
| ; PTRCONV-NEXT: ret; |
| %specptr = addrspacecast ptr %ptr to ptr addrspace(7) |
| %val = load i32, ptr addrspace(7) %specptr |
| ret i32 %val |
| } |
| |
| ; Cast a pointer from addrspace(3) to addrspace(7) and load through the |
| ; result. The expected lowering goes through the generic address space: |
| ; cvta.shared, then cvta.to.shared::cluster, then ld.shared::cluster. With |
| ; --nvptx-short-ptr the 32-bit parameter is widened before the two 64-bit |
| ; conversions and the result is narrowed again before the load. |
| define i32 @conv_shared_to_shared_cluster(ptr addrspace(3) %ptr) { |
| ; NOPTRCONV-LABEL: conv_shared_to_shared_cluster( |
| ; NOPTRCONV: { |
| ; NOPTRCONV-NEXT: .reg .b32 %r<2>; |
| ; NOPTRCONV-NEXT: .reg .b64 %rd<4>; |
| ; NOPTRCONV-EMPTY: |
| ; NOPTRCONV-NEXT: // %bb.0: |
| ; NOPTRCONV-NEXT: ld.param.b64 %rd1, [conv_shared_to_shared_cluster_param_0]; |
| ; NOPTRCONV-NEXT: cvta.shared.u64 %rd2, %rd1; |
| ; NOPTRCONV-NEXT: cvta.to.shared::cluster.u64 %rd3, %rd2; |
| ; NOPTRCONV-NEXT: ld.shared::cluster.b32 %r1, [%rd3]; |
| ; NOPTRCONV-NEXT: st.param.b32 [func_retval0], %r1; |
| ; NOPTRCONV-NEXT: ret; |
| ; |
| ; PTRCONV-LABEL: conv_shared_to_shared_cluster( |
| ; PTRCONV: { |
| ; PTRCONV-NEXT: .reg .b32 %r<4>; |
| ; PTRCONV-NEXT: .reg .b64 %rd<4>; |
| ; PTRCONV-EMPTY: |
| ; PTRCONV-NEXT: // %bb.0: |
| ; PTRCONV-NEXT: ld.param.b32 %r1, [conv_shared_to_shared_cluster_param_0]; |
| ; PTRCONV-NEXT: cvt.u64.u32 %rd1, %r1; |
| ; PTRCONV-NEXT: cvta.shared.u64 %rd2, %rd1; |
| ; PTRCONV-NEXT: cvta.to.shared::cluster.u64 %rd3, %rd2; |
| ; PTRCONV-NEXT: cvt.u32.u64 %r2, %rd3; |
| ; PTRCONV-NEXT: ld.shared::cluster.b32 %r3, [%r2]; |
| ; PTRCONV-NEXT: st.param.b32 [func_retval0], %r3; |
| ; PTRCONV-NEXT: ret; |
| %specptr = addrspacecast ptr addrspace(3) %ptr to ptr addrspace(7) |
| %val = load i32, ptr addrspace(7) %specptr |
| ret i32 %val |
| } |
| |
| ; Cast a pointer from addrspace(7) to addrspace(3) and load through the |
| ; result. The expected lowering goes through the generic address space: |
| ; cvta.shared::cluster, then cvta.to.shared, then ld.shared. With |
| ; --nvptx-short-ptr the 32-bit parameter is widened before the two 64-bit |
| ; conversions and the result is narrowed again before the load. |
| define i32 @conv_shared_cluster_to_shared(ptr addrspace(7) %ptr) { |
| ; NOPTRCONV-LABEL: conv_shared_cluster_to_shared( |
| ; NOPTRCONV: { |
| ; NOPTRCONV-NEXT: .reg .b32 %r<2>; |
| ; NOPTRCONV-NEXT: .reg .b64 %rd<4>; |
| ; NOPTRCONV-EMPTY: |
| ; NOPTRCONV-NEXT: // %bb.0: |
| ; NOPTRCONV-NEXT: ld.param.b64 %rd1, [conv_shared_cluster_to_shared_param_0]; |
| ; NOPTRCONV-NEXT: cvta.shared::cluster.u64 %rd2, %rd1; |
| ; NOPTRCONV-NEXT: cvta.to.shared.u64 %rd3, %rd2; |
| ; NOPTRCONV-NEXT: ld.shared.b32 %r1, [%rd3]; |
| ; NOPTRCONV-NEXT: st.param.b32 [func_retval0], %r1; |
| ; NOPTRCONV-NEXT: ret; |
| ; |
| ; PTRCONV-LABEL: conv_shared_cluster_to_shared( |
| ; PTRCONV: { |
| ; PTRCONV-NEXT: .reg .b32 %r<4>; |
| ; PTRCONV-NEXT: .reg .b64 %rd<4>; |
| ; PTRCONV-EMPTY: |
| ; PTRCONV-NEXT: // %bb.0: |
| ; PTRCONV-NEXT: ld.param.b32 %r1, [conv_shared_cluster_to_shared_param_0]; |
| ; PTRCONV-NEXT: cvt.u64.u32 %rd1, %r1; |
| ; PTRCONV-NEXT: cvta.shared::cluster.u64 %rd2, %rd1; |
| ; PTRCONV-NEXT: cvta.to.shared.u64 %rd3, %rd2; |
| ; PTRCONV-NEXT: cvt.u32.u64 %r2, %rd3; |
| ; PTRCONV-NEXT: ld.shared.b32 %r3, [%r2]; |
| ; PTRCONV-NEXT: st.param.b32 [func_retval0], %r3; |
| ; PTRCONV-NEXT: ret; |
| %specptr = addrspacecast ptr addrspace(7) %ptr to ptr addrspace(3) |
| %val = load i32, ptr addrspace(3) %specptr |
| ret i32 %val |
| } |