; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -O0 < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx78 | FileCheck %s -check-prefixes=NOPTRCONV
; RUN: llc -O0 < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx78 --nvptx-short-ptr | FileCheck %s -check-prefixes=PTRCONV
; RUN: %if ptxas-sm_90 && ptxas-isa-7.8 %{ llc -O0 < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx78 | %ptxas-verify -arch=sm_90 %}
; RUN: %if ptxas-sm_90 && ptxas-isa-7.8 %{ llc -O0 < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx78 --nvptx-short-ptr | %ptxas-verify -arch=sm_90 %}
; shared::cluster (addrspace 7) -> generic.
; The cast is expected to lower to a single 64-bit cvta.shared::cluster, and
; the load that follows goes through the resulting generic pointer. When
; --nvptx-short-ptr is passed, the incoming pointer parameter is loaded as
; 32 bits and widened with cvt.u64.u32 before the conversion.
define i32 @conv_shared_cluster_to_generic(ptr addrspace(7) %ptr) {
; NOPTRCONV-LABEL: conv_shared_cluster_to_generic(
; NOPTRCONV: {
; NOPTRCONV-NEXT: .reg .b32 %r<2>;
; NOPTRCONV-NEXT: .reg .b64 %rd<3>;
; NOPTRCONV-EMPTY:
; NOPTRCONV-NEXT: // %bb.0:
; NOPTRCONV-NEXT: ld.param.b64 %rd1, [conv_shared_cluster_to_generic_param_0];
; NOPTRCONV-NEXT: cvta.shared::cluster.u64 %rd2, %rd1;
; NOPTRCONV-NEXT: ld.b32 %r1, [%rd2];
; NOPTRCONV-NEXT: st.param.b32 [func_retval0], %r1;
; NOPTRCONV-NEXT: ret;
;
; PTRCONV-LABEL: conv_shared_cluster_to_generic(
; PTRCONV: {
; PTRCONV-NEXT: .reg .b32 %r<3>;
; PTRCONV-NEXT: .reg .b64 %rd<3>;
; PTRCONV-EMPTY:
; PTRCONV-NEXT: // %bb.0:
; PTRCONV-NEXT: ld.param.b32 %r1, [conv_shared_cluster_to_generic_param_0];
; PTRCONV-NEXT: cvt.u64.u32 %rd1, %r1;
; PTRCONV-NEXT: cvta.shared::cluster.u64 %rd2, %rd1;
; PTRCONV-NEXT: ld.b32 %r2, [%rd2];
; PTRCONV-NEXT: st.param.b32 [func_retval0], %r2;
; PTRCONV-NEXT: ret;
  ; Convert to a generic pointer, then read one i32 through it.
  %genptr = addrspacecast ptr addrspace(7) %ptr to ptr
  %val = load i32, ptr %genptr
  ret i32 %val
}
; generic -> shared::cluster (addrspace 7).
; The cast is expected to lower to a single 64-bit cvta.to.shared::cluster,
; and the load that follows is an ld.shared::cluster. When --nvptx-short-ptr
; is passed, the converted address is narrowed with cvt.u32.u64 and the load
; uses the 32-bit register as its address.
define i32 @conv_generic_to_shared_cluster(ptr %ptr) {
; NOPTRCONV-LABEL: conv_generic_to_shared_cluster(
; NOPTRCONV: {
; NOPTRCONV-NEXT: .reg .b32 %r<2>;
; NOPTRCONV-NEXT: .reg .b64 %rd<3>;
; NOPTRCONV-EMPTY:
; NOPTRCONV-NEXT: // %bb.0:
; NOPTRCONV-NEXT: ld.param.b64 %rd1, [conv_generic_to_shared_cluster_param_0];
; NOPTRCONV-NEXT: cvta.to.shared::cluster.u64 %rd2, %rd1;
; NOPTRCONV-NEXT: ld.shared::cluster.b32 %r1, [%rd2];
; NOPTRCONV-NEXT: st.param.b32 [func_retval0], %r1;
; NOPTRCONV-NEXT: ret;
;
; PTRCONV-LABEL: conv_generic_to_shared_cluster(
; PTRCONV: {
; PTRCONV-NEXT: .reg .b32 %r<3>;
; PTRCONV-NEXT: .reg .b64 %rd<3>;
; PTRCONV-EMPTY:
; PTRCONV-NEXT: // %bb.0:
; PTRCONV-NEXT: ld.param.b64 %rd1, [conv_generic_to_shared_cluster_param_0];
; PTRCONV-NEXT: cvta.to.shared::cluster.u64 %rd2, %rd1;
; PTRCONV-NEXT: cvt.u32.u64 %r1, %rd2;
; PTRCONV-NEXT: ld.shared::cluster.b32 %r2, [%r1];
; PTRCONV-NEXT: st.param.b32 [func_retval0], %r2;
; PTRCONV-NEXT: ret;
  ; Convert the generic pointer to addrspace(7), then read one i32 from it.
  %specptr = addrspacecast ptr %ptr to ptr addrspace(7)
  %val = load i32, ptr addrspace(7) %specptr
  ret i32 %val
}
; shared (addrspace 3) -> shared::cluster (addrspace 7).
; The expected lowering is two conversions: cvta.shared.u64 first, then
; cvta.to.shared::cluster.u64 on its result. When --nvptx-short-ptr is
; passed, the 32-bit parameter is widened with cvt.u64.u32 before the first
; conversion and the final address is narrowed with cvt.u32.u64 before the
; ld.shared::cluster.
define i32 @conv_shared_to_shared_cluster(ptr addrspace(3) %ptr) {
; NOPTRCONV-LABEL: conv_shared_to_shared_cluster(
; NOPTRCONV: {
; NOPTRCONV-NEXT: .reg .b32 %r<2>;
; NOPTRCONV-NEXT: .reg .b64 %rd<4>;
; NOPTRCONV-EMPTY:
; NOPTRCONV-NEXT: // %bb.0:
; NOPTRCONV-NEXT: ld.param.b64 %rd1, [conv_shared_to_shared_cluster_param_0];
; NOPTRCONV-NEXT: cvta.shared.u64 %rd2, %rd1;
; NOPTRCONV-NEXT: cvta.to.shared::cluster.u64 %rd3, %rd2;
; NOPTRCONV-NEXT: ld.shared::cluster.b32 %r1, [%rd3];
; NOPTRCONV-NEXT: st.param.b32 [func_retval0], %r1;
; NOPTRCONV-NEXT: ret;
;
; PTRCONV-LABEL: conv_shared_to_shared_cluster(
; PTRCONV: {
; PTRCONV-NEXT: .reg .b32 %r<4>;
; PTRCONV-NEXT: .reg .b64 %rd<4>;
; PTRCONV-EMPTY:
; PTRCONV-NEXT: // %bb.0:
; PTRCONV-NEXT: ld.param.b32 %r1, [conv_shared_to_shared_cluster_param_0];
; PTRCONV-NEXT: cvt.u64.u32 %rd1, %r1;
; PTRCONV-NEXT: cvta.shared.u64 %rd2, %rd1;
; PTRCONV-NEXT: cvta.to.shared::cluster.u64 %rd3, %rd2;
; PTRCONV-NEXT: cvt.u32.u64 %r2, %rd3;
; PTRCONV-NEXT: ld.shared::cluster.b32 %r3, [%r2];
; PTRCONV-NEXT: st.param.b32 [func_retval0], %r3;
; PTRCONV-NEXT: ret;
  ; Cast directly between the two address spaces, then read one i32.
  %specptr = addrspacecast ptr addrspace(3) %ptr to ptr addrspace(7)
  %val = load i32, ptr addrspace(7) %specptr
  ret i32 %val
}
; shared::cluster (addrspace 7) -> shared (addrspace 3).
; The expected lowering is two conversions: cvta.shared::cluster.u64 first,
; then cvta.to.shared.u64 on its result. When --nvptx-short-ptr is passed,
; the 32-bit parameter is widened with cvt.u64.u32 before the first
; conversion and the final address is narrowed with cvt.u32.u64 before the
; ld.shared.
define i32 @conv_shared_cluster_to_shared(ptr addrspace(7) %ptr) {
; NOPTRCONV-LABEL: conv_shared_cluster_to_shared(
; NOPTRCONV: {
; NOPTRCONV-NEXT: .reg .b32 %r<2>;
; NOPTRCONV-NEXT: .reg .b64 %rd<4>;
; NOPTRCONV-EMPTY:
; NOPTRCONV-NEXT: // %bb.0:
; NOPTRCONV-NEXT: ld.param.b64 %rd1, [conv_shared_cluster_to_shared_param_0];
; NOPTRCONV-NEXT: cvta.shared::cluster.u64 %rd2, %rd1;
; NOPTRCONV-NEXT: cvta.to.shared.u64 %rd3, %rd2;
; NOPTRCONV-NEXT: ld.shared.b32 %r1, [%rd3];
; NOPTRCONV-NEXT: st.param.b32 [func_retval0], %r1;
; NOPTRCONV-NEXT: ret;
;
; PTRCONV-LABEL: conv_shared_cluster_to_shared(
; PTRCONV: {
; PTRCONV-NEXT: .reg .b32 %r<4>;
; PTRCONV-NEXT: .reg .b64 %rd<4>;
; PTRCONV-EMPTY:
; PTRCONV-NEXT: // %bb.0:
; PTRCONV-NEXT: ld.param.b32 %r1, [conv_shared_cluster_to_shared_param_0];
; PTRCONV-NEXT: cvt.u64.u32 %rd1, %r1;
; PTRCONV-NEXT: cvta.shared::cluster.u64 %rd2, %rd1;
; PTRCONV-NEXT: cvta.to.shared.u64 %rd3, %rd2;
; PTRCONV-NEXT: cvt.u32.u64 %r2, %rd3;
; PTRCONV-NEXT: ld.shared.b32 %r3, [%r2];
; PTRCONV-NEXT: st.param.b32 [func_retval0], %r3;
; PTRCONV-NEXT: ret;
  ; Cast directly between the two address spaces, then read one i32.
  %specptr = addrspacecast ptr addrspace(7) %ptr to ptr addrspace(3)
  %val = load i32, ptr addrspace(3) %specptr
  ret i32 %val
}