| ; RUN: llc < %s -march=nvptx64 -mcpu=sm_90 -mattr=+ptx80| FileCheck --check-prefixes=CHECK %s |
| ; RUN: %if ptxas-12.0 %{ llc < %s -march=nvptx64 -mcpu=sm_90 -mattr=+ptx80| %ptxas-verify -arch=sm_90 %} |
| |
| ; CHECK-LABEL: test_isspacep |
| define i1 @test_isspacep_shared_cluster(ptr %p) { |
| ; CHECK: isspacep.shared::cluster |
| %a = tail call i1 @llvm.nvvm.isspacep.shared.cluster(ptr %p) |
| ; CHECK: ret |
| ret i1 %a |
| } |
| |
| ; CHECK-LABEL: test_mapa( |
| define ptr @test_mapa(ptr %p, i32 %r) { |
| ; CHECK64: mapa.u64 |
| %a = call ptr @llvm.nvvm.mapa(ptr %p, i32 %r) |
| ret ptr %a |
| } |
| |
| ; CHECK-LABEL: test_mapa_shared_cluster( |
| define ptr addrspace(3) @test_mapa_shared_cluster(ptr addrspace(3) %p, i32 %r) { |
| ; CHECK: mapa.shared::cluster.u64 |
| %a = call ptr addrspace(3) @llvm.nvvm.mapa.shared.cluster(ptr addrspace(3) %p, i32 %r) |
| ret ptr addrspace(3) %a |
| } |
| |
| ; CHECK-LABEL: test_getctarank( |
| define i32 @test_getctarank(ptr %p) { |
| ; CHECK: getctarank.u64 |
| %a = call i32 @llvm.nvvm.getctarank(ptr %p) |
| ret i32 %a |
| } |
| |
| ; CHECK-LABEL: test_getctarank_shared_cluster( |
| define i32 @test_getctarank_shared_cluster(ptr addrspace(3) %p) { |
| ; CHECK64: getctarank.shared::cluster.u64 |
| ; CHECK32: getctarank.shared::cluster.u32 |
| %a = call i32 @llvm.nvvm.getctarank.shared.cluster(ptr addrspace(3) %p) |
| ret i32 %a |
| } |
| |
| ; CHECK-LABEL: test_clusterid_x( |
| define i32 @test_clusterid_x() { |
| ; CHECK: mov.u32 %r{{[0-9]+}}, %clusterid.x; |
| ; CHECK: ret; |
| %x = call i32 @llvm.nvvm.read.ptx.sreg.clusterid.x() |
| ret i32 %x |
| } |
| ; CHECK-LABEL: test_clusterid_y( |
| define i32 @test_clusterid_y() { |
| ; CHECK: mov.u32 %r{{[0-9]+}}, %clusterid.y; |
| ; CHECK: ret; |
| %x = call i32 @llvm.nvvm.read.ptx.sreg.clusterid.y() |
| ret i32 %x |
| } |
| ; CHECK-LABEL: test_clusterid_z( |
| define i32 @test_clusterid_z() { |
| ; CHECK: mov.u32 %r{{[0-9]+}}, %clusterid.z; |
| ; CHECK: ret; |
| %x = call i32 @llvm.nvvm.read.ptx.sreg.clusterid.z() |
| ret i32 %x |
| } |
| ; CHECK-LABEL: test_clusterid_w( |
| define i32 @test_clusterid_w() { |
| ; CHECK: mov.u32 %r{{[0-9]+}}, %clusterid.w; |
| ; CHECK: ret; |
| %x = call i32 @llvm.nvvm.read.ptx.sreg.clusterid.w() |
| ret i32 %x |
| } |
| |
| ; CHECK-LABEL: test_nclusterid_x( |
| define i32 @test_nclusterid_x() { |
| ; CHECK: mov.u32 %r{{[0-9]+}}, %nclusterid.x; |
| ; CHECK: ret; |
| %x = call i32 @llvm.nvvm.read.ptx.sreg.nclusterid.x() |
| ret i32 %x |
| } |
| ; CHECK-LABEL: test_nclusterid_y( |
| define i32 @test_nclusterid_y() { |
| ; CHECK: mov.u32 %r{{[0-9]+}}, %nclusterid.y; |
| ; CHECK: ret; |
| %x = call i32 @llvm.nvvm.read.ptx.sreg.nclusterid.y() |
| ret i32 %x |
| } |
| ; CHECK-LABEL: test_nclusterid_z( |
| define i32 @test_nclusterid_z() { |
| ; CHECK: mov.u32 %r{{[0-9]+}}, %nclusterid.z; |
| ; CHECK: ret; |
| %x = call i32 @llvm.nvvm.read.ptx.sreg.nclusterid.z() |
| ret i32 %x |
| } |
| ; CHECK-LABEL: test_nclusterid_w( |
| define i32 @test_nclusterid_w() { |
| ; CHECK: mov.u32 %r{{[0-9]+}}, %nclusterid.w; |
| ; CHECK: ret; |
| %x = call i32 @llvm.nvvm.read.ptx.sreg.nclusterid.w() |
| ret i32 %x |
| } |
| |
| ; CHECK-LABEL: test_cluster_ctarank( |
| define i32 @test_cluster_ctarank() { |
| ; CHECK: mov.u32 %r{{[0-9]+}}, %cluster_ctarank; |
| ; CHECK: ret; |
| %x = call i32 @llvm.nvvm.read.ptx.sreg.cluster.ctarank() |
| ret i32 %x |
| } |
| |
| ; CHECK-LABEL: test_cluster_nctarank( |
| define i32 @test_cluster_nctarank() { |
| ; CHECK: mov.u32 %r{{[0-9]+}}, %cluster_nctarank; |
| ; CHECK: ret; |
| %x = call i32 @llvm.nvvm.read.ptx.sreg.cluster.nctarank() |
| ret i32 %x |
| } |
| |
| ; CHECK-LABEL: test_is_explicit_cluster( |
| define i1 @test_is_explicit_cluster() { |
| ; CHECK: mov.pred %p{{[0-9]+}}, %is_explicit_cluster; |
| ; CHECK: ret; |
| %x = call i1 @llvm.nvvm.is_explicit_cluster() |
| ret i1 %x |
| } |
| |
| ; CHECK-LABEL: test_barrier_cluster( |
| define void @test_barrier_cluster() { |
| ; CHECK: barrier.cluster.arrive; |
| call void @llvm.nvvm.barrier.cluster.arrive() |
| ; CHECK: barrier.cluster.arrive.relaxed; |
| call void @llvm.nvvm.barrier.cluster.arrive.relaxed() |
| ; CHECK: barrier.cluster.wait; |
| call void @llvm.nvvm.barrier.cluster.wait() |
| ; CHECK: fence.sc.cluster |
| call void @llvm.nvvm.fence.sc.cluster() |
| ret void |
| } |
| |
| ; CHECK-LABEL: test_barrier_cluster_aligned( |
| define void @test_barrier_cluster_aligned() { |
| ; CHECK: barrier.cluster.arrive.aligned; |
| call void @llvm.nvvm.barrier.cluster.arrive.aligned() |
| ; CHECK: barrier.cluster.arrive.relaxed.aligned; |
| call void @llvm.nvvm.barrier.cluster.arrive.relaxed.aligned() |
| ; CHECK: barrier.cluster.wait.aligned; |
| call void @llvm.nvvm.barrier.cluster.wait.aligned() |
| ret void |
| } |
| |
| ; CHECK-LABEL: test_cp_async_bulk_commit_group( |
| define void @test_cp_async_bulk_commit_group() { |
| ; CHECK: cp.async.bulk.commit_group; |
| call void @llvm.nvvm.cp.async.bulk.commit.group() |
| ret void |
| } |
| |
| ; CHECK-LABEL: test_cp_async_bulk_wait_group( |
| define void @test_cp_async_bulk_wait_group() { |
| ; CHECK: cp.async.bulk.wait_group 8; |
| call void @llvm.nvvm.cp.async.bulk.wait.group(i32 8) |
| ; CHECK: cp.async.bulk.wait_group 0; |
| call void @llvm.nvvm.cp.async.bulk.wait.group(i32 0) |
| ret void |
| } |
| |
| ; CHECK-LABEL: test_cp_async_bulk_wait_group_read( |
| define void @test_cp_async_bulk_wait_group_read() { |
| ; CHECK: cp.async.bulk.wait_group.read 8; |
| call void @llvm.nvvm.cp.async.bulk.wait.group.read(i32 8) |
| ; CHECK: cp.async.bulk.wait_group.read 0; |
| call void @llvm.nvvm.cp.async.bulk.wait.group.read(i32 0) |
| ret void |
| } |
| |
| declare i1 @llvm.nvvm.isspacep.shared.cluster(ptr %p); |
| declare ptr @llvm.nvvm.mapa(ptr %p, i32 %r); |
| declare ptr addrspace(3) @llvm.nvvm.mapa.shared.cluster(ptr addrspace(3) %p, i32 %r); |
| declare i32 @llvm.nvvm.getctarank(ptr %p); |
| declare i32 @llvm.nvvm.getctarank.shared.cluster(ptr addrspace(3) %p); |
| declare i32 @llvm.nvvm.read.ptx.sreg.clusterid.x() |
| declare i32 @llvm.nvvm.read.ptx.sreg.clusterid.y() |
| declare i32 @llvm.nvvm.read.ptx.sreg.clusterid.z() |
| declare i32 @llvm.nvvm.read.ptx.sreg.clusterid.w() |
| declare i32 @llvm.nvvm.read.ptx.sreg.nclusterid.x() |
| declare i32 @llvm.nvvm.read.ptx.sreg.nclusterid.y() |
| declare i32 @llvm.nvvm.read.ptx.sreg.nclusterid.z() |
| declare i32 @llvm.nvvm.read.ptx.sreg.nclusterid.w() |
| declare i32 @llvm.nvvm.read.ptx.sreg.cluster.ctarank() |
| declare i32 @llvm.nvvm.read.ptx.sreg.cluster.nctarank() |
| declare i1 @llvm.nvvm.is_explicit_cluster() |
| declare void @llvm.nvvm.barrier.cluster.arrive() |
| declare void @llvm.nvvm.barrier.cluster.arrive.relaxed() |
| declare void @llvm.nvvm.barrier.cluster.wait() |
| declare void @llvm.nvvm.barrier.cluster.arrive.aligned() |
| declare void @llvm.nvvm.barrier.cluster.arrive.relaxed.aligned() |
| declare void @llvm.nvvm.barrier.cluster.wait.aligned() |
| declare void @llvm.nvvm.fence.sc.cluster() |
| declare void @llvm.nvvm.cp.async.bulk.commit.group() |
| declare void @llvm.nvvm.cp.async.bulk.wait.group(i32) |
| declare void @llvm.nvvm.cp.async.bulk.wait.group.read(i32) |