| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
| ; RUN: llc < %s -mcpu=sm_90 -mattr=+ptx90 | FileCheck %s |
| |
| target triple = "nvptx64-nvidia-cuda" |
| |
| ; kernel1: "nvvm.blocksareclusters" (#0) combined with fully specified, |
| ; three-component "nvvm.reqntid" (#1) and "nvvm.cluster_dim" (#2) attributes. |
| ; The assertions expect .reqntid and .reqnctapercluster to list all three |
| ; components, immediately followed by the .blocksareclusters directive. |
| ; The body is a bare return; %input and %output are unused. |
| ; NOTE(review): unlike kernel2/kernel3, the expected .blocksareclusters line |
| ; here has no trailing "// @kernel1" annotation. The assertions are |
| ; autogenerated, so regenerate them with the update script rather than |
| ; hand-editing this line to match the other kernels. |
| define ptx_kernel void @kernel1(ptr %input, ptr %output) #0 #1 #2 { |
| ; CHECK-LABEL: kernel1( |
| ; CHECK: .reqntid 1024, 1, 1 |
| ; CHECK-NEXT: .reqnctapercluster 2, 2, 2 |
| ; CHECK-NEXT: .blocksareclusters |
| ; CHECK-NEXT: { |
| ; CHECK-EMPTY: |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ret; |
| ret void |
| } |
| |
| ; kernel2: "nvvm.blocksareclusters" (#0) combined with single-component |
| ; "nvvm.reqntid" (#3) and "nvvm.cluster_dim" (#4) attributes. |
| ; The assertions expect only the one given component to be printed for |
| ; .reqntid and .reqnctapercluster (no padding for the omitted dimensions), |
| ; immediately followed by the .blocksareclusters directive. |
| ; The body is a bare return; %input and %output are unused. |
| define ptx_kernel void @kernel2(ptr %input, ptr %output) #0 #3 #4 { |
| ; CHECK-LABEL: kernel2( |
| ; CHECK: .reqntid 1024 |
| ; CHECK-NEXT: .reqnctapercluster 2 |
| ; CHECK-NEXT: .blocksareclusters // @kernel2 |
| ; CHECK-NEXT: { |
| ; CHECK-EMPTY: |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ret; |
| ret void |
| } |
| |
| ; kernel3: "nvvm.blocksareclusters" (#0) combined with two-component |
| ; "nvvm.reqntid" (#5) and "nvvm.cluster_dim" (#6) attributes, i.e. the third |
| ; (z) dimension is omitted. |
| ; The assertions expect exactly the two given components to be printed for |
| ; .reqntid and .reqnctapercluster, immediately followed by the |
| ; .blocksareclusters directive. |
| ; The body is a bare return; %input and %output are unused. |
| define ptx_kernel void @kernel3(ptr %input, ptr %output) #0 #5 #6 { |
| ; CHECK-LABEL: kernel3( |
| ; CHECK: .reqntid 512, 2 |
| ; CHECK-NEXT: .reqnctapercluster 2, 2 |
| ; CHECK-NEXT: .blocksareclusters // @kernel3 |
| ; CHECK-NEXT: { |
| ; CHECK-EMPTY: |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ret; |
| ret void |
| } |
| |
| ; #0: the attribute under test; attached to kernel1, kernel2 and kernel3. |
| attributes #0 = { "nvvm.blocksareclusters" } |
| |
| ; #1/#2 (kernel1): all three components given; the kernel1 assertions expect |
| ; ".reqntid 1024, 1, 1" and ".reqnctapercluster 2, 2, 2". |
| attributes #1 = { "nvvm.reqntid"="1024,1,1" } |
| attributes #2 = { "nvvm.cluster_dim"="2,2,2" } |
| |
| ; #3/#4 (kernel2): a single component; the kernel2 assertions expect |
| ; ".reqntid 1024" and ".reqnctapercluster 2". |
| attributes #3 = { "nvvm.reqntid"="1024" } |
| attributes #4 = { "nvvm.cluster_dim"="2" } |
| |
| ; #5/#6 (kernel3): two components; the kernel3 assertions expect |
| ; ".reqntid 512, 2" and ".reqnctapercluster 2, 2". |
| attributes #5 = { "nvvm.reqntid"="512,2" } |
| attributes #6 = { "nvvm.cluster_dim"="2,2" } |