| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 |
| ; RUN: llc < %s -march=nvptx64 -mcpu=sm_90 -mattr=+ptx82 | FileCheck %s -check-prefixes=NOMASK |
| ; RUN: %if ptxas-sm_90 && ptxas-isa-8.2 %{ llc < %s -march=nvptx64 -mcpu=sm_90 -mattr=+ptx82 | %ptxas-verify -arch=sm_90 %} |
| ; RUN: llc < %s -march=nvptx64 -mcpu=sm_90 -mattr=+ptx83 | FileCheck %s -check-prefixes=MASK |
| ; RUN: %if ptxas-sm_90 && ptxas-isa-8.3 %{ llc < %s -march=nvptx64 -mcpu=sm_90 -mattr=+ptx83 | %ptxas-verify -arch=sm_90 %} |
| |
| ; The `used_bytes_mask` pragma must not be emitted on older architectures or PTX versions. |
| ; Specifically, the pragma is only supported on SM_50 or later, and PTX 8.3 or later. |
| ; The architecture is fixed at SM_90 for stability; only the PTX version varies: |
| ; PTX 8.2 (NOMASK prefix) must not emit the pragma, PTX 8.3 (MASK prefix) must emit it. |
| |
| ; Masked <4 x i32> load with the last lane disabled; returns element 0. |
| ; With +ptx82 the load is emitted as a plain ld.v4.b32. With +ptx83 the same |
| ; load is preceded by a used_bytes_mask pragma whose value, 0xfff, is |
| ; consistent with one bit per byte of the three enabled 4-byte lanes. |
| ; NOTE(review): despite the name, the IR loads 4 x i32 through a generic |
| ; `ptr`, and %b is never used -- confirm whether the name is intentional. |
| define i32 @global_8xi32(ptr %a, ptr %b) { |
| ; NOMASK-LABEL: global_8xi32( |
| ; NOMASK: { |
| ; NOMASK-NEXT: .reg .b32 %r<5>; |
| ; NOMASK-NEXT: .reg .b64 %rd<2>; |
| ; NOMASK-EMPTY: |
| ; NOMASK-NEXT: // %bb.0: |
| ; NOMASK-NEXT: ld.param.b64 %rd1, [global_8xi32_param_0]; |
| ; NOMASK-NEXT: ld.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1]; |
| ; NOMASK-NEXT: st.param.b32 [func_retval0], %r1; |
| ; NOMASK-NEXT: ret; |
| ; |
| ; MASK-LABEL: global_8xi32( |
| ; MASK: { |
| ; MASK-NEXT: .reg .b32 %r<5>; |
| ; MASK-NEXT: .reg .b64 %rd<2>; |
| ; MASK-EMPTY: |
| ; MASK-NEXT: // %bb.0: |
| ; MASK-NEXT: ld.param.b64 %rd1, [global_8xi32_param_0]; |
| ; MASK-NEXT: .pragma "used_bytes_mask 0xfff"; |
| ; MASK-NEXT: ld.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1]; |
| ; MASK-NEXT: st.param.b32 [func_retval0], %r1; |
| ; MASK-NEXT: ret; |
| ; Lanes 0-2 are enabled and lane 3 is masked off; the passthrough operand is poison. |
| %a.load = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr align 16 %a, <4 x i1> <i1 true, i1 true, i1 true, i1 false>, <4 x i32> poison) |
| ; Only the first loaded element is returned. |
| %first = extractelement <4 x i32> %a.load, i32 0 |
| ret i32 %first |
| } |
| ; Masked vector load intrinsic: operands are (pointer, per-lane i1 mask, passthrough vector). |
| declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, <4 x i1>, <4 x i32>) |