; (source-viewer residue, not part of the test) blob: a888d9996a5009b311a6fc302d3ce947334f94a6 [file] [edit]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc < %s -march=nvptx64 -mcpu=sm_90 -mattr=+ptx82 | FileCheck %s -check-prefixes=NOMASK
; RUN: %if ptxas-sm_90 && ptxas-isa-8.2 %{ llc < %s -march=nvptx64 -mcpu=sm_90 -mattr=+ptx82 | %ptxas-verify -arch=sm_90 %}
; RUN: llc < %s -march=nvptx64 -mcpu=sm_90 -mattr=+ptx83 | FileCheck %s -check-prefixes=MASK
; RUN: %if ptxas-sm_90 && ptxas-isa-8.3 %{ llc < %s -march=nvptx64 -mcpu=sm_90 -mattr=+ptx83 | %ptxas-verify -arch=sm_90 %}
; On older architectures and PTX versions, no used_bytes_mask pragma should be emitted.
; Specifically, the pragma is only supported on SM_50 or later with PTX 8.3 or later.
; The architecture is fixed at SM_90 in this test for stability; only the PTX version varies
; (the ptx82 run uses the NOMASK check prefix, the ptx83 run uses the MASK check prefix).
; Masked load of a <4 x i32> from %a with the last lane disabled; only element 0
; of the loaded vector is returned.
;
; Both RUN configurations are expected to produce a single ld.v4.b32. The ptx83
; checks additionally expect a `.pragma "used_bytes_mask 0xfff"` line directly
; before that load; the ptx82 checks expect no pragma.
;
; NOTE(review): the function name says "8xi32" but the load below is <4 x i32>,
; and %b is never used - confirm whether this is intentional.
define i32 @global_8xi32(ptr %a, ptr %b) {
; NOMASK-LABEL: global_8xi32(
; NOMASK: {
; NOMASK-NEXT: .reg .b32 %r<5>;
; NOMASK-NEXT: .reg .b64 %rd<2>;
; NOMASK-EMPTY:
; NOMASK-NEXT: // %bb.0:
; NOMASK-NEXT: ld.param.b64 %rd1, [global_8xi32_param_0];
; NOMASK-NEXT: ld.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1];
; NOMASK-NEXT: st.param.b32 [func_retval0], %r1;
; NOMASK-NEXT: ret;
;
; MASK-LABEL: global_8xi32(
; MASK: {
; MASK-NEXT: .reg .b32 %r<5>;
; MASK-NEXT: .reg .b64 %rd<2>;
; MASK-EMPTY:
; MASK-NEXT: // %bb.0:
; MASK-NEXT: ld.param.b64 %rd1, [global_8xi32_param_0];
; MASK-NEXT: .pragma "used_bytes_mask 0xfff";
; MASK-NEXT: ld.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1];
; MASK-NEXT: st.param.b32 [func_retval0], %r1;
; MASK-NEXT: ret;
; Lanes 0-2 are enabled and lane 3 is disabled. The expected 0xfff has its low
; 12 bits set, which matches 3 enabled lanes x 4 bytes each (presumably one mask
; bit per byte - confirm against the PTX ISA documentation).
%a.load = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr align 16 %a, <4 x i1> <i1 true, i1 true, i1 true, i1 false>, <4 x i32> poison)
; Only lane 0 is consumed; the expected PTX stores %r1 to the return slot.
%first = extractelement <4 x i32> %a.load, i32 0
ret i32 %first
}
; Masked-load intrinsic used by the test above. Operands: source pointer,
; per-lane <4 x i1> mask, and a <4 x i32> third operand (the call passes poison).
declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr , <4 x i1>, <4 x i32>)