; blob: 90bf9afc8da0a5d4d92b41a537ac70837d9163d0 [file] [edit]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; Test to verify that the NVPTX backend correctly handles conversion of
; constant global vectors containing sub-byte-sized elements.
; RUN: llc < %s -O0 -mtriple=nvptx64 -mcpu=sm_100a -mattr=+ptx86 | FileCheck %s
; RUN: %if ptxas-sm_100a && ptxas-isa-8.6 %{ llc < %s -O0 -mtriple=nvptx64 -mcpu=sm_100a -mattr=+ptx86 | %ptxas-verify -arch=sm_100a %}
; NOTE(review): this module-level triple names the 32-bit nvptx target, while
; both llc invocations above pass -mtriple=nvptx64 - confirm the mismatch is
; intentional.
target triple = "nvptx-nvidia-cuda"
; @test0 is a two-element vector of 4-bit integers that must be emitted as a
; single byte. The expected initializer 33 is 0x21: element 0 (i4 1) sits in
; the low nibble and element 1 (i4 2) in the high nibble.
; CHECK: .visible .global .align 1 .b8 test0[1] = {33};
@test0 = local_unnamed_addr addrspace(1) constant <2 x i4> <i4 1, i4 2>, align 1
; foo: loads the one packed byte of @test0 from addrspace(1), zero-extends it
; to i16, and passes it to the llvm.nvvm.e2m1x2.to.f16x2.rn intrinsic,
; returning the resulting <2 x half>.
; The autogenerated assertions inside the body pin the expected PTX: an 8-bit
; global load, a narrowing cvt.u8.u16 into a scoped .b8 temporary, the
; cvt.rn.f16x2.e2m1x2 conversion, and a 32-bit store to the return slot.
define <2 x half> @foo() {
; CHECK-LABEL: foo(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<2>;
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.global.b8 %rs1, [test0];
; CHECK-NEXT: {
; CHECK-NEXT: .reg .b8 %e2m1x2_in;
; CHECK-NEXT: cvt.u8.u16 %e2m1x2_in, %rs1;
; CHECK-NEXT: cvt.rn.f16x2.e2m1x2 %r1, %e2m1x2_in;
; CHECK-NEXT: }
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
; CHECK-NEXT: ret;
; Read the whole <2 x i4> constant as one i8 (both 4-bit elements at once).
%ld = load i8, ptr addrspace(1) @test0, align 1
; Widen to the i16 operand the intrinsic call takes. The nneg flag asserts the
; loaded byte is non-negative, which holds for the initializer value 33.
%in = zext nneg i8 %ld to i16
; Convert the packed pair to two half-precision values (the .rn variant).
%val = call <2 x half> @llvm.nvvm.e2m1x2.to.f16x2.rn(i16 %in)
ret <2 x half> %val
}