| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 |
| ; Test to verify that the NVPTX backend correctly emits constant global |
| ; vectors whose elements are smaller than a byte (e.g. <2 x i4>). |
| |
| ; RUN: llc < %s -O0 -mtriple=nvptx64 -mcpu=sm_100a -mattr=+ptx86 | FileCheck %s |
| ; RUN: %if ptxas-sm_100a && ptxas-isa-8.6 %{ llc < %s -O0 -mtriple=nvptx64 -mcpu=sm_100a -mattr=+ptx86 | %ptxas-verify -arch=sm_100a %} |
| |
| ; Module triple matches the -mtriple=nvptx64 that every RUN line passes to |
| ; llc, so the file describes the same 64-bit target that is actually tested. |
| target triple = "nvptx64-nvidia-cuda" |
| |
| ; A two-element vector of 4-bit integers is expected to be emitted as a single |
| ; byte: 33 == 0x21, i.e. element 0 (value 1) occupies the low nibble and |
| ; element 1 (value 2) occupies the high nibble. |
| ; CHECK: .visible .global .align 1 .b8 test0[1] = {33}; |
@test0 = local_unnamed_addr addrspace(1) constant <2 x i4> <i4 1, i4 2>, align 1 |
| |
| ; Loads the single byte backing @test0, widens it to i16 and feeds it to the |
| ; e2m1x2 -> f16x2 (round-to-nearest) NVVM intrinsic, returning the resulting |
| ; <2 x half>. The assertions below pin the expected lowering: a byte-sized |
| ; global load followed by cvt.rn.f16x2.e2m1x2. |
| define <2 x half> @foo() { |
| ; CHECK-LABEL: foo( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b16 %rs<2>; |
| ; CHECK-NEXT: .reg .b32 %r<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.global.b8 %rs1, [test0]; |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: .reg .b8 %e2m1x2_in; |
| ; CHECK-NEXT: cvt.u8.u16 %e2m1x2_in, %rs1; |
| ; CHECK-NEXT: cvt.rn.f16x2.e2m1x2 %r1, %e2m1x2_in; |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r1; |
| ; CHECK-NEXT: ret; |
| ; Read the packed <2 x i4> constant as one raw byte from addrspace(1). |
| %ld = load i8, ptr addrspace(1) @test0, align 1 |
| ; The intrinsic takes an i16 operand, so zero-extend the loaded byte. |
| %in = zext nneg i8 %ld to i16 |
| %val = call <2 x half> @llvm.nvvm.e2m1x2.to.f16x2.rn(i16 %in) |
| ret <2 x half> %val |
| } |