; blob: c35ccf5dc318b185deebb565567206ecf096bc3d [file]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc < %s -march=nvptx64 -mcpu=sm_100f -mattr=+ptx91 | FileCheck %s
; RUN: llc < %s -march=nvptx64 -mcpu=sm_110f -mattr=+ptx91 | FileCheck %s
; RUN: llc < %s -march=nvptx64 -mcpu=sm_120f -mattr=+ptx91 | FileCheck %s
; RUN: %if ptxas-sm_100f && ptxas-isa-9.1 %{ llc < %s -march=nvptx64 -mcpu=sm_100f -mattr=+ptx91 | %ptxas-verify -arch=sm_100f %}
; RUN: %if ptxas-sm_110f && ptxas-isa-9.1 %{ llc < %s -march=nvptx64 -mcpu=sm_110f -mattr=+ptx91 | %ptxas-verify -arch=sm_110f %}
; RUN: %if ptxas-sm_120f && ptxas-isa-9.1 %{ llc < %s -march=nvptx64 -mcpu=sm_120f -mattr=+ptx91 | %ptxas-verify -arch=sm_120f %}
; F16x2 to E2M1x2 (fp4x2)
; Lowering test for @llvm.nvvm.f16x2.to.e2m1x2.rn.satfinite: takes a <2 x half>
; argument and returns the intrinsic's i16 result unchanged.
; The assertions below expect one cvt.rn.satfinite.e2m1x2.f16x2 that writes a
; brace-scoped .b8 temporary, which is widened with cvt.u16.u8 and then
; cvt.u32.u16 before being stored to func_retval0 as a b32.
define i16 @cvt_rn_f16x2_e2m1x2(<2 x half> %in) {
; CHECK-LABEL: cvt_rn_f16x2_e2m1x2(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<2>;
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [cvt_rn_f16x2_e2m1x2_param_0];
; CHECK-NEXT: {
; CHECK-NEXT: .reg .b8 %e2m1x2_out;
; CHECK-NEXT: cvt.rn.satfinite.e2m1x2.f16x2 %e2m1x2_out, %r1;
; CHECK-NEXT: cvt.u16.u8 %rs1, %e2m1x2_out;
; CHECK-NEXT: }
; CHECK-NEXT: cvt.u32.u16 %r2, %rs1;
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: ret;
; Call the conversion intrinsic on the packed half pair and return its value.
%val = call i16 @llvm.nvvm.f16x2.to.e2m1x2.rn.satfinite(<2 x half> %in)
ret i16 %val
}
; Lowering test for @llvm.nvvm.f16x2.to.e2m1x2.rn.relu.satfinite: takes a
; <2 x half> argument and returns the intrinsic's i16 result unchanged.
; The assertions below expect one cvt.rn.satfinite.relu.e2m1x2.f16x2 that writes
; a brace-scoped .b8 temporary, which is widened with cvt.u16.u8 and then
; cvt.u32.u16 before being stored to func_retval0 as a b32.
define i16 @cvt_rn_relu_f16x2_e2m1x2(<2 x half> %in) {
; CHECK-LABEL: cvt_rn_relu_f16x2_e2m1x2(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<2>;
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [cvt_rn_relu_f16x2_e2m1x2_param_0];
; CHECK-NEXT: {
; CHECK-NEXT: .reg .b8 %e2m1x2_out;
; CHECK-NEXT: cvt.rn.satfinite.relu.e2m1x2.f16x2 %e2m1x2_out, %r1;
; CHECK-NEXT: cvt.u16.u8 %rs1, %e2m1x2_out;
; CHECK-NEXT: }
; CHECK-NEXT: cvt.u32.u16 %r2, %rs1;
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: ret;
; Call the relu form of the conversion intrinsic and return its value.
%val = call i16 @llvm.nvvm.f16x2.to.e2m1x2.rn.relu.satfinite(<2 x half> %in)
ret i16 %val
}
; BF16x2 to E2M1x2 (fp4x2)
; Lowering test for @llvm.nvvm.bf16x2.to.e2m1x2.rn.satfinite: takes a
; <2 x bfloat> argument and returns the intrinsic's i16 result unchanged.
; The assertions below expect one cvt.rn.satfinite.e2m1x2.bf16x2 that writes a
; brace-scoped .b8 temporary, which is widened with cvt.u16.u8 and then
; cvt.u32.u16 before being stored to func_retval0 as a b32.
define i16 @cvt_rn_sf_e2m1x2_bf16x2(<2 x bfloat> %in) {
; CHECK-LABEL: cvt_rn_sf_e2m1x2_bf16x2(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<2>;
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [cvt_rn_sf_e2m1x2_bf16x2_param_0];
; CHECK-NEXT: {
; CHECK-NEXT: .reg .b8 %e2m1x2_out;
; CHECK-NEXT: cvt.rn.satfinite.e2m1x2.bf16x2 %e2m1x2_out, %r1;
; CHECK-NEXT: cvt.u16.u8 %rs1, %e2m1x2_out;
; CHECK-NEXT: }
; CHECK-NEXT: cvt.u32.u16 %r2, %rs1;
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: ret;
; Call the conversion intrinsic on the packed bfloat pair and return its value.
%val = call i16 @llvm.nvvm.bf16x2.to.e2m1x2.rn.satfinite(<2 x bfloat> %in)
ret i16 %val
}
; Lowering test for @llvm.nvvm.bf16x2.to.e2m1x2.rn.relu.satfinite: takes a
; <2 x bfloat> argument and returns the intrinsic's i16 result unchanged.
; The assertions below expect one cvt.rn.satfinite.relu.e2m1x2.bf16x2 that
; writes a brace-scoped .b8 temporary, which is widened with cvt.u16.u8 and then
; cvt.u32.u16 before being stored to func_retval0 as a b32.
define i16 @cvt_rn_relu_sf_e2m1x2_bf16x2(<2 x bfloat> %in) {
; CHECK-LABEL: cvt_rn_relu_sf_e2m1x2_bf16x2(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<2>;
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [cvt_rn_relu_sf_e2m1x2_bf16x2_param_0];
; CHECK-NEXT: {
; CHECK-NEXT: .reg .b8 %e2m1x2_out;
; CHECK-NEXT: cvt.rn.satfinite.relu.e2m1x2.bf16x2 %e2m1x2_out, %r1;
; CHECK-NEXT: cvt.u16.u8 %rs1, %e2m1x2_out;
; CHECK-NEXT: }
; CHECK-NEXT: cvt.u32.u16 %r2, %rs1;
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: ret;
; Call the relu form of the conversion intrinsic and return its value.
%val = call i16 @llvm.nvvm.bf16x2.to.e2m1x2.rn.relu.satfinite(<2 x bfloat> %in)
ret i16 %val
}