| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 |
| ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_100a -mattr=+ptx87 | FileCheck %s |
| ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_103a -mattr=+ptx87 | FileCheck %s |
| ; RUN: %if ptxas-sm_100a && ptxas-isa-8.7 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_100a -mattr=+ptx87 | %ptxas-verify -arch=sm_100a %} |
| ; RUN: %if ptxas-sm_103a && ptxas-isa-8.7 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_103a -mattr=+ptx87 | %ptxas-verify -arch=sm_103a %} |
| |
| ; F16X2 conversions |
| |
| define <2 x half> @cvt_rs_f16x2_f32(float %f1, float %f2, i32 %rbits) { |
| ; CHECK-LABEL: cvt_rs_f16x2_f32( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<5>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b32 %r1, [cvt_rs_f16x2_f32_param_0]; |
| ; CHECK-NEXT: ld.param.b32 %r2, [cvt_rs_f16x2_f32_param_1]; |
| ; CHECK-NEXT: ld.param.b32 %r3, [cvt_rs_f16x2_f32_param_2]; |
| ; CHECK-NEXT: cvt.rs.f16x2.f32 %r4, %r1, %r2, %r3; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r4; |
| ; CHECK-NEXT: ret; |
| %val = call <2 x half> @llvm.nvvm.ff2f16x2.rs(float %f1, float %f2, i32 %rbits) |
| ret <2 x half> %val |
| } |
| |
| define <2 x half> @cvt_rs_relu_f16x2_f32(float %f1, float %f2, i32 %rbits) { |
| ; CHECK-LABEL: cvt_rs_relu_f16x2_f32( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<5>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b32 %r1, [cvt_rs_relu_f16x2_f32_param_0]; |
| ; CHECK-NEXT: ld.param.b32 %r2, [cvt_rs_relu_f16x2_f32_param_1]; |
| ; CHECK-NEXT: ld.param.b32 %r3, [cvt_rs_relu_f16x2_f32_param_2]; |
| ; CHECK-NEXT: cvt.rs.relu.f16x2.f32 %r4, %r1, %r2, %r3; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r4; |
| ; CHECK-NEXT: ret; |
| %val = call <2 x half> @llvm.nvvm.ff2f16x2.rs.relu(float %f1, float %f2, i32 %rbits) |
| ret <2 x half> %val |
| } |
| |
| define <2 x half> @cvt_rs_sf_f16x2_f32(float %f1, float %f2, i32 %rbits) { |
| ; CHECK-LABEL: cvt_rs_sf_f16x2_f32( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<5>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b32 %r1, [cvt_rs_sf_f16x2_f32_param_0]; |
| ; CHECK-NEXT: ld.param.b32 %r2, [cvt_rs_sf_f16x2_f32_param_1]; |
| ; CHECK-NEXT: ld.param.b32 %r3, [cvt_rs_sf_f16x2_f32_param_2]; |
| ; CHECK-NEXT: cvt.rs.satfinite.f16x2.f32 %r4, %r1, %r2, %r3; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r4; |
| ; CHECK-NEXT: ret; |
| %val = call <2 x half> @llvm.nvvm.ff2f16x2.rs.satfinite(float %f1, float %f2, i32 %rbits) |
| ret <2 x half> %val |
| } |
| |
| define <2 x half> @cvt_rs_relu_sf_f16x2_f32(float %f1, float %f2, i32 %rbits) { |
| ; CHECK-LABEL: cvt_rs_relu_sf_f16x2_f32( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<5>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b32 %r1, [cvt_rs_relu_sf_f16x2_f32_param_0]; |
| ; CHECK-NEXT: ld.param.b32 %r2, [cvt_rs_relu_sf_f16x2_f32_param_1]; |
| ; CHECK-NEXT: ld.param.b32 %r3, [cvt_rs_relu_sf_f16x2_f32_param_2]; |
| ; CHECK-NEXT: cvt.rs.relu.satfinite.f16x2.f32 %r4, %r1, %r2, %r3; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r4; |
| ; CHECK-NEXT: ret; |
| %val = call <2 x half> @llvm.nvvm.ff2f16x2.rs.relu.satfinite(float %f1, float %f2, i32 %rbits) |
| ret <2 x half> %val |
| } |
| |
| ; BF16X2 conversions |
| |
| define <2 x bfloat> @cvt_rs_bf16x2_f32(float %f1, float %f2, i32 %rbits) { |
| ; CHECK-LABEL: cvt_rs_bf16x2_f32( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<5>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b32 %r1, [cvt_rs_bf16x2_f32_param_0]; |
| ; CHECK-NEXT: ld.param.b32 %r2, [cvt_rs_bf16x2_f32_param_1]; |
| ; CHECK-NEXT: ld.param.b32 %r3, [cvt_rs_bf16x2_f32_param_2]; |
| ; CHECK-NEXT: cvt.rs.bf16x2.f32 %r4, %r1, %r2, %r3; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r4; |
| ; CHECK-NEXT: ret; |
| %val = call <2 x bfloat> @llvm.nvvm.ff2bf16x2.rs(float %f1, float %f2, i32 %rbits) |
| ret <2 x bfloat> %val |
| } |
| |
| define <2 x bfloat> @cvt_rs_relu_bf16x2_f32(float %f1, float %f2, i32 %rbits) { |
| ; CHECK-LABEL: cvt_rs_relu_bf16x2_f32( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<5>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b32 %r1, [cvt_rs_relu_bf16x2_f32_param_0]; |
| ; CHECK-NEXT: ld.param.b32 %r2, [cvt_rs_relu_bf16x2_f32_param_1]; |
| ; CHECK-NEXT: ld.param.b32 %r3, [cvt_rs_relu_bf16x2_f32_param_2]; |
| ; CHECK-NEXT: cvt.rs.relu.bf16x2.f32 %r4, %r1, %r2, %r3; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r4; |
| ; CHECK-NEXT: ret; |
| %val = call <2 x bfloat> @llvm.nvvm.ff2bf16x2.rs.relu(float %f1, float %f2, i32 %rbits) |
| ret <2 x bfloat> %val |
| } |
| |
| define <2 x bfloat> @cvt_rs_sf_bf16x2_f32(float %f1, float %f2, i32 %rbits) { |
| ; CHECK-LABEL: cvt_rs_sf_bf16x2_f32( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<5>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b32 %r1, [cvt_rs_sf_bf16x2_f32_param_0]; |
| ; CHECK-NEXT: ld.param.b32 %r2, [cvt_rs_sf_bf16x2_f32_param_1]; |
| ; CHECK-NEXT: ld.param.b32 %r3, [cvt_rs_sf_bf16x2_f32_param_2]; |
| ; CHECK-NEXT: cvt.rs.satfinite.bf16x2.f32 %r4, %r1, %r2, %r3; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r4; |
| ; CHECK-NEXT: ret; |
| %val = call <2 x bfloat> @llvm.nvvm.ff2bf16x2.rs.satfinite(float %f1, float %f2, i32 %rbits) |
| ret <2 x bfloat> %val |
| } |
| |
| define <2 x bfloat> @cvt_rs_relu_sf_bf16x2_f32(float %f1, float %f2, i32 %rbits) { |
| ; CHECK-LABEL: cvt_rs_relu_sf_bf16x2_f32( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<5>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b32 %r1, [cvt_rs_relu_sf_bf16x2_f32_param_0]; |
| ; CHECK-NEXT: ld.param.b32 %r2, [cvt_rs_relu_sf_bf16x2_f32_param_1]; |
| ; CHECK-NEXT: ld.param.b32 %r3, [cvt_rs_relu_sf_bf16x2_f32_param_2]; |
| ; CHECK-NEXT: cvt.rs.relu.satfinite.bf16x2.f32 %r4, %r1, %r2, %r3; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r4; |
| ; CHECK-NEXT: ret; |
| %val = call <2 x bfloat> @llvm.nvvm.ff2bf16x2.rs.relu.satfinite(float %f1, float %f2, i32 %rbits) |
| ret <2 x bfloat> %val |
| } |
| |
| ; F8X4 conversions |
| |
| define <4 x i8> @cvt_rs_sf_e4m3x4_f32(<4 x float> %fvec, i32 %rbits) { |
| ; CHECK-LABEL: cvt_rs_sf_e4m3x4_f32( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<7>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.v4.b32 {%r1, %r2, %r3, %r4}, [cvt_rs_sf_e4m3x4_f32_param_0]; |
| ; CHECK-NEXT: ld.param.b32 %r5, [cvt_rs_sf_e4m3x4_f32_param_1]; |
| ; CHECK-NEXT: cvt.rs.satfinite.e4m3x4.f32 %r6, {%r1, %r2, %r3, %r4}, %r5; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r6; |
| ; CHECK-NEXT: ret; |
| %val = call <4 x i8> @llvm.nvvm.f32x4.to.e4m3x4.rs.satfinite(<4 x float> %fvec, i32 %rbits) |
| ret <4 x i8> %val |
| } |
| |
| define <4 x i8> @cvt_rs_relu_sf_e4m3x4_f32(<4 x float> %fvec, i32 %rbits) { |
| ; CHECK-LABEL: cvt_rs_relu_sf_e4m3x4_f32( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<7>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.v4.b32 {%r1, %r2, %r3, %r4}, [cvt_rs_relu_sf_e4m3x4_f32_param_0]; |
| ; CHECK-NEXT: ld.param.b32 %r5, [cvt_rs_relu_sf_e4m3x4_f32_param_1]; |
| ; CHECK-NEXT: cvt.rs.relu.satfinite.e4m3x4.f32 %r6, {%r1, %r2, %r3, %r4}, %r5; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r6; |
| ; CHECK-NEXT: ret; |
| %val = call <4 x i8> @llvm.nvvm.f32x4.to.e4m3x4.rs.relu.satfinite(<4 x float> %fvec, i32 %rbits) |
| ret <4 x i8> %val |
| } |
| |
| define <4 x i8> @cvt_rs_sf_e5m2x4_f32(<4 x float> %fvec, i32 %rbits) { |
| ; CHECK-LABEL: cvt_rs_sf_e5m2x4_f32( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<7>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.v4.b32 {%r1, %r2, %r3, %r4}, [cvt_rs_sf_e5m2x4_f32_param_0]; |
| ; CHECK-NEXT: ld.param.b32 %r5, [cvt_rs_sf_e5m2x4_f32_param_1]; |
| ; CHECK-NEXT: cvt.rs.satfinite.e5m2x4.f32 %r6, {%r1, %r2, %r3, %r4}, %r5; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r6; |
| ; CHECK-NEXT: ret; |
| %val = call <4 x i8> @llvm.nvvm.f32x4.to.e5m2x4.rs.satfinite(<4 x float> %fvec, i32 %rbits) |
| ret <4 x i8> %val |
| } |
| |
| define <4 x i8> @cvt_rs_relu_sf_e5m2x4_f32(<4 x float> %fvec, i32 %rbits) { |
| ; CHECK-LABEL: cvt_rs_relu_sf_e5m2x4_f32( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<7>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.v4.b32 {%r1, %r2, %r3, %r4}, [cvt_rs_relu_sf_e5m2x4_f32_param_0]; |
| ; CHECK-NEXT: ld.param.b32 %r5, [cvt_rs_relu_sf_e5m2x4_f32_param_1]; |
| ; CHECK-NEXT: cvt.rs.relu.satfinite.e5m2x4.f32 %r6, {%r1, %r2, %r3, %r4}, %r5; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r6; |
| ; CHECK-NEXT: ret; |
| %val = call <4 x i8> @llvm.nvvm.f32x4.to.e5m2x4.rs.relu.satfinite(<4 x float> %fvec, i32 %rbits) |
| ret <4 x i8> %val |
| } |
| |
| ; F6X4 conversions |
| |
| define <4 x i8> @cvt_rs_sf_e2m3x4_f32(<4 x float> %fvec, i32 %rbits) { |
| ; CHECK-LABEL: cvt_rs_sf_e2m3x4_f32( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<7>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.v4.b32 {%r1, %r2, %r3, %r4}, [cvt_rs_sf_e2m3x4_f32_param_0]; |
| ; CHECK-NEXT: ld.param.b32 %r5, [cvt_rs_sf_e2m3x4_f32_param_1]; |
| ; CHECK-NEXT: cvt.rs.satfinite.e2m3x4.f32 %r6, {%r1, %r2, %r3, %r4}, %r5; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r6; |
| ; CHECK-NEXT: ret; |
| %val = call <4 x i8> @llvm.nvvm.f32x4.to.e2m3x4.rs.satfinite(<4 x float> %fvec, i32 %rbits) |
| ret <4 x i8> %val |
| } |
| |
| define <4 x i8> @cvt_rs_relu_sf_e2m3x4_f32(<4 x float> %fvec, i32 %rbits) { |
| ; CHECK-LABEL: cvt_rs_relu_sf_e2m3x4_f32( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<7>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.v4.b32 {%r1, %r2, %r3, %r4}, [cvt_rs_relu_sf_e2m3x4_f32_param_0]; |
| ; CHECK-NEXT: ld.param.b32 %r5, [cvt_rs_relu_sf_e2m3x4_f32_param_1]; |
| ; CHECK-NEXT: cvt.rs.relu.satfinite.e2m3x4.f32 %r6, {%r1, %r2, %r3, %r4}, %r5; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r6; |
| ; CHECK-NEXT: ret; |
| %val = call <4 x i8> @llvm.nvvm.f32x4.to.e2m3x4.rs.relu.satfinite(<4 x float> %fvec, i32 %rbits) |
| ret <4 x i8> %val |
| } |
| |
| define <4 x i8> @cvt_rs_sf_e3m2x4_f32(<4 x float> %fvec, i32 %rbits) { |
| ; CHECK-LABEL: cvt_rs_sf_e3m2x4_f32( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<7>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.v4.b32 {%r1, %r2, %r3, %r4}, [cvt_rs_sf_e3m2x4_f32_param_0]; |
| ; CHECK-NEXT: ld.param.b32 %r5, [cvt_rs_sf_e3m2x4_f32_param_1]; |
| ; CHECK-NEXT: cvt.rs.satfinite.e3m2x4.f32 %r6, {%r1, %r2, %r3, %r4}, %r5; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r6; |
| ; CHECK-NEXT: ret; |
| %val = call <4 x i8> @llvm.nvvm.f32x4.to.e3m2x4.rs.satfinite(<4 x float> %fvec, i32 %rbits) |
| ret <4 x i8> %val |
| } |
| |
| define <4 x i8> @cvt_rs_relu_sf_e3m2x4_f32(<4 x float> %fvec, i32 %rbits) { |
| ; CHECK-LABEL: cvt_rs_relu_sf_e3m2x4_f32( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<7>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.v4.b32 {%r1, %r2, %r3, %r4}, [cvt_rs_relu_sf_e3m2x4_f32_param_0]; |
| ; CHECK-NEXT: ld.param.b32 %r5, [cvt_rs_relu_sf_e3m2x4_f32_param_1]; |
| ; CHECK-NEXT: cvt.rs.relu.satfinite.e3m2x4.f32 %r6, {%r1, %r2, %r3, %r4}, %r5; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r6; |
| ; CHECK-NEXT: ret; |
| %val = call <4 x i8> @llvm.nvvm.f32x4.to.e3m2x4.rs.relu.satfinite(<4 x float> %fvec, i32 %rbits) |
| ret <4 x i8> %val |
| } |
| |
| ; F4X4 conversions |
| |
| define i16 @cvt_rs_sf_e2m1x4_f32(<4 x float> %fvec, i32 %rbits) { |
| ; CHECK-LABEL: cvt_rs_sf_e2m1x4_f32( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b16 %rs<2>; |
| ; CHECK-NEXT: .reg .b32 %r<7>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.v4.b32 {%r1, %r2, %r3, %r4}, [cvt_rs_sf_e2m1x4_f32_param_0]; |
| ; CHECK-NEXT: ld.param.b32 %r5, [cvt_rs_sf_e2m1x4_f32_param_1]; |
| ; CHECK-NEXT: cvt.rs.satfinite.e2m1x4.f32 %rs1, {%r1, %r2, %r3, %r4}, %r5; |
| ; CHECK-NEXT: cvt.u32.u16 %r6, %rs1; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r6; |
| ; CHECK-NEXT: ret; |
| %val = call i16 @llvm.nvvm.f32x4.to.e2m1x4.rs.satfinite(<4 x float> %fvec, i32 %rbits) |
| ret i16 %val |
| } |
| |
| define i16 @cvt_rs_relu_sf_e2m1x4_f32(<4 x float> %fvec, i32 %rbits) { |
| ; CHECK-LABEL: cvt_rs_relu_sf_e2m1x4_f32( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b16 %rs<2>; |
| ; CHECK-NEXT: .reg .b32 %r<7>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.v4.b32 {%r1, %r2, %r3, %r4}, [cvt_rs_relu_sf_e2m1x4_f32_param_0]; |
| ; CHECK-NEXT: ld.param.b32 %r5, [cvt_rs_relu_sf_e2m1x4_f32_param_1]; |
| ; CHECK-NEXT: cvt.rs.relu.satfinite.e2m1x4.f32 %rs1, {%r1, %r2, %r3, %r4}, %r5; |
| ; CHECK-NEXT: cvt.u32.u16 %r6, %rs1; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r6; |
| ; CHECK-NEXT: ret; |
| %val = call i16 @llvm.nvvm.f32x4.to.e2m1x4.rs.relu.satfinite(<4 x float> %fvec, i32 %rbits) |
| ret i16 %val |
| } |