; blob: fd51759f50d487cc8228c77f7ce5b5923052c232 [file] [log] [blame]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-TRUE16 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-FAKE16 %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-GISEL %s
; Intrinsics exercised by this test.
; The three e5m3 conversions are the subject of the test; mov.dpp is only used
; by the *_dpp functions to produce a DPP-sourced operand. It is declared here
; explicitly so that every intrinsic called in this file has a visible
; signature, matching the convention used for the conversions.
declare i32 @llvm.amdgcn.cvt.pk.fp8.f32.e5m3(float, float, i32, i1)
declare i32 @llvm.amdgcn.cvt.sr.fp8.f32.e5m3(float, i32, i32, i32)
declare float @llvm.amdgcn.cvt.f32.fp8.e5m3(i32, i32)
declare i32 @llvm.amdgcn.mov.dpp.i32(i32, i32, i32, i32, i1)
; Packed conversion of two floats through llvm.amdgcn.cvt.pk.fp8.f32.e5m3 with
; the trailing i1 operand set to false.
; Expected in all three runs: a single v_cvt_pk_fp8_f32 carrying the clamp
; modifier and no op_sel, followed by a copy of v2 into v0 and the return.
; The true16 run prints the destination as v2.l; the fake16 and GlobalISel runs
; print it as v2.
define i32 @test_cvt_pk_fp8_f32_word0(float %x, float %y, i32 %old) {
; GFX1250-TRUE16-LABEL: test_cvt_pk_fp8_f32_word0:
; GFX1250-TRUE16: ; %bb.0:
; GFX1250-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX1250-TRUE16-NEXT: v_cvt_pk_fp8_f32 v2.l, v0, v1 clamp
; GFX1250-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-TRUE16-NEXT: v_mov_b32_e32 v0, v2
; GFX1250-TRUE16-NEXT: s_set_pc_i64 s[30:31]
;
; GFX1250-FAKE16-LABEL: test_cvt_pk_fp8_f32_word0:
; GFX1250-FAKE16: ; %bb.0:
; GFX1250-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-FAKE16-NEXT: s_wait_kmcnt 0x0
; GFX1250-FAKE16-NEXT: v_cvt_pk_fp8_f32 v2, v0, v1 clamp
; GFX1250-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-FAKE16-NEXT: v_mov_b32_e32 v0, v2
; GFX1250-FAKE16-NEXT: s_set_pc_i64 s[30:31]
;
; GFX1250-GISEL-LABEL: test_cvt_pk_fp8_f32_word0:
; GFX1250-GISEL: ; %bb.0:
; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-GISEL-NEXT: v_cvt_pk_fp8_f32 v2, v0, v1 clamp
; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-GISEL-NEXT: v_mov_b32_e32 v0, v2
; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31]
; NOTE(review): i1 false presumably selects the low 16-bit word of %old as the
; destination (inferred from the test name and the v2.l operand) - confirm
; against the intrinsic definition.
%ret = tail call i32 @llvm.amdgcn.cvt.pk.fp8.f32.e5m3(float %x, float %y, i32 %old, i1 false)
ret i32 %ret
}
; Same packed conversion as the word0 test, but with the trailing i1 operand of
; llvm.amdgcn.cvt.pk.fp8.f32.e5m3 set to true.
; Expected in all three runs: v_cvt_pk_fp8_f32 with op_sel:[0,0,1] and clamp,
; followed by a copy of v2 into v0 and the return.
; The true16 run prints the destination as v2.h; the fake16 and GlobalISel runs
; print it as v2.
define i32 @test_cvt_pk_fp8_f32_word1(float %x, float %y, i32 %old) {
; GFX1250-TRUE16-LABEL: test_cvt_pk_fp8_f32_word1:
; GFX1250-TRUE16: ; %bb.0:
; GFX1250-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX1250-TRUE16-NEXT: v_cvt_pk_fp8_f32 v2.h, v0, v1 op_sel:[0,0,1] clamp
; GFX1250-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-TRUE16-NEXT: v_mov_b32_e32 v0, v2
; GFX1250-TRUE16-NEXT: s_set_pc_i64 s[30:31]
;
; GFX1250-FAKE16-LABEL: test_cvt_pk_fp8_f32_word1:
; GFX1250-FAKE16: ; %bb.0:
; GFX1250-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-FAKE16-NEXT: s_wait_kmcnt 0x0
; GFX1250-FAKE16-NEXT: v_cvt_pk_fp8_f32 v2, v0, v1 op_sel:[0,0,1] clamp
; GFX1250-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-FAKE16-NEXT: v_mov_b32_e32 v0, v2
; GFX1250-FAKE16-NEXT: s_set_pc_i64 s[30:31]
;
; GFX1250-GISEL-LABEL: test_cvt_pk_fp8_f32_word1:
; GFX1250-GISEL: ; %bb.0:
; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-GISEL-NEXT: v_cvt_pk_fp8_f32 v2, v0, v1 op_sel:[0,0,1] clamp
; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-GISEL-NEXT: v_mov_b32_e32 v0, v2
; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31]
; NOTE(review): i1 true presumably selects the high 16-bit word of %old as the
; destination (inferred from the test name and the v2.h operand) - confirm
; against the intrinsic definition.
%ret = tail call i32 @llvm.amdgcn.cvt.pk.fp8.f32.e5m3(float %x, float %y, i32 %old, i1 true)
ret i32 %ret
}
; Packed conversion (trailing i1 = true) whose first float operand comes from
; llvm.amdgcn.mov.dpp.i32, in an amdgpu_cs function that stores the result to
; %out instead of returning it.
; Expected in all three runs: the DPP move stays a separate v_mov_b32_dpp and
; the conversion is a plain v_cvt_pk_fp8_f32 with op_sel:[0,0,1] and clamp,
; i.e. the DPP controls are not merged into the conversion here.
; The GlobalISel run differs only in which registers hold the copied store
; address (v[6:7] instead of v[4:5]).
define amdgpu_cs void @test_cvt_pk_fp8_f32_word1_dpp(i32 %a, float %y, i32 %old, ptr addrspace(1) %out) {
; GFX1250-TRUE16-LABEL: test_cvt_pk_fp8_f32_word1_dpp:
; GFX1250-TRUE16: ; %bb.0:
; GFX1250-TRUE16-NEXT: v_mov_b32_dpp v0, v0 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
; GFX1250-TRUE16-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3
; GFX1250-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1250-TRUE16-NEXT: v_cvt_pk_fp8_f32 v2.h, v0, v1 op_sel:[0,0,1] clamp
; GFX1250-TRUE16-NEXT: global_store_b32 v[4:5], v2, off
; GFX1250-TRUE16-NEXT: s_endpgm
;
; GFX1250-FAKE16-LABEL: test_cvt_pk_fp8_f32_word1_dpp:
; GFX1250-FAKE16: ; %bb.0:
; GFX1250-FAKE16-NEXT: v_mov_b32_dpp v0, v0 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
; GFX1250-FAKE16-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3
; GFX1250-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1250-FAKE16-NEXT: v_cvt_pk_fp8_f32 v2, v0, v1 op_sel:[0,0,1] clamp
; GFX1250-FAKE16-NEXT: global_store_b32 v[4:5], v2, off
; GFX1250-FAKE16-NEXT: s_endpgm
;
; GFX1250-GISEL-LABEL: test_cvt_pk_fp8_f32_word1_dpp:
; GFX1250-GISEL: ; %bb.0:
; GFX1250-GISEL-NEXT: v_mov_b32_dpp v0, v0 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
; GFX1250-GISEL-NEXT: v_dual_mov_b32 v6, v3 :: v_dual_mov_b32 v7, v4
; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1250-GISEL-NEXT: v_cvt_pk_fp8_f32 v2, v0, v1 op_sel:[0,0,1] clamp
; GFX1250-GISEL-NEXT: global_store_b32 v[6:7], v2, off
; GFX1250-GISEL-NEXT: s_endpgm
; Operands 228, 15, 15, 1 are printed in the expected output as
; quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1.
%tmp0 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %a, i32 228, i32 15, i32 15, i1 1)
; Reinterpret the DPP result as float so it can feed the conversion.
%tmp1 = bitcast i32 %tmp0 to float
%ret = tail call i32 @llvm.amdgcn.cvt.pk.fp8.f32.e5m3(float %tmp1, float %y, i32 %old, i1 true)
store i32 %ret, ptr addrspace(1) %out
ret void
}
; Single-float conversion through llvm.amdgcn.cvt.sr.fp8.f32.e5m3 with the
; trailing i32 operand set to 0.
; Expected (one shared check prefix, so the output is the same for all three
; runs): v_cvt_sr_fp8_f32 with clamp and no byte_sel modifier, then a copy of
; v2 into v0 and the return.
; NOTE(review): "sr" presumably means stochastic rounding with %r supplying the
; random bits - confirm against the intrinsic definition.
define i32 @test_cvt_sr_fp8_f32_byte0(float %x, i32 %r, i32 %old) {
; GFX1250-LABEL: test_cvt_sr_fp8_f32_byte0:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_cvt_sr_fp8_f32 v2, v0, v1 clamp
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-NEXT: v_mov_b32_e32 v0, v2
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
%ret = tail call i32 @llvm.amdgcn.cvt.sr.fp8.f32.e5m3(float %x, i32 %r, i32 %old, i32 0)
ret i32 %ret
}
; Single-float conversion through llvm.amdgcn.cvt.sr.fp8.f32.e5m3 with the
; trailing i32 operand set to 1.
; Expected (one shared check prefix for all three runs): v_cvt_sr_fp8_f32 with
; byte_sel:1 and clamp, then a copy of v2 into v0 and the return.
define i32 @test_cvt_sr_fp8_f32_byte1(float %x, i32 %r, i32 %old) {
; GFX1250-LABEL: test_cvt_sr_fp8_f32_byte1:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_cvt_sr_fp8_f32 v2, v0, v1 byte_sel:1 clamp
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-NEXT: v_mov_b32_e32 v0, v2
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
; The last operand (1) is printed as byte_sel:1 in the expected instruction.
%ret = tail call i32 @llvm.amdgcn.cvt.sr.fp8.f32.e5m3(float %x, i32 %r, i32 %old, i32 1)
ret i32 %ret
}
; Single-float conversion through llvm.amdgcn.cvt.sr.fp8.f32.e5m3 with the
; trailing i32 operand set to 2.
; Expected (one shared check prefix for all three runs): v_cvt_sr_fp8_f32 with
; byte_sel:2 and clamp, then a copy of v2 into v0 and the return.
define i32 @test_cvt_sr_fp8_f32_byte2(float %x, i32 %r, i32 %old) {
; GFX1250-LABEL: test_cvt_sr_fp8_f32_byte2:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_cvt_sr_fp8_f32 v2, v0, v1 byte_sel:2 clamp
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-NEXT: v_mov_b32_e32 v0, v2
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
; The last operand (2) is printed as byte_sel:2 in the expected instruction.
%ret = tail call i32 @llvm.amdgcn.cvt.sr.fp8.f32.e5m3(float %x, i32 %r, i32 %old, i32 2)
ret i32 %ret
}
; Single-float conversion through llvm.amdgcn.cvt.sr.fp8.f32.e5m3 with the
; trailing i32 operand set to 3.
; Expected (one shared check prefix for all three runs): v_cvt_sr_fp8_f32 with
; byte_sel:3 and clamp, then a copy of v2 into v0 and the return.
define i32 @test_cvt_sr_fp8_f32_byte3(float %x, i32 %r, i32 %old) {
; GFX1250-LABEL: test_cvt_sr_fp8_f32_byte3:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_cvt_sr_fp8_f32 v2, v0, v1 byte_sel:3 clamp
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-NEXT: v_mov_b32_e32 v0, v2
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
; The last operand (3) is printed as byte_sel:3 in the expected instruction.
%ret = tail call i32 @llvm.amdgcn.cvt.sr.fp8.f32.e5m3(float %x, i32 %r, i32 %old, i32 3)
ret i32 %ret
}
; llvm.amdgcn.cvt.sr.fp8.f32.e5m3 (trailing operand 1) whose float operand
; comes from llvm.amdgcn.mov.dpp.i32, in an amdgpu_cs function that stores the
; result to %out.
; Expected in all three runs: no separate v_mov_b32_dpp; the DPP controls
; appear directly on a single v_cvt_sr_fp8_f32_e64_dpp together with byte_sel:1
; and clamp.
; The GlobalISel run differs only in which registers hold the copied store
; address (v[6:7] instead of v[4:5]).
define amdgpu_cs void @test_cvt_sr_fp8_f32_byte1_dpp(i32 %a, i32 %r, i32 %old, ptr addrspace(1) %out) {
; GFX1250-TRUE16-LABEL: test_cvt_sr_fp8_f32_byte1_dpp:
; GFX1250-TRUE16: ; %bb.0:
; GFX1250-TRUE16-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3
; GFX1250-TRUE16-NEXT: v_cvt_sr_fp8_f32_e64_dpp v2, v0, v1 byte_sel:1 clamp quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
; GFX1250-TRUE16-NEXT: global_store_b32 v[4:5], v2, off
; GFX1250-TRUE16-NEXT: s_endpgm
;
; GFX1250-FAKE16-LABEL: test_cvt_sr_fp8_f32_byte1_dpp:
; GFX1250-FAKE16: ; %bb.0:
; GFX1250-FAKE16-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3
; GFX1250-FAKE16-NEXT: v_cvt_sr_fp8_f32_e64_dpp v2, v0, v1 byte_sel:1 clamp quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
; GFX1250-FAKE16-NEXT: global_store_b32 v[4:5], v2, off
; GFX1250-FAKE16-NEXT: s_endpgm
;
; GFX1250-GISEL-LABEL: test_cvt_sr_fp8_f32_byte1_dpp:
; GFX1250-GISEL: ; %bb.0:
; GFX1250-GISEL-NEXT: v_dual_mov_b32 v6, v3 :: v_dual_mov_b32 v7, v4
; GFX1250-GISEL-NEXT: v_cvt_sr_fp8_f32_e64_dpp v2, v0, v1 byte_sel:1 clamp quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
; GFX1250-GISEL-NEXT: global_store_b32 v[6:7], v2, off
; GFX1250-GISEL-NEXT: s_endpgm
; Operands 228, 15, 15, 1 are printed in the expected output as
; quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1.
%tmp0 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %a, i32 228, i32 15, i32 15, i1 1)
; Reinterpret the DPP result as float so it can feed the conversion.
%tmp1 = bitcast i32 %tmp0 to float
%ret = tail call i32 @llvm.amdgcn.cvt.sr.fp8.f32.e5m3(float %tmp1, i32 %r, i32 %old, i32 1)
store i32 %ret, ptr addrspace(1) %out
ret void
}
; fp8 -> float conversion through llvm.amdgcn.cvt.f32.fp8.e5m3 with the second
; operand set to 0.
; Expected (one shared check prefix for all three runs): v_cvt_f32_fp8_e64
; converting v0 in place with clamp and no byte_sel modifier, then the return.
define float @test_cvt_f32_fp8_e5m3_byte0(i32 %a) {
; GFX1250-LABEL: test_cvt_f32_fp8_e5m3_byte0:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_cvt_f32_fp8_e64 v0, v0 clamp
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
%ret = tail call float @llvm.amdgcn.cvt.f32.fp8.e5m3(i32 %a, i32 0)
ret float %ret
}
; fp8 -> float conversion through llvm.amdgcn.cvt.f32.fp8.e5m3 with the second
; operand set to 1.
; Expected (one shared check prefix for all three runs): v_cvt_f32_fp8_e64
; converting v0 in place with byte_sel:1 and clamp, then the return.
define float @test_cvt_f32_fp8_e5m3_byte1(i32 %a) {
; GFX1250-LABEL: test_cvt_f32_fp8_e5m3_byte1:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_cvt_f32_fp8_e64 v0, v0 byte_sel:1 clamp
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
; The second operand (1) is printed as byte_sel:1 in the expected instruction.
%ret = tail call float @llvm.amdgcn.cvt.f32.fp8.e5m3(i32 %a, i32 1)
ret float %ret
}
; fp8 -> float conversion through llvm.amdgcn.cvt.f32.fp8.e5m3 with the second
; operand set to 2.
; Expected (one shared check prefix for all three runs): v_cvt_f32_fp8_e64
; converting v0 in place with byte_sel:2 and clamp, then the return.
define float @test_cvt_f32_fp8_e5m3_byte2(i32 %a) {
; GFX1250-LABEL: test_cvt_f32_fp8_e5m3_byte2:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_cvt_f32_fp8_e64 v0, v0 byte_sel:2 clamp
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
; The second operand (2) is printed as byte_sel:2 in the expected instruction.
%ret = tail call float @llvm.amdgcn.cvt.f32.fp8.e5m3(i32 %a, i32 2)
ret float %ret
}
; fp8 -> float conversion through llvm.amdgcn.cvt.f32.fp8.e5m3 with the second
; operand set to 3.
; Expected (one shared check prefix for all three runs): v_cvt_f32_fp8_e64
; converting v0 in place with byte_sel:3 and clamp, then the return.
define float @test_cvt_f32_fp8_e5m3_byte3(i32 %a) {
; GFX1250-LABEL: test_cvt_f32_fp8_e5m3_byte3:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_cvt_f32_fp8_e64 v0, v0 byte_sel:3 clamp
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
; The second operand (3) is printed as byte_sel:3 in the expected instruction.
%ret = tail call float @llvm.amdgcn.cvt.f32.fp8.e5m3(i32 %a, i32 3)
ret float %ret
}