| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 |
| ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-SDAG %s |
| ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-GISEL %s |
| |
; Intrinsics under test. Each takes two integer operands followed by a
; <2 x i32> operand; only the integer widths and the result width differ:
;   b4: (i32, i32, <2 x i32>) -> <2 x i32>
;   b6: (i32, i64, <2 x i32>) -> <3 x i32>
;   b8: (i64, i64, <2 x i32>) -> <4 x i32>
| declare <2 x i32> @llvm.amdgcn.perm.pk16.b4.u4(i32, i32, <2 x i32>) |
| declare <3 x i32> @llvm.amdgcn.perm.pk16.b6.u4(i32, i64, <2 x i32>) |
| declare <4 x i32> @llvm.amdgcn.perm.pk16.b8.u4(i64, i64, <2 x i32>) |
| |
; Calls llvm.amdgcn.perm.pk16.b4.u4 on (i32 %a, i32 %b, <2 x i32> %c) and
; stores the <2 x i32> result through %out.
; The checks use the shared GFX1250 prefix, so both RUN lines (SelectionDAG
; and GlobalISel) are expected to produce the same sequence: one
; v_perm_pk16_b4_u4 followed by a 64-bit flat store.
; The "; GFX1250..." lines below are FileCheck assertions, not free-form
; comments; regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
| define void @test_perm_pk16_b4_u4(i32 %a, i32 %b, <2 x i32> %c, ptr %out) { |
| ; GFX1250-LABEL: test_perm_pk16_b4_u4: |
| ; GFX1250: ; %bb.0: |
| ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-NEXT: v_perm_pk16_b4_u4 v[0:1], v0, v1, v[2:3] |
| ; GFX1250-NEXT: flat_store_b64 v[4:5], v[0:1] |
| ; GFX1250-NEXT: s_wait_dscnt 0x0 |
| ; GFX1250-NEXT: s_set_pc_i64 s[30:31] |
| %ret = tail call <2 x i32> @llvm.amdgcn.perm.pk16.b4.u4(i32 %a, i32 %b, <2 x i32> %c) |
| store <2 x i32> %ret, ptr %out, align 8 |
| ret void |
| } |
| |
; Calls llvm.amdgcn.perm.pk16.b6.u4 on (i32 %a, i64 %b, <2 x i32> %c) and
; stores the <3 x i32> result through %out (expected as a 96-bit flat store).
; Unlike the b4 and b8 tests, the expectations are split into GFX1250-SDAG and
; GFX1250-GISEL blocks: each selector is expected to emit v_dual_mov_b32
; copies before the v_perm_pk16_b6_u4, but into different registers and with a
; different s_delay_alu dependency.
; NOTE(review): the copies presumably exist because the i32 first argument
; leaves the following 64-bit values on odd-numbered register pairs; this file
; does not establish that, so confirm against the gfx1250 register-alignment
; rules.
; The "; GFX1250-..." lines below are FileCheck assertions, not free-form
; comments; regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
| define void @test_perm_pk16_b6_u4(i32 %a, i64 %b, <2 x i32> %c, ptr %out) { |
| ; GFX1250-SDAG-LABEL: test_perm_pk16_b6_u4: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: v_dual_mov_b32 v7, v6 :: v_dual_mov_b32 v9, v4 |
| ; GFX1250-SDAG-NEXT: v_dual_mov_b32 v8, v3 :: v_dual_mov_b32 v3, v2 |
| ; GFX1250-SDAG-NEXT: v_dual_mov_b32 v2, v1 :: v_dual_mov_b32 v6, v5 |
| ; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-SDAG-NEXT: v_perm_pk16_b6_u4 v[0:2], v0, v[2:3], v[8:9] |
| ; GFX1250-SDAG-NEXT: flat_store_b96 v[6:7], v[0:2] |
| ; GFX1250-SDAG-NEXT: s_wait_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX1250-GISEL-LABEL: test_perm_pk16_b6_u4: |
| ; GFX1250-GISEL: ; %bb.0: |
| ; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-GISEL-NEXT: v_dual_mov_b32 v8, v1 :: v_dual_mov_b32 v9, v2 |
| ; GFX1250-GISEL-NEXT: v_dual_mov_b32 v2, v3 :: v_dual_mov_b32 v3, v4 |
| ; GFX1250-GISEL-NEXT: v_dual_mov_b32 v4, v5 :: v_dual_mov_b32 v5, v6 |
| ; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) |
| ; GFX1250-GISEL-NEXT: v_perm_pk16_b6_u4 v[0:2], v0, v[8:9], v[2:3] |
| ; GFX1250-GISEL-NEXT: flat_store_b96 v[4:5], v[0:2] |
| ; GFX1250-GISEL-NEXT: s_wait_dscnt 0x0 |
| ; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31] |
| %ret = tail call <3 x i32> @llvm.amdgcn.perm.pk16.b6.u4(i32 %a, i64 %b, <2 x i32> %c) |
| store <3 x i32> %ret, ptr %out, align 16 |
| ret void |
| } |
| |
; Calls llvm.amdgcn.perm.pk16.b8.u4 on (i64 %a, i64 %b, <2 x i32> %c) and
; stores the <4 x i32> result through %out.
; The checks use the shared GFX1250 prefix, so both RUN lines (SelectionDAG
; and GlobalISel) are expected to produce the same sequence: one
; v_perm_pk16_b8_u4 on the incoming register pairs followed by a 128-bit flat
; store, with no register copies in between.
; The "; GFX1250..." lines below are FileCheck assertions, not free-form
; comments; regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
| define void @test_perm_pk16_b8_u4(i64 %a, i64 %b, <2 x i32> %c, ptr %out) { |
| ; GFX1250-LABEL: test_perm_pk16_b8_u4: |
| ; GFX1250: ; %bb.0: |
| ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-NEXT: v_perm_pk16_b8_u4 v[0:3], v[0:1], v[2:3], v[4:5] |
| ; GFX1250-NEXT: flat_store_b128 v[6:7], v[0:3] |
| ; GFX1250-NEXT: s_wait_dscnt 0x0 |
| ; GFX1250-NEXT: s_set_pc_i64 s[30:31] |
| %ret = tail call <4 x i32> @llvm.amdgcn.perm.pk16.b8.u4(i64 %a, i64 %b, <2 x i32> %c) |
| store <4 x i32> %ret, ptr %out, align 16 |
| ret void |
| } |