| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
| ; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck %s |
| |
; Kernel that writes its two vector arguments to stack objects and then reads
; the larger one back through a generic pointer.
; NOTE(review): judging by the function name only, this guards against folding
; an immediate into an instruction that also carries a frame-index operand --
; confirm against the change that introduced the test.
; The assertions in this function are autogenerated (see the note at the top
; of the file); regenerate them with that script instead of editing by hand.
| define protected amdgpu_kernel void @no_folding_imm_to_inst_with_fi(<4 x i64> %val4, <16 x i64> %val16) { |
| ; CHECK-LABEL: no_folding_imm_to_inst_with_fi: |
| ; CHECK: ; %bb.0: ; %bb |
| ; CHECK-NEXT: s_clause 0x2 |
| ; CHECK-NEXT: s_load_b256 s[36:43], s[4:5], 0x24 |
| ; CHECK-NEXT: s_load_b512 s[16:31], s[4:5], 0xe4 |
| ; CHECK-NEXT: s_load_b512 s[0:15], s[4:5], 0xa4 |
| ; CHECK-NEXT: s_mov_b64 s[34:35], src_private_base |
| ; CHECK-NEXT: s_movk_i32 s34, 0x80 |
| ; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_1) |
| ; CHECK-NEXT: s_add_nc_u64 s[44:45], s[34:35], 0x70 |
| ; CHECK-NEXT: v_dual_mov_b32 v26, s34 :: v_dual_mov_b32 v27, s35 |
| ; CHECK-NEXT: v_dual_mov_b32 v20, s44 :: v_dual_mov_b32 v21, s45 |
| ; CHECK-NEXT: s_wait_kmcnt 0x0 |
| ; CHECK-NEXT: v_dual_mov_b32 v0, s40 :: v_dual_mov_b32 v1, s41 |
| ; CHECK-NEXT: v_dual_mov_b32 v2, s42 :: v_dual_mov_b32 v3, s43 |
| ; CHECK-NEXT: v_dual_mov_b32 v4, s36 :: v_dual_mov_b32 v5, s37 |
| ; CHECK-NEXT: v_dual_mov_b32 v6, s38 :: v_dual_mov_b32 v7, s39 |
| ; CHECK-NEXT: v_dual_mov_b32 v12, s24 :: v_dual_mov_b32 v13, s25 |
| ; CHECK-NEXT: s_add_nc_u64 s[24:25], s[34:35], 0x60 |
| ; CHECK-NEXT: v_dual_mov_b32 v8, s28 :: v_dual_mov_b32 v9, s29 |
| ; CHECK-NEXT: v_dual_mov_b32 v10, s30 :: v_dual_mov_b32 v11, s31 |
| ; CHECK-NEXT: v_dual_mov_b32 v16, s20 :: v_dual_mov_b32 v17, s21 |
| ; CHECK-NEXT: s_add_nc_u64 s[20:21], s[34:35], 0x50 |
| ; CHECK-NEXT: v_dual_mov_b32 v14, s26 :: v_dual_mov_b32 v15, s27 |
| ; CHECK-NEXT: s_wait_alu depctr_sa_sdst(0) |
| ; CHECK-NEXT: v_dual_mov_b32 v22, s24 :: v_dual_mov_b32 v23, s25 |
| ; CHECK-NEXT: v_dual_mov_b32 v18, s22 :: v_dual_mov_b32 v19, s23 |
| ; CHECK-NEXT: v_dual_mov_b32 v25, s21 :: v_dual_mov_b32 v24, s20 |
| ; CHECK-NEXT: scratch_store_b128 off, v[0:3], off offset:16 scope:SCOPE_SYS |
| ; CHECK-NEXT: s_wait_storecnt 0x0 |
| ; CHECK-NEXT: scratch_store_b128 off, v[4:7], off scope:SCOPE_SYS |
| ; CHECK-NEXT: s_wait_storecnt 0x0 |
| ; CHECK-NEXT: flat_store_b128 v[20:21], v[8:11] scope:SCOPE_SYS |
| ; CHECK-NEXT: s_wait_storecnt 0x0 |
| ; CHECK-NEXT: flat_store_b128 v[22:23], v[12:15] scope:SCOPE_SYS |
| ; CHECK-NEXT: s_wait_storecnt 0x0 |
| ; CHECK-NEXT: flat_store_b128 v[24:25], v[16:19] scope:SCOPE_SYS |
| ; CHECK-NEXT: s_wait_storecnt 0x0 |
| ; CHECK-NEXT: v_dual_mov_b32 v4, s12 :: v_dual_mov_b32 v5, s13 |
| ; CHECK-NEXT: s_add_nc_u64 s[12:13], s[34:35], 48 |
| ; CHECK-NEXT: v_dual_mov_b32 v0, s16 :: v_dual_mov_b32 v1, s17 |
| ; CHECK-NEXT: v_dual_mov_b32 v2, s18 :: v_dual_mov_b32 v3, s19 |
| ; CHECK-NEXT: v_dual_mov_b32 v6, s14 :: v_dual_mov_b32 v7, s15 |
| ; CHECK-NEXT: s_wait_alu depctr_sa_sdst(0) |
| ; CHECK-NEXT: v_dual_mov_b32 v29, s13 :: v_dual_mov_b32 v28, s12 |
| ; CHECK-NEXT: v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9 |
| ; CHECK-NEXT: v_dual_mov_b32 v10, s10 :: v_dual_mov_b32 v11, s11 |
| ; CHECK-NEXT: v_dual_mov_b32 v12, s4 :: v_dual_mov_b32 v13, s5 |
| ; CHECK-NEXT: v_dual_mov_b32 v14, s6 :: v_dual_mov_b32 v15, s7 |
| ; CHECK-NEXT: v_dual_mov_b32 v16, s0 :: v_dual_mov_b32 v17, s1 |
| ; CHECK-NEXT: v_dual_mov_b32 v18, s2 :: v_dual_mov_b32 v19, s3 |
| ; CHECK-NEXT: flat_store_b128 v[26:27], v[0:3] offset:64 scope:SCOPE_SYS |
| ; CHECK-NEXT: s_wait_storecnt 0x0 |
| ; CHECK-NEXT: flat_store_b128 v[28:29], v[4:7] scope:SCOPE_SYS |
| ; CHECK-NEXT: s_wait_storecnt 0x0 |
| ; CHECK-NEXT: flat_store_b128 v[26:27], v[8:11] offset:32 scope:SCOPE_SYS |
| ; CHECK-NEXT: s_wait_storecnt 0x0 |
| ; CHECK-NEXT: flat_store_b128 v[26:27], v[12:15] offset:16 scope:SCOPE_SYS |
| ; CHECK-NEXT: s_wait_storecnt 0x0 |
| ; CHECK-NEXT: flat_store_b128 v[26:27], v[16:19] scope:SCOPE_SYS |
| ; CHECK-NEXT: s_wait_storecnt 0x0 |
| ; CHECK-NEXT: flat_load_b128 v[0:3], v[22:23] scope:SCOPE_SYS |
| ; CHECK-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; CHECK-NEXT: flat_load_b128 v[0:3], v[20:21] scope:SCOPE_SYS |
| ; CHECK-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; CHECK-NEXT: flat_load_b128 v[0:3], v[26:27] offset:64 scope:SCOPE_SYS |
| ; CHECK-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; CHECK-NEXT: flat_load_b128 v[0:3], v[24:25] scope:SCOPE_SYS |
| ; CHECK-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; CHECK-NEXT: flat_load_b128 v[0:3], v[26:27] offset:32 scope:SCOPE_SYS |
| ; CHECK-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; CHECK-NEXT: flat_load_b128 v[0:3], v[28:29] scope:SCOPE_SYS |
| ; CHECK-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; CHECK-NEXT: flat_load_b128 v[0:3], v[26:27] scope:SCOPE_SYS |
| ; CHECK-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; CHECK-NEXT: flat_load_b128 v[0:3], v[26:27] offset:16 scope:SCOPE_SYS |
| ; CHECK-NEXT: s_wait_loadcnt 0x0 |
| ; CHECK-NEXT: s_endpgm |
; The entry label name is echoed in the first assertion of this function, so
; renaming it requires regenerating the assertions.
| bb: |
; Two addrspace(5) stack objects: a 32-byte-aligned <4 x i64> and a
; 128-byte-aligned <16 x i64>.
| %alloca = alloca <4 x i64>, align 32, addrspace(5) |
| %alloca1 = alloca <16 x i64>, align 128, addrspace(5) |
; Volatile store directly through the addrspace(5) pointer; the expected
; output contains it as a pair of scratch_store_b128 instructions.
| store volatile <4 x i64> %val4, ptr addrspace(5) %alloca |
; Cast the second stack object to a generic pointer so the accesses below go
; through `ptr`. The expected output shows them as flat_store_b128 /
; flat_load_b128 on 64-bit addresses derived from src_private_base, with the
; low half set by `s_movk_i32 s34, 0x80` (presumably the frame offset of
; %alloca1 -- TODO confirm) and further offsets added with s_add_nc_u64.
| %ascast = addrspacecast ptr addrspace(5) %alloca1 to ptr |
| store volatile <16 x i64> %val16, ptr %ascast |
; The loaded value is never used; the eight flat_load_b128 instructions are
; nevertheless part of the expected output because the load is volatile.
| %load = load volatile <16 x i64>, ptr %ascast |
| ret void |
| } |