| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 %s -o - | FileCheck -check-prefixes=GFX942,GFX942-SDAG %s |
| ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 %s -o - | FileCheck -check-prefixes=GFX942,GFX942-GISEL %s |
| |
| |
| ; memset through a ptr addrspace(0) destination (align 4) where both the fill |
| ; byte and the i64 length are only known at run time. |
| ; The checks below expect, for SelectionDAG and GlobalISel alike, a main loop |
| ; that advances 16 bytes per iteration with flat_store_dwordx4 (trip count taken |
| ; from the length with its low 4 bits cleared), followed by a residual loop that |
| ; writes the remaining (length & 15) bytes one at a time with flat_store_byte. |
| define void @memset_p0_varsize_align_4_varsetval(ptr addrspace(0) align 4 %dst, i8 %setval, i64 %size) { |
| ; GFX942-SDAG-LABEL: memset_p0_varsize_align_4_varsetval: |
| ; GFX942-SDAG: ; %bb.0: ; %entry |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v10, v3 |
| ; GFX942-SDAG-NEXT: v_and_b32_e32 v12, -16, v10 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v13, v4 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v11, v4 |
| ; GFX942-SDAG-NEXT: v_and_b32_e32 v8, 15, v10 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v9, 0 |
| ; GFX942-SDAG-NEXT: s_mov_b64 s[0:1], 0 |
| ; GFX942-SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[12:13] |
| ; GFX942-SDAG-NEXT: s_and_saveexec_b64 s[2:3], vcc |
| ; GFX942-SDAG-NEXT: s_cbranch_execz .LBB0_3 |
| ; GFX942-SDAG-NEXT: ; %bb.1: ; %dynamic-memset-expansion-main-body.preheader |
| ; GFX942-SDAG-NEXT: s_mov_b32 s4, 0x4040404 |
| ; GFX942-SDAG-NEXT: v_perm_b32 v4, v2, v2, s4 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v5, v4 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v6, v4 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v7, v4 |
| ; GFX942-SDAG-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX942-SDAG-NEXT: .LBB0_2: ; %dynamic-memset-expansion-main-body |
| ; GFX942-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[14:15], v[0:1], 0, s[4:5] |
| ; GFX942-SDAG-NEXT: s_add_u32 s4, s4, 16 |
| ; GFX942-SDAG-NEXT: s_addc_u32 s5, s5, 0 |
| ; GFX942-SDAG-NEXT: v_cmp_ge_u64_e32 vcc, s[4:5], v[12:13] |
| ; GFX942-SDAG-NEXT: s_or_b64 s[0:1], vcc, s[0:1] |
| ; GFX942-SDAG-NEXT: flat_store_dwordx4 v[14:15], v[4:7] |
| ; GFX942-SDAG-NEXT: s_andn2_b64 exec, exec, s[0:1] |
| ; GFX942-SDAG-NEXT: s_cbranch_execnz .LBB0_2 |
| ; GFX942-SDAG-NEXT: .LBB0_3: ; %Flow4 |
| ; GFX942-SDAG-NEXT: s_or_b64 exec, exec, s[2:3] |
| ; GFX942-SDAG-NEXT: s_mov_b64 s[0:1], 0 |
| ; GFX942-SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[8:9] |
| ; GFX942-SDAG-NEXT: s_and_saveexec_b64 s[2:3], vcc |
| ; GFX942-SDAG-NEXT: s_cbranch_execz .LBB0_6 |
| ; GFX942-SDAG-NEXT: ; %bb.4: ; %dynamic-memset-expansion-residual-body.preheader |
| ; GFX942-SDAG-NEXT: v_and_b32_e32 v10, -16, v10 |
| ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[10:11] |
| ; GFX942-SDAG-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX942-SDAG-NEXT: .LBB0_5: ; %dynamic-memset-expansion-residual-body |
| ; GFX942-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[4:5] |
| ; GFX942-SDAG-NEXT: s_add_u32 s4, s4, 1 |
| ; GFX942-SDAG-NEXT: s_addc_u32 s5, s5, 0 |
| ; GFX942-SDAG-NEXT: v_cmp_ge_u64_e32 vcc, s[4:5], v[8:9] |
| ; GFX942-SDAG-NEXT: s_or_b64 s[0:1], vcc, s[0:1] |
| ; GFX942-SDAG-NEXT: flat_store_byte v[4:5], v2 |
| ; GFX942-SDAG-NEXT: s_andn2_b64 exec, exec, s[0:1] |
| ; GFX942-SDAG-NEXT: s_cbranch_execnz .LBB0_5 |
| ; GFX942-SDAG-NEXT: .LBB0_6: ; %Flow2 |
| ; GFX942-SDAG-NEXT: s_or_b64 exec, exec, s[2:3] |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-GISEL-LABEL: memset_p0_varsize_align_4_varsetval: |
| ; GFX942-GISEL: ; %bb.0: ; %entry |
| ; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-GISEL-NEXT: v_mov_b32_e32 v10, v3 |
| ; GFX942-GISEL-NEXT: v_and_b32_e32 v8, 15, v10 |
| ; GFX942-GISEL-NEXT: v_mov_b32_e32 v11, v4 |
| ; GFX942-GISEL-NEXT: v_sub_co_u32_e32 v12, vcc, v10, v8 |
| ; GFX942-GISEL-NEXT: v_mov_b32_e32 v9, 0 |
| ; GFX942-GISEL-NEXT: s_nop 0 |
| ; GFX942-GISEL-NEXT: v_subbrev_co_u32_e32 v13, vcc, 0, v11, vcc |
| ; GFX942-GISEL-NEXT: s_mov_b64 s[0:1], 0 |
| ; GFX942-GISEL-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[12:13] |
| ; GFX942-GISEL-NEXT: s_and_saveexec_b64 s[2:3], vcc |
| ; GFX942-GISEL-NEXT: s_cbranch_execz .LBB0_3 |
| ; GFX942-GISEL-NEXT: ; %bb.1: ; %dynamic-memset-expansion-main-body.preheader |
| ; GFX942-GISEL-NEXT: v_and_b32_e32 v3, 0xff, v2 |
| ; GFX942-GISEL-NEXT: v_lshl_or_b32 v4, v3, 8, v3 |
| ; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v3 |
| ; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v3, 24, v3 |
| ; GFX942-GISEL-NEXT: v_or3_b32 v4, v4, v5, v3 |
| ; GFX942-GISEL-NEXT: v_mov_b32_e32 v5, v4 |
| ; GFX942-GISEL-NEXT: v_mov_b32_e32 v6, v4 |
| ; GFX942-GISEL-NEXT: v_mov_b32_e32 v7, v4 |
| ; GFX942-GISEL-NEXT: v_mov_b64_e32 v[14:15], s[0:1] |
| ; GFX942-GISEL-NEXT: .LBB0_2: ; %dynamic-memset-expansion-main-body |
| ; GFX942-GISEL-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; GFX942-GISEL-NEXT: v_add_co_u32_e32 v16, vcc, v0, v14 |
| ; GFX942-GISEL-NEXT: s_nop 1 |
| ; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v17, vcc, v1, v15, vcc |
| ; GFX942-GISEL-NEXT: v_add_co_u32_e32 v14, vcc, 16, v14 |
| ; GFX942-GISEL-NEXT: flat_store_dwordx4 v[16:17], v[4:7] |
| ; GFX942-GISEL-NEXT: s_nop 0 |
| ; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v15, vcc, 0, v15, vcc |
| ; GFX942-GISEL-NEXT: v_cmp_ge_u64_e32 vcc, v[14:15], v[12:13] |
| ; GFX942-GISEL-NEXT: s_or_b64 s[0:1], vcc, s[0:1] |
| ; GFX942-GISEL-NEXT: s_andn2_b64 exec, exec, s[0:1] |
| ; GFX942-GISEL-NEXT: s_cbranch_execnz .LBB0_2 |
| ; GFX942-GISEL-NEXT: .LBB0_3: ; %Flow4 |
| ; GFX942-GISEL-NEXT: s_or_b64 exec, exec, s[2:3] |
| ; GFX942-GISEL-NEXT: s_mov_b64 s[0:1], 0 |
| ; GFX942-GISEL-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[8:9] |
| ; GFX942-GISEL-NEXT: s_and_saveexec_b64 s[2:3], vcc |
| ; GFX942-GISEL-NEXT: s_cbranch_execz .LBB0_6 |
| ; GFX942-GISEL-NEXT: ; %bb.4: ; %dynamic-memset-expansion-residual-body.preheader |
| ; GFX942-GISEL-NEXT: v_lshrrev_b64 v[4:5], 4, v[10:11] |
| ; GFX942-GISEL-NEXT: v_lshl_add_u64 v[0:1], v[4:5], 4, v[0:1] |
| ; GFX942-GISEL-NEXT: v_mov_b64_e32 v[4:5], s[0:1] |
| ; GFX942-GISEL-NEXT: .LBB0_5: ; %dynamic-memset-expansion-residual-body |
| ; GFX942-GISEL-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; GFX942-GISEL-NEXT: v_add_co_u32_e32 v6, vcc, v0, v4 |
| ; GFX942-GISEL-NEXT: s_nop 1 |
| ; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v7, vcc, v1, v5, vcc |
| ; GFX942-GISEL-NEXT: v_add_co_u32_e32 v4, vcc, 1, v4 |
| ; GFX942-GISEL-NEXT: flat_store_byte v[6:7], v2 |
| ; GFX942-GISEL-NEXT: s_nop 0 |
| ; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v5, vcc |
| ; GFX942-GISEL-NEXT: v_cmp_ge_u64_e32 vcc, v[4:5], v[8:9] |
| ; GFX942-GISEL-NEXT: s_or_b64 s[0:1], vcc, s[0:1] |
| ; GFX942-GISEL-NEXT: s_andn2_b64 exec, exec, s[0:1] |
| ; GFX942-GISEL-NEXT: s_cbranch_execnz .LBB0_5 |
| ; GFX942-GISEL-NEXT: .LBB0_6: ; %Flow2 |
| ; GFX942-GISEL-NEXT: s_or_b64 exec, exec, s[2:3] |
| ; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| ; GFX942-GISEL-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| ; The trailing i1 false operand is the intrinsic's isvolatile flag, i.e. this |
| ; is a non-volatile memset. |
| tail call void @llvm.memset.p0.i64(ptr addrspace(0) noundef nonnull align 4 %dst, i8 %setval, i64 %size, i1 false) |
| ret void |
| } |
| |
| ; memset through a ptr addrspace(1) destination (align 4) where both the fill |
| ; byte and the i64 length are only known at run time. |
| ; The checks below expect the same two-loop shape from both selectors: a main |
| ; loop storing 16 bytes per iteration with global_store_dwordx4, then a residual |
| ; loop storing the remaining (length & 15) bytes with global_store_byte. |
| define void @memset_p1_varsize_align_4_varsetval(ptr addrspace(1) align 4 %dst, i8 %setval, i64 %size) { |
| ; GFX942-SDAG-LABEL: memset_p1_varsize_align_4_varsetval: |
| ; GFX942-SDAG: ; %bb.0: ; %entry |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v10, v3 |
| ; GFX942-SDAG-NEXT: v_and_b32_e32 v12, -16, v10 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v13, v4 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v11, v4 |
| ; GFX942-SDAG-NEXT: v_and_b32_e32 v8, 15, v10 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v9, 0 |
| ; GFX942-SDAG-NEXT: s_mov_b64 s[0:1], 0 |
| ; GFX942-SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[12:13] |
| ; GFX942-SDAG-NEXT: s_and_saveexec_b64 s[2:3], vcc |
| ; GFX942-SDAG-NEXT: s_cbranch_execz .LBB1_3 |
| ; GFX942-SDAG-NEXT: ; %bb.1: ; %dynamic-memset-expansion-main-body.preheader |
| ; GFX942-SDAG-NEXT: s_mov_b32 s4, 0x4040404 |
| ; GFX942-SDAG-NEXT: v_perm_b32 v4, v2, v2, s4 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v5, v4 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v6, v4 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v7, v4 |
| ; GFX942-SDAG-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX942-SDAG-NEXT: .LBB1_2: ; %dynamic-memset-expansion-main-body |
| ; GFX942-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[14:15], v[0:1], 0, s[4:5] |
| ; GFX942-SDAG-NEXT: s_add_u32 s4, s4, 16 |
| ; GFX942-SDAG-NEXT: s_addc_u32 s5, s5, 0 |
| ; GFX942-SDAG-NEXT: v_cmp_ge_u64_e32 vcc, s[4:5], v[12:13] |
| ; GFX942-SDAG-NEXT: s_or_b64 s[0:1], vcc, s[0:1] |
| ; GFX942-SDAG-NEXT: global_store_dwordx4 v[14:15], v[4:7], off |
| ; GFX942-SDAG-NEXT: s_andn2_b64 exec, exec, s[0:1] |
| ; GFX942-SDAG-NEXT: s_cbranch_execnz .LBB1_2 |
| ; GFX942-SDAG-NEXT: .LBB1_3: ; %Flow4 |
| ; GFX942-SDAG-NEXT: s_or_b64 exec, exec, s[2:3] |
| ; GFX942-SDAG-NEXT: s_mov_b64 s[0:1], 0 |
| ; GFX942-SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[8:9] |
| ; GFX942-SDAG-NEXT: s_and_saveexec_b64 s[2:3], vcc |
| ; GFX942-SDAG-NEXT: s_cbranch_execz .LBB1_6 |
| ; GFX942-SDAG-NEXT: ; %bb.4: ; %dynamic-memset-expansion-residual-body.preheader |
| ; GFX942-SDAG-NEXT: v_and_b32_e32 v10, -16, v10 |
| ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[10:11] |
| ; GFX942-SDAG-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX942-SDAG-NEXT: .LBB1_5: ; %dynamic-memset-expansion-residual-body |
| ; GFX942-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[4:5] |
| ; GFX942-SDAG-NEXT: s_add_u32 s4, s4, 1 |
| ; GFX942-SDAG-NEXT: s_addc_u32 s5, s5, 0 |
| ; GFX942-SDAG-NEXT: v_cmp_ge_u64_e32 vcc, s[4:5], v[8:9] |
| ; GFX942-SDAG-NEXT: s_or_b64 s[0:1], vcc, s[0:1] |
| ; GFX942-SDAG-NEXT: global_store_byte v[4:5], v2, off |
| ; GFX942-SDAG-NEXT: s_andn2_b64 exec, exec, s[0:1] |
| ; GFX942-SDAG-NEXT: s_cbranch_execnz .LBB1_5 |
| ; GFX942-SDAG-NEXT: .LBB1_6: ; %Flow2 |
| ; GFX942-SDAG-NEXT: s_or_b64 exec, exec, s[2:3] |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-GISEL-LABEL: memset_p1_varsize_align_4_varsetval: |
| ; GFX942-GISEL: ; %bb.0: ; %entry |
| ; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-GISEL-NEXT: v_mov_b32_e32 v10, v3 |
| ; GFX942-GISEL-NEXT: v_and_b32_e32 v8, 15, v10 |
| ; GFX942-GISEL-NEXT: v_mov_b32_e32 v11, v4 |
| ; GFX942-GISEL-NEXT: v_sub_co_u32_e32 v12, vcc, v10, v8 |
| ; GFX942-GISEL-NEXT: v_mov_b32_e32 v9, 0 |
| ; GFX942-GISEL-NEXT: s_nop 0 |
| ; GFX942-GISEL-NEXT: v_subbrev_co_u32_e32 v13, vcc, 0, v11, vcc |
| ; GFX942-GISEL-NEXT: s_mov_b64 s[0:1], 0 |
| ; GFX942-GISEL-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[12:13] |
| ; GFX942-GISEL-NEXT: s_and_saveexec_b64 s[2:3], vcc |
| ; GFX942-GISEL-NEXT: s_cbranch_execz .LBB1_3 |
| ; GFX942-GISEL-NEXT: ; %bb.1: ; %dynamic-memset-expansion-main-body.preheader |
| ; GFX942-GISEL-NEXT: v_and_b32_e32 v3, 0xff, v2 |
| ; GFX942-GISEL-NEXT: v_lshl_or_b32 v4, v3, 8, v3 |
| ; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v3 |
| ; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v3, 24, v3 |
| ; GFX942-GISEL-NEXT: v_or3_b32 v4, v4, v5, v3 |
| ; GFX942-GISEL-NEXT: v_mov_b32_e32 v5, v4 |
| ; GFX942-GISEL-NEXT: v_mov_b32_e32 v6, v4 |
| ; GFX942-GISEL-NEXT: v_mov_b32_e32 v7, v4 |
| ; GFX942-GISEL-NEXT: v_mov_b64_e32 v[14:15], s[0:1] |
| ; GFX942-GISEL-NEXT: .LBB1_2: ; %dynamic-memset-expansion-main-body |
| ; GFX942-GISEL-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; GFX942-GISEL-NEXT: v_add_co_u32_e32 v16, vcc, v0, v14 |
| ; GFX942-GISEL-NEXT: s_nop 1 |
| ; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v17, vcc, v1, v15, vcc |
| ; GFX942-GISEL-NEXT: v_add_co_u32_e32 v14, vcc, 16, v14 |
| ; GFX942-GISEL-NEXT: global_store_dwordx4 v[16:17], v[4:7], off |
| ; GFX942-GISEL-NEXT: s_nop 0 |
| ; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v15, vcc, 0, v15, vcc |
| ; GFX942-GISEL-NEXT: v_cmp_ge_u64_e32 vcc, v[14:15], v[12:13] |
| ; GFX942-GISEL-NEXT: s_or_b64 s[0:1], vcc, s[0:1] |
| ; GFX942-GISEL-NEXT: s_andn2_b64 exec, exec, s[0:1] |
| ; GFX942-GISEL-NEXT: s_cbranch_execnz .LBB1_2 |
| ; GFX942-GISEL-NEXT: .LBB1_3: ; %Flow4 |
| ; GFX942-GISEL-NEXT: s_or_b64 exec, exec, s[2:3] |
| ; GFX942-GISEL-NEXT: s_mov_b64 s[0:1], 0 |
| ; GFX942-GISEL-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[8:9] |
| ; GFX942-GISEL-NEXT: s_and_saveexec_b64 s[2:3], vcc |
| ; GFX942-GISEL-NEXT: s_cbranch_execz .LBB1_6 |
| ; GFX942-GISEL-NEXT: ; %bb.4: ; %dynamic-memset-expansion-residual-body.preheader |
| ; GFX942-GISEL-NEXT: v_lshrrev_b64 v[4:5], 4, v[10:11] |
| ; GFX942-GISEL-NEXT: v_lshl_add_u64 v[0:1], v[4:5], 4, v[0:1] |
| ; GFX942-GISEL-NEXT: v_mov_b64_e32 v[4:5], s[0:1] |
| ; GFX942-GISEL-NEXT: .LBB1_5: ; %dynamic-memset-expansion-residual-body |
| ; GFX942-GISEL-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; GFX942-GISEL-NEXT: v_add_co_u32_e32 v6, vcc, v0, v4 |
| ; GFX942-GISEL-NEXT: s_nop 1 |
| ; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v7, vcc, v1, v5, vcc |
| ; GFX942-GISEL-NEXT: v_add_co_u32_e32 v4, vcc, 1, v4 |
| ; GFX942-GISEL-NEXT: global_store_byte v[6:7], v2, off |
| ; GFX942-GISEL-NEXT: s_nop 0 |
| ; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v5, vcc |
| ; GFX942-GISEL-NEXT: v_cmp_ge_u64_e32 vcc, v[4:5], v[8:9] |
| ; GFX942-GISEL-NEXT: s_or_b64 s[0:1], vcc, s[0:1] |
| ; GFX942-GISEL-NEXT: s_andn2_b64 exec, exec, s[0:1] |
| ; GFX942-GISEL-NEXT: s_cbranch_execnz .LBB1_5 |
| ; GFX942-GISEL-NEXT: .LBB1_6: ; %Flow2 |
| ; GFX942-GISEL-NEXT: s_or_b64 exec, exec, s[2:3] |
| ; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-GISEL-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| ; The trailing i1 false operand is the intrinsic's isvolatile flag, i.e. this |
| ; is a non-volatile memset. |
| tail call void @llvm.memset.p1.i64(ptr addrspace(1) noundef nonnull align 4 %dst, i8 %setval, i64 %size, i1 false) |
| ret void |
| } |
| |
| ; memset through a ptr addrspace(3) destination (align 4) where both the fill |
| ; byte and the i64 length are only known at run time. |
| ; The checks below expect a 16-byte-per-iteration main loop and a byte-wise |
| ; residual loop (ds_write_b8) for the remaining (length & 15) bytes. The two |
| ; selectors differ in the main-loop store: the SelectionDAG checks use two |
| ; ds_write2_b32 per iteration, the GlobalISel checks use one ds_write2_b64. |
| ; In both, the destination pointer is advanced with a 32-bit v_add_u32. |
| define void @memset_p3_varsize_align_4_varsetval(ptr addrspace(3) align 4 %dst, i8 %setval, i64 %size) { |
| ; GFX942-SDAG-LABEL: memset_p3_varsize_align_4_varsetval: |
| ; GFX942-SDAG: ; %bb.0: ; %entry |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v5, v3 |
| ; GFX942-SDAG-NEXT: v_and_b32_e32 v4, -16, v2 |
| ; GFX942-SDAG-NEXT: v_and_b32_e32 v10, 15, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v11, 0 |
| ; GFX942-SDAG-NEXT: s_mov_b64 s[0:1], 0 |
| ; GFX942-SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] |
| ; GFX942-SDAG-NEXT: s_and_saveexec_b64 s[2:3], vcc |
| ; GFX942-SDAG-NEXT: s_cbranch_execz .LBB2_3 |
| ; GFX942-SDAG-NEXT: ; %bb.1: ; %dynamic-memset-expansion-main-body.preheader |
| ; GFX942-SDAG-NEXT: s_mov_b32 s4, 0x4040404 |
| ; GFX942-SDAG-NEXT: v_perm_b32 v6, v1, v1, s4 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, v6 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v8, v6 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v7, v6 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v9, v0 |
| ; GFX942-SDAG-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX942-SDAG-NEXT: .LBB2_2: ; %dynamic-memset-expansion-main-body |
| ; GFX942-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; GFX942-SDAG-NEXT: s_add_u32 s4, s4, 16 |
| ; GFX942-SDAG-NEXT: s_addc_u32 s5, s5, 0 |
| ; GFX942-SDAG-NEXT: v_cmp_ge_u64_e32 vcc, s[4:5], v[4:5] |
| ; GFX942-SDAG-NEXT: ds_write2_b32 v9, v8, v7 offset0:2 offset1:3 |
| ; GFX942-SDAG-NEXT: ds_write2_b32 v9, v6, v3 offset1:1 |
| ; GFX942-SDAG-NEXT: s_or_b64 s[0:1], vcc, s[0:1] |
| ; GFX942-SDAG-NEXT: v_add_u32_e32 v9, 16, v9 |
| ; GFX942-SDAG-NEXT: s_andn2_b64 exec, exec, s[0:1] |
| ; GFX942-SDAG-NEXT: s_cbranch_execnz .LBB2_2 |
| ; GFX942-SDAG-NEXT: .LBB2_3: ; %Flow7 |
| ; GFX942-SDAG-NEXT: s_or_b64 exec, exec, s[2:3] |
| ; GFX942-SDAG-NEXT: s_mov_b64 s[0:1], 0 |
| ; GFX942-SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[10:11] |
| ; GFX942-SDAG-NEXT: s_and_saveexec_b64 s[2:3], vcc |
| ; GFX942-SDAG-NEXT: s_cbranch_execz .LBB2_6 |
| ; GFX942-SDAG-NEXT: ; %bb.4: ; %dynamic-memset-expansion-residual-body.preheader |
| ; GFX942-SDAG-NEXT: v_and_b32_e32 v2, -16, v2 |
| ; GFX942-SDAG-NEXT: v_add_u32_e32 v0, v0, v2 |
| ; GFX942-SDAG-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX942-SDAG-NEXT: .LBB2_5: ; %dynamic-memset-expansion-residual-body |
| ; GFX942-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; GFX942-SDAG-NEXT: s_add_u32 s4, s4, 1 |
| ; GFX942-SDAG-NEXT: s_addc_u32 s5, s5, 0 |
| ; GFX942-SDAG-NEXT: v_cmp_ge_u64_e32 vcc, s[4:5], v[10:11] |
| ; GFX942-SDAG-NEXT: ds_write_b8 v0, v1 |
| ; GFX942-SDAG-NEXT: s_or_b64 s[0:1], vcc, s[0:1] |
| ; GFX942-SDAG-NEXT: v_add_u32_e32 v0, 1, v0 |
| ; GFX942-SDAG-NEXT: s_andn2_b64 exec, exec, s[0:1] |
| ; GFX942-SDAG-NEXT: s_cbranch_execnz .LBB2_5 |
| ; GFX942-SDAG-NEXT: .LBB2_6: ; %Flow5 |
| ; GFX942-SDAG-NEXT: s_or_b64 exec, exec, s[2:3] |
| ; GFX942-SDAG-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-GISEL-LABEL: memset_p3_varsize_align_4_varsetval: |
| ; GFX942-GISEL: ; %bb.0: ; %entry |
| ; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-GISEL-NEXT: v_and_b32_e32 v4, 15, v2 |
| ; GFX942-GISEL-NEXT: v_sub_co_u32_e32 v6, vcc, v2, v4 |
| ; GFX942-GISEL-NEXT: v_mov_b32_e32 v5, 0 |
| ; GFX942-GISEL-NEXT: s_nop 0 |
| ; GFX942-GISEL-NEXT: v_subbrev_co_u32_e32 v7, vcc, 0, v3, vcc |
| ; GFX942-GISEL-NEXT: s_mov_b64 s[0:1], 0 |
| ; GFX942-GISEL-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[6:7] |
| ; GFX942-GISEL-NEXT: s_and_saveexec_b64 s[2:3], vcc |
| ; GFX942-GISEL-NEXT: s_cbranch_execz .LBB2_3 |
| ; GFX942-GISEL-NEXT: ; %bb.1: ; %dynamic-memset-expansion-main-body.preheader |
| ; GFX942-GISEL-NEXT: v_and_b32_e32 v8, 0xff, v1 |
| ; GFX942-GISEL-NEXT: v_lshl_or_b32 v9, v8, 8, v8 |
| ; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v10, 16, v8 |
| ; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v8, 24, v8 |
| ; GFX942-GISEL-NEXT: v_or3_b32 v8, v9, v10, v8 |
| ; GFX942-GISEL-NEXT: v_mov_b32_e32 v9, v8 |
| ; GFX942-GISEL-NEXT: v_mov_b32_e32 v10, v8 |
| ; GFX942-GISEL-NEXT: v_mov_b32_e32 v11, v8 |
| ; GFX942-GISEL-NEXT: v_mov_b32_e32 v14, v0 |
| ; GFX942-GISEL-NEXT: v_mov_b64_e32 v[12:13], s[0:1] |
| ; GFX942-GISEL-NEXT: .LBB2_2: ; %dynamic-memset-expansion-main-body |
| ; GFX942-GISEL-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; GFX942-GISEL-NEXT: v_add_co_u32_e32 v12, vcc, 16, v12 |
| ; GFX942-GISEL-NEXT: ds_write2_b64 v14, v[8:9], v[10:11] offset1:1 |
| ; GFX942-GISEL-NEXT: s_nop 0 |
| ; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v13, vcc, 0, v13, vcc |
| ; GFX942-GISEL-NEXT: v_cmp_ge_u64_e32 vcc, v[12:13], v[6:7] |
| ; GFX942-GISEL-NEXT: s_or_b64 s[0:1], vcc, s[0:1] |
| ; GFX942-GISEL-NEXT: v_add_u32_e32 v14, 16, v14 |
| ; GFX942-GISEL-NEXT: s_andn2_b64 exec, exec, s[0:1] |
| ; GFX942-GISEL-NEXT: s_cbranch_execnz .LBB2_2 |
| ; GFX942-GISEL-NEXT: .LBB2_3: ; %Flow7 |
| ; GFX942-GISEL-NEXT: s_or_b64 exec, exec, s[2:3] |
| ; GFX942-GISEL-NEXT: s_mov_b64 s[0:1], 0 |
| ; GFX942-GISEL-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] |
| ; GFX942-GISEL-NEXT: s_and_saveexec_b64 s[2:3], vcc |
| ; GFX942-GISEL-NEXT: s_cbranch_execz .LBB2_6 |
| ; GFX942-GISEL-NEXT: ; %bb.4: ; %dynamic-memset-expansion-residual-body.preheader |
| ; GFX942-GISEL-NEXT: v_lshrrev_b64 v[2:3], 4, v[2:3] |
| ; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v2, 4, v2 |
| ; GFX942-GISEL-NEXT: v_add_u32_e32 v0, v0, v2 |
| ; GFX942-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1] |
| ; GFX942-GISEL-NEXT: .LBB2_5: ; %dynamic-memset-expansion-residual-body |
| ; GFX942-GISEL-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; GFX942-GISEL-NEXT: v_add_co_u32_e32 v2, vcc, 1, v2 |
| ; GFX942-GISEL-NEXT: ds_write_b8 v0, v1 |
| ; GFX942-GISEL-NEXT: s_nop 0 |
| ; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc |
| ; GFX942-GISEL-NEXT: v_cmp_ge_u64_e32 vcc, v[2:3], v[4:5] |
| ; GFX942-GISEL-NEXT: s_or_b64 s[0:1], vcc, s[0:1] |
| ; GFX942-GISEL-NEXT: v_add_u32_e32 v0, 1, v0 |
| ; GFX942-GISEL-NEXT: s_andn2_b64 exec, exec, s[0:1] |
| ; GFX942-GISEL-NEXT: s_cbranch_execnz .LBB2_5 |
| ; GFX942-GISEL-NEXT: .LBB2_6: ; %Flow5 |
| ; GFX942-GISEL-NEXT: s_or_b64 exec, exec, s[2:3] |
| ; GFX942-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX942-GISEL-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| ; The trailing i1 false operand is the intrinsic's isvolatile flag, i.e. this |
| ; is a non-volatile memset. |
| tail call void @llvm.memset.p3.i64(ptr addrspace(3) noundef nonnull align 4 %dst, i8 %setval, i64 %size, i1 false) |
| ret void |
| } |
| |
| ; memset through a ptr addrspace(5) destination (align 4) where both the fill |
| ; byte and the i64 length are only known at run time. |
| ; The checks below expect the same two-loop shape from both selectors: a main |
| ; loop storing 16 bytes per iteration with scratch_store_dwordx4, then a |
| ; residual loop storing the remaining (length & 15) bytes with |
| ; scratch_store_byte. The destination pointer is advanced with a 32-bit |
| ; v_add_u32 in both loops. |
| define void @memset_p5_varsize_align_4_varsetval(ptr addrspace(5) align 4 %dst, i8 %setval, i64 %size) { |
| ; GFX942-SDAG-LABEL: memset_p5_varsize_align_4_varsetval: |
| ; GFX942-SDAG: ; %bb.0: ; %entry |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v5, v3 |
| ; GFX942-SDAG-NEXT: v_and_b32_e32 v4, -16, v2 |
| ; GFX942-SDAG-NEXT: v_and_b32_e32 v10, 15, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v11, 0 |
| ; GFX942-SDAG-NEXT: s_mov_b64 s[0:1], 0 |
| ; GFX942-SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] |
| ; GFX942-SDAG-NEXT: s_and_saveexec_b64 s[2:3], vcc |
| ; GFX942-SDAG-NEXT: s_cbranch_execz .LBB3_3 |
| ; GFX942-SDAG-NEXT: ; %bb.1: ; %dynamic-memset-expansion-main-body.preheader |
| ; GFX942-SDAG-NEXT: s_mov_b32 s4, 0x4040404 |
| ; GFX942-SDAG-NEXT: v_perm_b32 v6, v1, v1, s4 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v7, v6 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v8, v6 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v9, v6 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, v0 |
| ; GFX942-SDAG-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX942-SDAG-NEXT: .LBB3_2: ; %dynamic-memset-expansion-main-body |
| ; GFX942-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; GFX942-SDAG-NEXT: s_add_u32 s4, s4, 16 |
| ; GFX942-SDAG-NEXT: s_addc_u32 s5, s5, 0 |
| ; GFX942-SDAG-NEXT: v_cmp_ge_u64_e32 vcc, s[4:5], v[4:5] |
| ; GFX942-SDAG-NEXT: scratch_store_dwordx4 v3, v[6:9], off |
| ; GFX942-SDAG-NEXT: s_or_b64 s[0:1], vcc, s[0:1] |
| ; GFX942-SDAG-NEXT: v_add_u32_e32 v3, 16, v3 |
| ; GFX942-SDAG-NEXT: s_andn2_b64 exec, exec, s[0:1] |
| ; GFX942-SDAG-NEXT: s_cbranch_execnz .LBB3_2 |
| ; GFX942-SDAG-NEXT: .LBB3_3: ; %Flow7 |
| ; GFX942-SDAG-NEXT: s_or_b64 exec, exec, s[2:3] |
| ; GFX942-SDAG-NEXT: s_mov_b64 s[0:1], 0 |
| ; GFX942-SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[10:11] |
| ; GFX942-SDAG-NEXT: s_and_saveexec_b64 s[2:3], vcc |
| ; GFX942-SDAG-NEXT: s_cbranch_execz .LBB3_6 |
| ; GFX942-SDAG-NEXT: ; %bb.4: ; %dynamic-memset-expansion-residual-body.preheader |
| ; GFX942-SDAG-NEXT: v_and_b32_e32 v2, -16, v2 |
| ; GFX942-SDAG-NEXT: v_add_u32_e32 v0, v0, v2 |
| ; GFX942-SDAG-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX942-SDAG-NEXT: .LBB3_5: ; %dynamic-memset-expansion-residual-body |
| ; GFX942-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; GFX942-SDAG-NEXT: s_add_u32 s4, s4, 1 |
| ; GFX942-SDAG-NEXT: s_addc_u32 s5, s5, 0 |
| ; GFX942-SDAG-NEXT: v_cmp_ge_u64_e32 vcc, s[4:5], v[10:11] |
| ; GFX942-SDAG-NEXT: scratch_store_byte v0, v1, off |
| ; GFX942-SDAG-NEXT: s_or_b64 s[0:1], vcc, s[0:1] |
| ; GFX942-SDAG-NEXT: v_add_u32_e32 v0, 1, v0 |
| ; GFX942-SDAG-NEXT: s_andn2_b64 exec, exec, s[0:1] |
| ; GFX942-SDAG-NEXT: s_cbranch_execnz .LBB3_5 |
| ; GFX942-SDAG-NEXT: .LBB3_6: ; %Flow5 |
| ; GFX942-SDAG-NEXT: s_or_b64 exec, exec, s[2:3] |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-GISEL-LABEL: memset_p5_varsize_align_4_varsetval: |
| ; GFX942-GISEL: ; %bb.0: ; %entry |
| ; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-GISEL-NEXT: v_and_b32_e32 v8, 15, v2 |
| ; GFX942-GISEL-NEXT: v_sub_co_u32_e32 v10, vcc, v2, v8 |
| ; GFX942-GISEL-NEXT: v_mov_b32_e32 v9, 0 |
| ; GFX942-GISEL-NEXT: s_nop 0 |
| ; GFX942-GISEL-NEXT: v_subbrev_co_u32_e32 v11, vcc, 0, v3, vcc |
| ; GFX942-GISEL-NEXT: s_mov_b64 s[0:1], 0 |
| ; GFX942-GISEL-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[10:11] |
| ; GFX942-GISEL-NEXT: s_and_saveexec_b64 s[2:3], vcc |
| ; GFX942-GISEL-NEXT: s_cbranch_execz .LBB3_3 |
| ; GFX942-GISEL-NEXT: ; %bb.1: ; %dynamic-memset-expansion-main-body.preheader |
| ; GFX942-GISEL-NEXT: v_and_b32_e32 v4, 0xff, v1 |
| ; GFX942-GISEL-NEXT: v_lshl_or_b32 v5, v4, 8, v4 |
| ; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v6, 16, v4 |
| ; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v4, 24, v4 |
| ; GFX942-GISEL-NEXT: v_or3_b32 v4, v5, v6, v4 |
| ; GFX942-GISEL-NEXT: v_mov_b32_e32 v5, v4 |
| ; GFX942-GISEL-NEXT: v_mov_b32_e32 v6, v4 |
| ; GFX942-GISEL-NEXT: v_mov_b32_e32 v7, v4 |
| ; GFX942-GISEL-NEXT: v_mov_b32_e32 v14, v0 |
| ; GFX942-GISEL-NEXT: v_mov_b64_e32 v[12:13], s[0:1] |
| ; GFX942-GISEL-NEXT: .LBB3_2: ; %dynamic-memset-expansion-main-body |
| ; GFX942-GISEL-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; GFX942-GISEL-NEXT: v_add_co_u32_e32 v12, vcc, 16, v12 |
| ; GFX942-GISEL-NEXT: scratch_store_dwordx4 v14, v[4:7], off |
| ; GFX942-GISEL-NEXT: s_nop 0 |
| ; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v13, vcc, 0, v13, vcc |
| ; GFX942-GISEL-NEXT: v_cmp_ge_u64_e32 vcc, v[12:13], v[10:11] |
| ; GFX942-GISEL-NEXT: s_or_b64 s[0:1], vcc, s[0:1] |
| ; GFX942-GISEL-NEXT: v_add_u32_e32 v14, 16, v14 |
| ; GFX942-GISEL-NEXT: s_andn2_b64 exec, exec, s[0:1] |
| ; GFX942-GISEL-NEXT: s_cbranch_execnz .LBB3_2 |
| ; GFX942-GISEL-NEXT: .LBB3_3: ; %Flow7 |
| ; GFX942-GISEL-NEXT: s_or_b64 exec, exec, s[2:3] |
| ; GFX942-GISEL-NEXT: s_mov_b64 s[0:1], 0 |
| ; GFX942-GISEL-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[8:9] |
| ; GFX942-GISEL-NEXT: s_and_saveexec_b64 s[2:3], vcc |
| ; GFX942-GISEL-NEXT: s_cbranch_execz .LBB3_6 |
| ; GFX942-GISEL-NEXT: ; %bb.4: ; %dynamic-memset-expansion-residual-body.preheader |
| ; GFX942-GISEL-NEXT: v_lshrrev_b64 v[2:3], 4, v[2:3] |
| ; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v2, 4, v2 |
| ; GFX942-GISEL-NEXT: v_add_u32_e32 v0, v0, v2 |
| ; GFX942-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1] |
| ; GFX942-GISEL-NEXT: .LBB3_5: ; %dynamic-memset-expansion-residual-body |
| ; GFX942-GISEL-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; GFX942-GISEL-NEXT: v_add_co_u32_e32 v2, vcc, 1, v2 |
| ; GFX942-GISEL-NEXT: scratch_store_byte v0, v1, off |
| ; GFX942-GISEL-NEXT: s_nop 0 |
| ; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc |
| ; GFX942-GISEL-NEXT: v_cmp_ge_u64_e32 vcc, v[2:3], v[8:9] |
| ; GFX942-GISEL-NEXT: s_or_b64 s[0:1], vcc, s[0:1] |
| ; GFX942-GISEL-NEXT: v_add_u32_e32 v0, 1, v0 |
| ; GFX942-GISEL-NEXT: s_andn2_b64 exec, exec, s[0:1] |
| ; GFX942-GISEL-NEXT: s_cbranch_execnz .LBB3_5 |
| ; GFX942-GISEL-NEXT: .LBB3_6: ; %Flow5 |
| ; GFX942-GISEL-NEXT: s_or_b64 exec, exec, s[2:3] |
| ; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-GISEL-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| ; The trailing i1 false operand is the intrinsic's isvolatile flag, i.e. this |
| ; is a non-volatile memset. |
| tail call void @llvm.memset.p5.i64(ptr addrspace(5) noundef nonnull align 4 %dst, i8 %setval, i64 %size, i1 false) |
| ret void |
| } |
| |
| define void @memset_p0_sz1055_align_4_varsetval(ptr addrspace(0) align 4 %dst, i8 %setval) { |
| ; GFX942-SDAG-LABEL: memset_p0_sz1055_align_4_varsetval: |
| ; GFX942-SDAG: ; %bb.0: ; %entry |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: s_mov_b32 s0, 0x4040404 |
| ; GFX942-SDAG-NEXT: v_perm_b32 v4, v2, v2, s0 |
| ; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a0, v40 ; Reload Reuse |
| ; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a1, v41 ; Reload Reuse |
| ; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a2, v42 ; Reload Reuse |
| ; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a3, v43 ; Reload Reuse |
| ; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a4, v44 ; Reload Reuse |
| ; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a5, v45 ; Reload Reuse |
| ; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a6, v46 ; Reload Reuse |
| ; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a7, v47 ; Reload Reuse |
| ; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a8, v56 ; Reload Reuse |
| ; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a9, v57 ; Reload Reuse |
| ; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a10, v58 ; Reload Reuse |
| ; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a11, v59 ; Reload Reuse |
| ; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a12, v60 ; Reload Reuse |
| ; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a13, v61 ; Reload Reuse |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v5, v4 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v6, v4 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v7, v4 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v8, v4 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v9, v4 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v10, v4 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v11, v4 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v12, v4 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v13, v4 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v14, v4 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v15, v4 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v16, v4 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v17, v4 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v18, v4 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v19, v4 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v20, v4 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v21, v4 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v22, v4 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v23, v4 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v24, v4 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v25, v4 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v26, v4 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v27, v4 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v28, v4 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v29, v4 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v30, v4 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v31, v4 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v32, v4 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v33, v4 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v34, v4 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v35, v4 |
| ; GFX942-SDAG-NEXT: s_mov_b64 s[0:1], 0 |
| ; GFX942-SDAG-NEXT: s_mov_b64 s[2:3], 0x70 |
| ; GFX942-SDAG-NEXT: s_mov_b64 s[4:5], 0x60 |
| ; GFX942-SDAG-NEXT: s_mov_b64 s[6:7], 0x50 |
| ; GFX942-SDAG-NEXT: s_mov_b64 s[8:9], 0xf0 |
| ; GFX942-SDAG-NEXT: s_mov_b64 s[10:11], 0xe0 |
| ; GFX942-SDAG-NEXT: s_mov_b64 s[12:13], 0xd0 |
| ; GFX942-SDAG-NEXT: s_mov_b64 s[14:15], 0xc0 |
| ; GFX942-SDAG-NEXT: s_mov_b64 s[16:17], 0xb0 |
| ; GFX942-SDAG-NEXT: s_mov_b64 s[18:19], 0xa0 |
| ; GFX942-SDAG-NEXT: s_mov_b64 s[20:21], 0x90 |
| ; GFX942-SDAG-NEXT: v_mov_b64_e32 v[36:37], 0x400 |
| ; GFX942-SDAG-NEXT: .LBB4_1: ; %static-memset-expansion-main-body |
| ; GFX942-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[38:39], v[0:1], 0, s[0:1] |
| ; GFX942-SDAG-NEXT: s_add_u32 s0, s0, 0x100 |
| ; GFX942-SDAG-NEXT: s_addc_u32 s1, s1, 0 |
| ; GFX942-SDAG-NEXT: v_cmp_lt_u64_e32 vcc, s[0:1], v[36:37] |
| ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[48:49], v[38:39], 0, s[2:3] |
| ; GFX942-SDAG-NEXT: s_and_b64 vcc, exec, vcc |
| ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[50:51], v[38:39], 0, s[4:5] |
| ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[52:53], v[38:39], 0, s[6:7] |
| ; GFX942-SDAG-NEXT: flat_store_dwordx4 v[38:39], v[20:23] offset:64 |
| ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[54:55], v[38:39], 0, 48 |
| ; GFX942-SDAG-NEXT: flat_store_dwordx4 v[38:39], v[12:15] offset:32 |
| ; GFX942-SDAG-NEXT: flat_store_dwordx4 v[38:39], v[8:11] offset:16 |
| ; GFX942-SDAG-NEXT: flat_store_dwordx4 v[38:39], v[4:7] |
| ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[40:41], v[38:39], 0, s[8:9] |
| ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[42:43], v[38:39], 0, s[10:11] |
| ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[44:45], v[38:39], 0, s[12:13] |
| ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[46:47], v[38:39], 0, s[14:15] |
| ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[56:57], v[38:39], 0, s[16:17] |
| ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[58:59], v[38:39], 0, s[18:19] |
| ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[60:61], v[38:39], 0, s[20:21] |
| ; GFX942-SDAG-NEXT: flat_store_dwordx4 v[38:39], v[4:7] offset:128 |
| ; GFX942-SDAG-NEXT: flat_store_dwordx4 v[48:49], v[32:35] |
| ; GFX942-SDAG-NEXT: flat_store_dwordx4 v[50:51], v[28:31] |
| ; GFX942-SDAG-NEXT: flat_store_dwordx4 v[52:53], v[24:27] |
| ; GFX942-SDAG-NEXT: flat_store_dwordx4 v[54:55], v[16:19] |
| ; GFX942-SDAG-NEXT: flat_store_dwordx4 v[40:41], v[32:35] |
| ; GFX942-SDAG-NEXT: flat_store_dwordx4 v[42:43], v[28:31] |
| ; GFX942-SDAG-NEXT: flat_store_dwordx4 v[44:45], v[24:27] |
| ; GFX942-SDAG-NEXT: flat_store_dwordx4 v[46:47], v[20:23] |
| ; GFX942-SDAG-NEXT: flat_store_dwordx4 v[56:57], v[16:19] |
| ; GFX942-SDAG-NEXT: flat_store_dwordx4 v[58:59], v[12:15] |
| ; GFX942-SDAG-NEXT: flat_store_dwordx4 v[60:61], v[8:11] |
| ; GFX942-SDAG-NEXT: s_cbranch_vccnz .LBB4_1 |
| ; GFX942-SDAG-NEXT: ; %bb.2: ; %static-memset-post-expansion |
| ; GFX942-SDAG-NEXT: s_mov_b32 s0, 0x4040404 |
| ; GFX942-SDAG-NEXT: v_perm_b32 v4, v2, v2, s0 |
| ; GFX942-SDAG-NEXT: v_lshlrev_b16_e32 v3, 8, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v5, v4 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v6, v4 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v7, v4 |
| ; GFX942-SDAG-NEXT: v_or_b32_sdwa v3, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD |
| ; GFX942-SDAG-NEXT: flat_store_dwordx4 v[0:1], v[4:7] offset:1024 |
| ; GFX942-SDAG-NEXT: flat_store_dwordx3 v[0:1], v[4:6] offset:1040 |
| ; GFX942-SDAG-NEXT: flat_store_short v[0:1], v3 offset:1052 |
| ; GFX942-SDAG-NEXT: flat_store_byte v[0:1], v2 offset:1054 |
| ; GFX942-SDAG-NEXT: v_accvgpr_read_b32 v61, a13 ; Reload Reuse |
| ; GFX942-SDAG-NEXT: v_accvgpr_read_b32 v60, a12 ; Reload Reuse |
| ; GFX942-SDAG-NEXT: v_accvgpr_read_b32 v59, a11 ; Reload Reuse |
| ; GFX942-SDAG-NEXT: v_accvgpr_read_b32 v58, a10 ; Reload Reuse |
| ; GFX942-SDAG-NEXT: v_accvgpr_read_b32 v57, a9 ; Reload Reuse |
| ; GFX942-SDAG-NEXT: v_accvgpr_read_b32 v56, a8 ; Reload Reuse |
| ; GFX942-SDAG-NEXT: v_accvgpr_read_b32 v47, a7 ; Reload Reuse |
| ; GFX942-SDAG-NEXT: v_accvgpr_read_b32 v46, a6 ; Reload Reuse |
| ; GFX942-SDAG-NEXT: v_accvgpr_read_b32 v45, a5 ; Reload Reuse |
| ; GFX942-SDAG-NEXT: v_accvgpr_read_b32 v44, a4 ; Reload Reuse |
| ; GFX942-SDAG-NEXT: v_accvgpr_read_b32 v43, a3 ; Reload Reuse |
| ; GFX942-SDAG-NEXT: v_accvgpr_read_b32 v42, a2 ; Reload Reuse |
| ; GFX942-SDAG-NEXT: v_accvgpr_read_b32 v41, a1 ; Reload Reuse |
| ; GFX942-SDAG-NEXT: v_accvgpr_read_b32 v40, a0 ; Reload Reuse |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-GISEL-LABEL: memset_p0_sz1055_align_4_varsetval: |
| ; GFX942-GISEL: ; %bb.0: ; %entry |
| ; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-GISEL-NEXT: v_and_b32_e32 v3, 0xff, v2 |
| ; GFX942-GISEL-NEXT: v_lshl_or_b32 v4, v3, 8, v3 |
| ; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v3 |
| ; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v3, 24, v3 |
| ; GFX942-GISEL-NEXT: v_or3_b32 v4, v4, v5, v3 |
| ; GFX942-GISEL-NEXT: s_mov_b64 s[0:1], 0 |
| ; GFX942-GISEL-NEXT: v_mov_b32_e32 v5, v4 |
| ; GFX942-GISEL-NEXT: v_mov_b32_e32 v6, v4 |
| ; GFX942-GISEL-NEXT: v_mov_b32_e32 v7, v4 |
| ; GFX942-GISEL-NEXT: v_mov_b64_e32 v[8:9], 0x400 |
| ; GFX942-GISEL-NEXT: v_mov_b64_e32 v[10:11], s[0:1] |
| ; GFX942-GISEL-NEXT: .LBB4_1: ; %static-memset-expansion-main-body |
| ; GFX942-GISEL-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; GFX942-GISEL-NEXT: v_add_co_u32_e32 v12, vcc, v0, v10 |
| ; GFX942-GISEL-NEXT: s_nop 1 |
| ; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v13, vcc, v1, v11, vcc |
| ; GFX942-GISEL-NEXT: v_add_co_u32_e32 v10, vcc, 0x100, v10 |
| ; GFX942-GISEL-NEXT: flat_store_dwordx4 v[12:13], v[4:7] |
| ; GFX942-GISEL-NEXT: flat_store_dwordx4 v[12:13], v[4:7] offset:16 |
| ; GFX942-GISEL-NEXT: flat_store_dwordx4 v[12:13], v[4:7] offset:32 |
| ; GFX942-GISEL-NEXT: flat_store_dwordx4 v[12:13], v[4:7] offset:48 |
| ; GFX942-GISEL-NEXT: flat_store_dwordx4 v[12:13], v[4:7] offset:64 |
| ; GFX942-GISEL-NEXT: flat_store_dwordx4 v[12:13], v[4:7] offset:80 |
| ; GFX942-GISEL-NEXT: flat_store_dwordx4 v[12:13], v[4:7] offset:96 |
| ; GFX942-GISEL-NEXT: flat_store_dwordx4 v[12:13], v[4:7] offset:112 |
| ; GFX942-GISEL-NEXT: flat_store_dwordx4 v[12:13], v[4:7] offset:128 |
| ; GFX942-GISEL-NEXT: flat_store_dwordx4 v[12:13], v[4:7] offset:144 |
| ; GFX942-GISEL-NEXT: flat_store_dwordx4 v[12:13], v[4:7] offset:160 |
| ; GFX942-GISEL-NEXT: flat_store_dwordx4 v[12:13], v[4:7] offset:176 |
| ; GFX942-GISEL-NEXT: flat_store_dwordx4 v[12:13], v[4:7] offset:192 |
| ; GFX942-GISEL-NEXT: flat_store_dwordx4 v[12:13], v[4:7] offset:208 |
| ; GFX942-GISEL-NEXT: flat_store_dwordx4 v[12:13], v[4:7] offset:224 |
| ; GFX942-GISEL-NEXT: flat_store_dwordx4 v[12:13], v[4:7] offset:240 |
| ; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v11, vcc, 0, v11, vcc |
| ; GFX942-GISEL-NEXT: v_cmp_lt_u64_e32 vcc, v[10:11], v[8:9] |
| ; GFX942-GISEL-NEXT: s_cbranch_vccnz .LBB4_1 |
| ; GFX942-GISEL-NEXT: ; %bb.2: ; %static-memset-post-expansion |
| ; GFX942-GISEL-NEXT: v_and_b32_e32 v3, 0xff, v2 |
| ; GFX942-GISEL-NEXT: v_lshl_or_b32 v4, v3, 8, v3 |
| ; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v3 |
| ; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v3, 24, v3 |
| ; GFX942-GISEL-NEXT: v_or3_b32 v6, v4, v5, v3 |
| ; GFX942-GISEL-NEXT: v_mov_b32_e32 v3, 8 |
| ; GFX942-GISEL-NEXT: v_lshlrev_b16_sdwa v3, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 |
| ; GFX942-GISEL-NEXT: v_or_b32_sdwa v3, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD |
| ; GFX942-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v3 |
| ; GFX942-GISEL-NEXT: v_lshl_or_b32 v4, v4, 16, v4 |
| ; GFX942-GISEL-NEXT: v_mov_b32_e32 v7, v6 |
| ; GFX942-GISEL-NEXT: v_mov_b32_e32 v8, v6 |
| ; GFX942-GISEL-NEXT: v_mov_b32_e32 v9, v6 |
| ; GFX942-GISEL-NEXT: v_mov_b32_e32 v5, v4 |
| ; GFX942-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[6:9] offset:1024 |
| ; GFX942-GISEL-NEXT: flat_store_dwordx3 v[0:1], v[4:6] offset:1040 |
| ; GFX942-GISEL-NEXT: flat_store_short v[0:1], v3 offset:1052 |
| ; GFX942-GISEL-NEXT: flat_store_byte v[0:1], v2 offset:1054 |
| ; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| ; GFX942-GISEL-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memset.p0.i64(ptr addrspace(0) noundef nonnull align 4 %dst, i8 %setval, i64 1055, i1 false) |
| ret void |
| } |
| |
| ; memset of a constant 2048 bytes through a flat (addrspace 0) pointer with |
| ; align 4 and a run-time fill byte. Both selectors are expected to emit a |
| ; single loop that advances by 0x100 bytes per iteration until 0x800, issuing |
| ; sixteen flat_store_dwordx4 per trip. 2048 is a multiple of 0x100, so the |
| ; post-expansion block carries no residual stores. The SelectionDAG output |
| ; additionally copies v40-v47 and v56-v59 to a0-a11 on entry and restores them |
| ; before returning. |
| define void @memset_p0_sz2048_align_4_varsetval(ptr addrspace(0) align 4 %dst, i8 %setval) { |
| ; GFX942-SDAG-LABEL: memset_p0_sz2048_align_4_varsetval: |
| ; GFX942-SDAG: ; %bb.0: ; %entry |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: s_mov_b32 s0, 0x4040404 |
| ; GFX942-SDAG-NEXT: v_perm_b32 v2, v2, v2, s0 |
| ; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a0, v40 ; Reload Reuse |
| ; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a1, v41 ; Reload Reuse |
| ; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a2, v42 ; Reload Reuse |
| ; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a3, v43 ; Reload Reuse |
| ; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a4, v44 ; Reload Reuse |
| ; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a5, v45 ; Reload Reuse |
| ; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a6, v46 ; Reload Reuse |
| ; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a7, v47 ; Reload Reuse |
| ; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a8, v56 ; Reload Reuse |
| ; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a9, v57 ; Reload Reuse |
| ; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a10, v58 ; Reload Reuse |
| ; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a11, v59 ; Reload Reuse |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v4, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v5, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v6, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v7, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v8, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v9, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v10, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v11, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v12, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v13, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v14, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v15, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v16, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v17, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v18, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v19, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v20, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v21, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v22, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v23, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v24, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v25, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v26, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v27, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v28, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v29, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v30, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v31, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v32, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v33, v2 |
| ; GFX942-SDAG-NEXT: s_mov_b64 s[0:1], 0 |
| ; GFX942-SDAG-NEXT: s_mov_b64 s[2:3], 0x70 |
| ; GFX942-SDAG-NEXT: s_mov_b64 s[4:5], 0x60 |
| ; GFX942-SDAG-NEXT: s_mov_b64 s[6:7], 0x50 |
| ; GFX942-SDAG-NEXT: s_mov_b64 s[8:9], 0xf0 |
| ; GFX942-SDAG-NEXT: s_mov_b64 s[10:11], 0xe0 |
| ; GFX942-SDAG-NEXT: s_mov_b64 s[12:13], 0xd0 |
| ; GFX942-SDAG-NEXT: s_mov_b64 s[14:15], 0xc0 |
| ; GFX942-SDAG-NEXT: s_mov_b64 s[16:17], 0xb0 |
| ; GFX942-SDAG-NEXT: s_mov_b64 s[18:19], 0xa0 |
| ; GFX942-SDAG-NEXT: s_mov_b64 s[20:21], 0x90 |
| ; GFX942-SDAG-NEXT: v_mov_b64_e32 v[34:35], 0x800 |
| ; GFX942-SDAG-NEXT: .LBB5_1: ; %static-memset-expansion-main-body |
| ; GFX942-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[36:37], v[0:1], 0, s[0:1] |
| ; GFX942-SDAG-NEXT: s_add_u32 s0, s0, 0x100 |
| ; GFX942-SDAG-NEXT: s_addc_u32 s1, s1, 0 |
| ; GFX942-SDAG-NEXT: v_cmp_lt_u64_e32 vcc, s[0:1], v[34:35] |
| ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[38:39], v[36:37], 0, s[2:3] |
| ; GFX942-SDAG-NEXT: s_and_b64 vcc, exec, vcc |
| ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[48:49], v[36:37], 0, s[4:5] |
| ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[50:51], v[36:37], 0, s[6:7] |
| ; GFX942-SDAG-NEXT: flat_store_dwordx4 v[36:37], v[18:21] offset:64 |
| ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[52:53], v[36:37], 0, 48 |
| ; GFX942-SDAG-NEXT: flat_store_dwordx4 v[36:37], v[10:13] offset:32 |
| ; GFX942-SDAG-NEXT: flat_store_dwordx4 v[36:37], v[6:9] offset:16 |
| ; GFX942-SDAG-NEXT: flat_store_dwordx4 v[36:37], v[2:5] |
| ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[54:55], v[36:37], 0, s[8:9] |
| ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[40:41], v[36:37], 0, s[10:11] |
| ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[42:43], v[36:37], 0, s[12:13] |
| ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[44:45], v[36:37], 0, s[14:15] |
| ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[46:47], v[36:37], 0, s[16:17] |
| ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[56:57], v[36:37], 0, s[18:19] |
| ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[58:59], v[36:37], 0, s[20:21] |
| ; GFX942-SDAG-NEXT: flat_store_dwordx4 v[36:37], v[2:5] offset:128 |
| ; GFX942-SDAG-NEXT: flat_store_dwordx4 v[38:39], v[30:33] |
| ; GFX942-SDAG-NEXT: flat_store_dwordx4 v[48:49], v[26:29] |
| ; GFX942-SDAG-NEXT: flat_store_dwordx4 v[50:51], v[22:25] |
| ; GFX942-SDAG-NEXT: flat_store_dwordx4 v[52:53], v[14:17] |
| ; GFX942-SDAG-NEXT: flat_store_dwordx4 v[54:55], v[30:33] |
| ; GFX942-SDAG-NEXT: flat_store_dwordx4 v[40:41], v[26:29] |
| ; GFX942-SDAG-NEXT: flat_store_dwordx4 v[42:43], v[22:25] |
| ; GFX942-SDAG-NEXT: flat_store_dwordx4 v[44:45], v[18:21] |
| ; GFX942-SDAG-NEXT: flat_store_dwordx4 v[46:47], v[14:17] |
| ; GFX942-SDAG-NEXT: flat_store_dwordx4 v[56:57], v[10:13] |
| ; GFX942-SDAG-NEXT: flat_store_dwordx4 v[58:59], v[6:9] |
| ; GFX942-SDAG-NEXT: s_cbranch_vccnz .LBB5_1 |
| ; GFX942-SDAG-NEXT: ; %bb.2: ; %static-memset-post-expansion |
| ; GFX942-SDAG-NEXT: v_accvgpr_read_b32 v59, a11 ; Reload Reuse |
| ; GFX942-SDAG-NEXT: v_accvgpr_read_b32 v58, a10 ; Reload Reuse |
| ; GFX942-SDAG-NEXT: v_accvgpr_read_b32 v57, a9 ; Reload Reuse |
| ; GFX942-SDAG-NEXT: v_accvgpr_read_b32 v56, a8 ; Reload Reuse |
| ; GFX942-SDAG-NEXT: v_accvgpr_read_b32 v47, a7 ; Reload Reuse |
| ; GFX942-SDAG-NEXT: v_accvgpr_read_b32 v46, a6 ; Reload Reuse |
| ; GFX942-SDAG-NEXT: v_accvgpr_read_b32 v45, a5 ; Reload Reuse |
| ; GFX942-SDAG-NEXT: v_accvgpr_read_b32 v44, a4 ; Reload Reuse |
| ; GFX942-SDAG-NEXT: v_accvgpr_read_b32 v43, a3 ; Reload Reuse |
| ; GFX942-SDAG-NEXT: v_accvgpr_read_b32 v42, a2 ; Reload Reuse |
| ; GFX942-SDAG-NEXT: v_accvgpr_read_b32 v41, a1 ; Reload Reuse |
| ; GFX942-SDAG-NEXT: v_accvgpr_read_b32 v40, a0 ; Reload Reuse |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-GISEL-LABEL: memset_p0_sz2048_align_4_varsetval: |
| ; GFX942-GISEL: ; %bb.0: ; %entry |
| ; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-GISEL-NEXT: v_and_b32_e32 v2, 0xff, v2 |
| ; GFX942-GISEL-NEXT: v_lshl_or_b32 v3, v2, 8, v2 |
| ; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v4, 16, v2 |
| ; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v2, 24, v2 |
| ; GFX942-GISEL-NEXT: v_or3_b32 v2, v3, v4, v2 |
| ; GFX942-GISEL-NEXT: s_mov_b64 s[0:1], 0 |
| ; GFX942-GISEL-NEXT: v_mov_b32_e32 v3, v2 |
| ; GFX942-GISEL-NEXT: v_mov_b32_e32 v4, v2 |
| ; GFX942-GISEL-NEXT: v_mov_b32_e32 v5, v2 |
| ; GFX942-GISEL-NEXT: v_mov_b64_e32 v[6:7], 0x800 |
| ; GFX942-GISEL-NEXT: v_mov_b64_e32 v[8:9], s[0:1] |
| ; GFX942-GISEL-NEXT: .LBB5_1: ; %static-memset-expansion-main-body |
| ; GFX942-GISEL-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; GFX942-GISEL-NEXT: v_add_co_u32_e32 v10, vcc, v0, v8 |
| ; GFX942-GISEL-NEXT: s_nop 1 |
| ; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v11, vcc, v1, v9, vcc |
| ; GFX942-GISEL-NEXT: v_add_co_u32_e32 v8, vcc, 0x100, v8 |
| ; GFX942-GISEL-NEXT: flat_store_dwordx4 v[10:11], v[2:5] |
| ; GFX942-GISEL-NEXT: flat_store_dwordx4 v[10:11], v[2:5] offset:16 |
| ; GFX942-GISEL-NEXT: flat_store_dwordx4 v[10:11], v[2:5] offset:32 |
| ; GFX942-GISEL-NEXT: flat_store_dwordx4 v[10:11], v[2:5] offset:48 |
| ; GFX942-GISEL-NEXT: flat_store_dwordx4 v[10:11], v[2:5] offset:64 |
| ; GFX942-GISEL-NEXT: flat_store_dwordx4 v[10:11], v[2:5] offset:80 |
| ; GFX942-GISEL-NEXT: flat_store_dwordx4 v[10:11], v[2:5] offset:96 |
| ; GFX942-GISEL-NEXT: flat_store_dwordx4 v[10:11], v[2:5] offset:112 |
| ; GFX942-GISEL-NEXT: flat_store_dwordx4 v[10:11], v[2:5] offset:128 |
| ; GFX942-GISEL-NEXT: flat_store_dwordx4 v[10:11], v[2:5] offset:144 |
| ; GFX942-GISEL-NEXT: flat_store_dwordx4 v[10:11], v[2:5] offset:160 |
| ; GFX942-GISEL-NEXT: flat_store_dwordx4 v[10:11], v[2:5] offset:176 |
| ; GFX942-GISEL-NEXT: flat_store_dwordx4 v[10:11], v[2:5] offset:192 |
| ; GFX942-GISEL-NEXT: flat_store_dwordx4 v[10:11], v[2:5] offset:208 |
| ; GFX942-GISEL-NEXT: flat_store_dwordx4 v[10:11], v[2:5] offset:224 |
| ; GFX942-GISEL-NEXT: flat_store_dwordx4 v[10:11], v[2:5] offset:240 |
| ; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v9, vcc, 0, v9, vcc |
| ; GFX942-GISEL-NEXT: v_cmp_lt_u64_e32 vcc, v[8:9], v[6:7] |
| ; GFX942-GISEL-NEXT: s_cbranch_vccnz .LBB5_1 |
| ; GFX942-GISEL-NEXT: ; %bb.2: ; %static-memset-post-expansion |
| ; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| ; GFX942-GISEL-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memset.p0.i64(ptr addrspace(0) noundef nonnull align 4 %dst, i8 %setval, i64 2048, i1 false) |
| ret void |
| } |
| |
| ; memset of a constant 1055 bytes to a global (addrspace 1) pointer with align 4 |
| ; and a run-time fill byte. The expected code covers the first 0x400 bytes with a |
| ; loop of sixteen global_store_dwordx4 per 0x100-byte step, then writes the |
| ; remaining 31 bytes in the post-expansion block as dwordx4 (offset 1024), |
| ; dwordx3 (offset 1040), short (offset 1052) and byte (offset 1054) stores. |
| define void @memset_p1_sz1055_align_4_varsetval(ptr addrspace(1) align 4 %dst, i8 %setval) { |
| ; GFX942-SDAG-LABEL: memset_p1_sz1055_align_4_varsetval: |
| ; GFX942-SDAG: ; %bb.0: ; %entry |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: s_mov_b32 s0, 0x4040404 |
| ; GFX942-SDAG-NEXT: v_perm_b32 v4, v2, v2, s0 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v5, v4 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v6, v4 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v7, v4 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v8, v4 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v9, v4 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v10, v4 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v11, v4 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v12, v4 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v13, v4 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v14, v4 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v15, v4 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v16, v4 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v17, v4 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v18, v4 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v19, v4 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v20, v4 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v21, v4 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v22, v4 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v23, v4 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v24, v4 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v25, v4 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v26, v4 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v27, v4 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v28, v4 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v29, v4 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v30, v4 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v31, v4 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v32, v4 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v33, v4 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v34, v4 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v35, v4 |
| ; GFX942-SDAG-NEXT: s_mov_b64 s[0:1], 0 |
| ; GFX942-SDAG-NEXT: v_mov_b64_e32 v[36:37], 0x400 |
| ; GFX942-SDAG-NEXT: .LBB6_1: ; %static-memset-expansion-main-body |
| ; GFX942-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[38:39], v[0:1], 0, s[0:1] |
| ; GFX942-SDAG-NEXT: s_add_u32 s0, s0, 0x100 |
| ; GFX942-SDAG-NEXT: s_addc_u32 s1, s1, 0 |
| ; GFX942-SDAG-NEXT: v_cmp_lt_u64_e32 vcc, s[0:1], v[36:37] |
| ; GFX942-SDAG-NEXT: s_and_b64 vcc, exec, vcc |
| ; GFX942-SDAG-NEXT: global_store_dwordx4 v[38:39], v[32:35], off offset:112 |
| ; GFX942-SDAG-NEXT: global_store_dwordx4 v[38:39], v[28:31], off offset:96 |
| ; GFX942-SDAG-NEXT: global_store_dwordx4 v[38:39], v[24:27], off offset:80 |
| ; GFX942-SDAG-NEXT: global_store_dwordx4 v[38:39], v[20:23], off offset:64 |
| ; GFX942-SDAG-NEXT: global_store_dwordx4 v[38:39], v[16:19], off offset:48 |
| ; GFX942-SDAG-NEXT: global_store_dwordx4 v[38:39], v[12:15], off offset:32 |
| ; GFX942-SDAG-NEXT: global_store_dwordx4 v[38:39], v[8:11], off offset:16 |
| ; GFX942-SDAG-NEXT: global_store_dwordx4 v[38:39], v[4:7], off |
| ; GFX942-SDAG-NEXT: global_store_dwordx4 v[38:39], v[32:35], off offset:240 |
| ; GFX942-SDAG-NEXT: global_store_dwordx4 v[38:39], v[28:31], off offset:224 |
| ; GFX942-SDAG-NEXT: global_store_dwordx4 v[38:39], v[24:27], off offset:208 |
| ; GFX942-SDAG-NEXT: global_store_dwordx4 v[38:39], v[20:23], off offset:192 |
| ; GFX942-SDAG-NEXT: global_store_dwordx4 v[38:39], v[16:19], off offset:176 |
| ; GFX942-SDAG-NEXT: global_store_dwordx4 v[38:39], v[12:15], off offset:160 |
| ; GFX942-SDAG-NEXT: global_store_dwordx4 v[38:39], v[8:11], off offset:144 |
| ; GFX942-SDAG-NEXT: global_store_dwordx4 v[38:39], v[4:7], off offset:128 |
| ; GFX942-SDAG-NEXT: s_cbranch_vccnz .LBB6_1 |
| ; GFX942-SDAG-NEXT: ; %bb.2: ; %static-memset-post-expansion |
| ; GFX942-SDAG-NEXT: s_mov_b32 s0, 0x4040404 |
| ; GFX942-SDAG-NEXT: v_perm_b32 v4, v2, v2, s0 |
| ; GFX942-SDAG-NEXT: v_lshlrev_b16_e32 v3, 8, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v5, v4 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v6, v4 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v7, v4 |
| ; GFX942-SDAG-NEXT: v_or_b32_sdwa v3, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD |
| ; GFX942-SDAG-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:1024 |
| ; GFX942-SDAG-NEXT: global_store_dwordx3 v[0:1], v[4:6], off offset:1040 |
| ; GFX942-SDAG-NEXT: global_store_short v[0:1], v3, off offset:1052 |
| ; GFX942-SDAG-NEXT: global_store_byte v[0:1], v2, off offset:1054 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-GISEL-LABEL: memset_p1_sz1055_align_4_varsetval: |
| ; GFX942-GISEL: ; %bb.0: ; %entry |
| ; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-GISEL-NEXT: v_and_b32_e32 v3, 0xff, v2 |
| ; GFX942-GISEL-NEXT: v_lshl_or_b32 v4, v3, 8, v3 |
| ; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v3 |
| ; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v3, 24, v3 |
| ; GFX942-GISEL-NEXT: v_or3_b32 v4, v4, v5, v3 |
| ; GFX942-GISEL-NEXT: s_mov_b64 s[0:1], 0 |
| ; GFX942-GISEL-NEXT: v_mov_b32_e32 v5, v4 |
| ; GFX942-GISEL-NEXT: v_mov_b32_e32 v6, v4 |
| ; GFX942-GISEL-NEXT: v_mov_b32_e32 v7, v4 |
| ; GFX942-GISEL-NEXT: v_mov_b64_e32 v[8:9], 0x400 |
| ; GFX942-GISEL-NEXT: v_mov_b64_e32 v[10:11], s[0:1] |
| ; GFX942-GISEL-NEXT: .LBB6_1: ; %static-memset-expansion-main-body |
| ; GFX942-GISEL-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; GFX942-GISEL-NEXT: v_add_co_u32_e32 v12, vcc, v0, v10 |
| ; GFX942-GISEL-NEXT: s_nop 1 |
| ; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v13, vcc, v1, v11, vcc |
| ; GFX942-GISEL-NEXT: v_add_co_u32_e32 v10, vcc, 0x100, v10 |
| ; GFX942-GISEL-NEXT: global_store_dwordx4 v[12:13], v[4:7], off |
| ; GFX942-GISEL-NEXT: global_store_dwordx4 v[12:13], v[4:7], off offset:16 |
| ; GFX942-GISEL-NEXT: global_store_dwordx4 v[12:13], v[4:7], off offset:32 |
| ; GFX942-GISEL-NEXT: global_store_dwordx4 v[12:13], v[4:7], off offset:48 |
| ; GFX942-GISEL-NEXT: global_store_dwordx4 v[12:13], v[4:7], off offset:64 |
| ; GFX942-GISEL-NEXT: global_store_dwordx4 v[12:13], v[4:7], off offset:80 |
| ; GFX942-GISEL-NEXT: global_store_dwordx4 v[12:13], v[4:7], off offset:96 |
| ; GFX942-GISEL-NEXT: global_store_dwordx4 v[12:13], v[4:7], off offset:112 |
| ; GFX942-GISEL-NEXT: global_store_dwordx4 v[12:13], v[4:7], off offset:128 |
| ; GFX942-GISEL-NEXT: global_store_dwordx4 v[12:13], v[4:7], off offset:144 |
| ; GFX942-GISEL-NEXT: global_store_dwordx4 v[12:13], v[4:7], off offset:160 |
| ; GFX942-GISEL-NEXT: global_store_dwordx4 v[12:13], v[4:7], off offset:176 |
| ; GFX942-GISEL-NEXT: global_store_dwordx4 v[12:13], v[4:7], off offset:192 |
| ; GFX942-GISEL-NEXT: global_store_dwordx4 v[12:13], v[4:7], off offset:208 |
| ; GFX942-GISEL-NEXT: global_store_dwordx4 v[12:13], v[4:7], off offset:224 |
| ; GFX942-GISEL-NEXT: global_store_dwordx4 v[12:13], v[4:7], off offset:240 |
| ; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v11, vcc, 0, v11, vcc |
| ; GFX942-GISEL-NEXT: v_cmp_lt_u64_e32 vcc, v[10:11], v[8:9] |
| ; GFX942-GISEL-NEXT: s_cbranch_vccnz .LBB6_1 |
| ; GFX942-GISEL-NEXT: ; %bb.2: ; %static-memset-post-expansion |
| ; GFX942-GISEL-NEXT: v_and_b32_e32 v3, 0xff, v2 |
| ; GFX942-GISEL-NEXT: v_lshl_or_b32 v4, v3, 8, v3 |
| ; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v3 |
| ; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v3, 24, v3 |
| ; GFX942-GISEL-NEXT: v_or3_b32 v6, v4, v5, v3 |
| ; GFX942-GISEL-NEXT: v_mov_b32_e32 v3, 8 |
| ; GFX942-GISEL-NEXT: v_lshlrev_b16_sdwa v3, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 |
| ; GFX942-GISEL-NEXT: v_or_b32_sdwa v3, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD |
| ; GFX942-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v3 |
| ; GFX942-GISEL-NEXT: v_lshl_or_b32 v4, v4, 16, v4 |
| ; GFX942-GISEL-NEXT: v_mov_b32_e32 v7, v6 |
| ; GFX942-GISEL-NEXT: v_mov_b32_e32 v8, v6 |
| ; GFX942-GISEL-NEXT: v_mov_b32_e32 v9, v6 |
| ; GFX942-GISEL-NEXT: v_mov_b32_e32 v5, v4 |
| ; GFX942-GISEL-NEXT: global_store_dwordx4 v[0:1], v[6:9], off offset:1024 |
| ; GFX942-GISEL-NEXT: global_store_dwordx3 v[0:1], v[4:6], off offset:1040 |
| ; GFX942-GISEL-NEXT: global_store_short v[0:1], v3, off offset:1052 |
| ; GFX942-GISEL-NEXT: global_store_byte v[0:1], v2, off offset:1054 |
| ; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-GISEL-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memset.p1.i64(ptr addrspace(1) noundef nonnull align 4 %dst, i8 %setval, i64 1055, i1 false) |
| ret void |
| } |
| |
| ; memset of a constant 2048 bytes to a global (addrspace 1) pointer with align 4 |
| ; and a run-time fill byte. The expected code is a single loop of sixteen |
| ; global_store_dwordx4 per 0x100-byte step, bounded by 0x800; the |
| ; post-expansion block contains no stores because 2048 is a multiple of 0x100. |
| define void @memset_p1_sz2048_align_4_varsetval(ptr addrspace(1) align 4 %dst, i8 %setval) { |
| ; GFX942-SDAG-LABEL: memset_p1_sz2048_align_4_varsetval: |
| ; GFX942-SDAG: ; %bb.0: ; %entry |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: s_mov_b32 s0, 0x4040404 |
| ; GFX942-SDAG-NEXT: v_perm_b32 v2, v2, v2, s0 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v4, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v5, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v6, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v7, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v8, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v9, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v10, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v11, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v12, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v13, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v14, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v15, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v16, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v17, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v18, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v19, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v20, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v21, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v22, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v23, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v24, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v25, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v26, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v27, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v28, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v29, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v30, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v31, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v32, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v33, v2 |
| ; GFX942-SDAG-NEXT: s_mov_b64 s[0:1], 0 |
| ; GFX942-SDAG-NEXT: v_mov_b64_e32 v[34:35], 0x800 |
| ; GFX942-SDAG-NEXT: .LBB7_1: ; %static-memset-expansion-main-body |
| ; GFX942-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[36:37], v[0:1], 0, s[0:1] |
| ; GFX942-SDAG-NEXT: s_add_u32 s0, s0, 0x100 |
| ; GFX942-SDAG-NEXT: s_addc_u32 s1, s1, 0 |
| ; GFX942-SDAG-NEXT: v_cmp_lt_u64_e32 vcc, s[0:1], v[34:35] |
| ; GFX942-SDAG-NEXT: s_and_b64 vcc, exec, vcc |
| ; GFX942-SDAG-NEXT: global_store_dwordx4 v[36:37], v[30:33], off offset:112 |
| ; GFX942-SDAG-NEXT: global_store_dwordx4 v[36:37], v[26:29], off offset:96 |
| ; GFX942-SDAG-NEXT: global_store_dwordx4 v[36:37], v[22:25], off offset:80 |
| ; GFX942-SDAG-NEXT: global_store_dwordx4 v[36:37], v[18:21], off offset:64 |
| ; GFX942-SDAG-NEXT: global_store_dwordx4 v[36:37], v[14:17], off offset:48 |
| ; GFX942-SDAG-NEXT: global_store_dwordx4 v[36:37], v[10:13], off offset:32 |
| ; GFX942-SDAG-NEXT: global_store_dwordx4 v[36:37], v[6:9], off offset:16 |
| ; GFX942-SDAG-NEXT: global_store_dwordx4 v[36:37], v[2:5], off |
| ; GFX942-SDAG-NEXT: global_store_dwordx4 v[36:37], v[30:33], off offset:240 |
| ; GFX942-SDAG-NEXT: global_store_dwordx4 v[36:37], v[26:29], off offset:224 |
| ; GFX942-SDAG-NEXT: global_store_dwordx4 v[36:37], v[22:25], off offset:208 |
| ; GFX942-SDAG-NEXT: global_store_dwordx4 v[36:37], v[18:21], off offset:192 |
| ; GFX942-SDAG-NEXT: global_store_dwordx4 v[36:37], v[14:17], off offset:176 |
| ; GFX942-SDAG-NEXT: global_store_dwordx4 v[36:37], v[10:13], off offset:160 |
| ; GFX942-SDAG-NEXT: global_store_dwordx4 v[36:37], v[6:9], off offset:144 |
| ; GFX942-SDAG-NEXT: global_store_dwordx4 v[36:37], v[2:5], off offset:128 |
| ; GFX942-SDAG-NEXT: s_cbranch_vccnz .LBB7_1 |
| ; GFX942-SDAG-NEXT: ; %bb.2: ; %static-memset-post-expansion |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-GISEL-LABEL: memset_p1_sz2048_align_4_varsetval: |
| ; GFX942-GISEL: ; %bb.0: ; %entry |
| ; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-GISEL-NEXT: v_and_b32_e32 v2, 0xff, v2 |
| ; GFX942-GISEL-NEXT: v_lshl_or_b32 v3, v2, 8, v2 |
| ; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v4, 16, v2 |
| ; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v2, 24, v2 |
| ; GFX942-GISEL-NEXT: v_or3_b32 v2, v3, v4, v2 |
| ; GFX942-GISEL-NEXT: s_mov_b64 s[0:1], 0 |
| ; GFX942-GISEL-NEXT: v_mov_b32_e32 v3, v2 |
| ; GFX942-GISEL-NEXT: v_mov_b32_e32 v4, v2 |
| ; GFX942-GISEL-NEXT: v_mov_b32_e32 v5, v2 |
| ; GFX942-GISEL-NEXT: v_mov_b64_e32 v[6:7], 0x800 |
| ; GFX942-GISEL-NEXT: v_mov_b64_e32 v[8:9], s[0:1] |
| ; GFX942-GISEL-NEXT: .LBB7_1: ; %static-memset-expansion-main-body |
| ; GFX942-GISEL-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; GFX942-GISEL-NEXT: v_add_co_u32_e32 v10, vcc, v0, v8 |
| ; GFX942-GISEL-NEXT: s_nop 1 |
| ; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v11, vcc, v1, v9, vcc |
| ; GFX942-GISEL-NEXT: v_add_co_u32_e32 v8, vcc, 0x100, v8 |
| ; GFX942-GISEL-NEXT: global_store_dwordx4 v[10:11], v[2:5], off |
| ; GFX942-GISEL-NEXT: global_store_dwordx4 v[10:11], v[2:5], off offset:16 |
| ; GFX942-GISEL-NEXT: global_store_dwordx4 v[10:11], v[2:5], off offset:32 |
| ; GFX942-GISEL-NEXT: global_store_dwordx4 v[10:11], v[2:5], off offset:48 |
| ; GFX942-GISEL-NEXT: global_store_dwordx4 v[10:11], v[2:5], off offset:64 |
| ; GFX942-GISEL-NEXT: global_store_dwordx4 v[10:11], v[2:5], off offset:80 |
| ; GFX942-GISEL-NEXT: global_store_dwordx4 v[10:11], v[2:5], off offset:96 |
| ; GFX942-GISEL-NEXT: global_store_dwordx4 v[10:11], v[2:5], off offset:112 |
| ; GFX942-GISEL-NEXT: global_store_dwordx4 v[10:11], v[2:5], off offset:128 |
| ; GFX942-GISEL-NEXT: global_store_dwordx4 v[10:11], v[2:5], off offset:144 |
| ; GFX942-GISEL-NEXT: global_store_dwordx4 v[10:11], v[2:5], off offset:160 |
| ; GFX942-GISEL-NEXT: global_store_dwordx4 v[10:11], v[2:5], off offset:176 |
| ; GFX942-GISEL-NEXT: global_store_dwordx4 v[10:11], v[2:5], off offset:192 |
| ; GFX942-GISEL-NEXT: global_store_dwordx4 v[10:11], v[2:5], off offset:208 |
| ; GFX942-GISEL-NEXT: global_store_dwordx4 v[10:11], v[2:5], off offset:224 |
| ; GFX942-GISEL-NEXT: global_store_dwordx4 v[10:11], v[2:5], off offset:240 |
| ; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v9, vcc, 0, v9, vcc |
| ; GFX942-GISEL-NEXT: v_cmp_lt_u64_e32 vcc, v[8:9], v[6:7] |
| ; GFX942-GISEL-NEXT: s_cbranch_vccnz .LBB7_1 |
| ; GFX942-GISEL-NEXT: ; %bb.2: ; %static-memset-post-expansion |
| ; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-GISEL-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memset.p1.i64(ptr addrspace(1) noundef nonnull align 4 %dst, i8 %setval, i64 2048, i1 false) |
| ret void |
| } |
| |
| define void @memset_p3_sz1055_align_4_varsetval(ptr addrspace(3) align 4 %dst, i8 %setval) { |
| ; GFX942-SDAG-LABEL: memset_p3_sz1055_align_4_varsetval: |
| ; GFX942-SDAG: ; %bb.0: ; %entry |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: s_mov_b32 s0, 0x4040404 |
| ; GFX942-SDAG-NEXT: v_perm_b32 v2, v1, v1, s0 |
| ; GFX942-SDAG-NEXT: s_mov_b64 s[0:1], 0 |
| ; GFX942-SDAG-NEXT: v_mov_b64_e32 v[34:35], 0x400 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v36, v0 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v4, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v5, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v6, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v7, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v8, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v9, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v10, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v11, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v12, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v13, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v14, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v15, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v16, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v17, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v18, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v19, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v20, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v21, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v22, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v23, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v24, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v25, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v26, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v27, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v28, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v29, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v30, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v31, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v32, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v33, v2 |
| ; GFX942-SDAG-NEXT: .LBB8_1: ; %static-memset-expansion-main-body |
| ; GFX942-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; GFX942-SDAG-NEXT: s_add_u32 s0, s0, 0x100 |
| ; GFX942-SDAG-NEXT: s_addc_u32 s1, s1, 0 |
| ; GFX942-SDAG-NEXT: v_cmp_lt_u64_e32 vcc, s[0:1], v[34:35] |
| ; GFX942-SDAG-NEXT: ds_write2_b32 v36, v32, v33 offset0:30 offset1:31 |
| ; GFX942-SDAG-NEXT: ds_write2_b32 v36, v30, v31 offset0:28 offset1:29 |
| ; GFX942-SDAG-NEXT: ds_write2_b32 v36, v28, v29 offset0:26 offset1:27 |
| ; GFX942-SDAG-NEXT: ds_write2_b32 v36, v26, v27 offset0:24 offset1:25 |
| ; GFX942-SDAG-NEXT: ds_write2_b32 v36, v24, v25 offset0:22 offset1:23 |
| ; GFX942-SDAG-NEXT: ds_write2_b32 v36, v22, v23 offset0:20 offset1:21 |
| ; GFX942-SDAG-NEXT: ds_write2_b32 v36, v20, v21 offset0:18 offset1:19 |
| ; GFX942-SDAG-NEXT: ds_write2_b32 v36, v18, v19 offset0:16 offset1:17 |
| ; GFX942-SDAG-NEXT: ds_write2_b32 v36, v16, v17 offset0:14 offset1:15 |
| ; GFX942-SDAG-NEXT: ds_write2_b32 v36, v14, v15 offset0:12 offset1:13 |
| ; GFX942-SDAG-NEXT: ds_write2_b32 v36, v12, v13 offset0:10 offset1:11 |
| ; GFX942-SDAG-NEXT: ds_write2_b32 v36, v10, v11 offset0:8 offset1:9 |
| ; GFX942-SDAG-NEXT: ds_write2_b32 v36, v8, v9 offset0:6 offset1:7 |
| ; GFX942-SDAG-NEXT: ds_write2_b32 v36, v6, v7 offset0:4 offset1:5 |
| ; GFX942-SDAG-NEXT: ds_write2_b32 v36, v4, v5 offset0:2 offset1:3 |
| ; GFX942-SDAG-NEXT: ds_write2_b32 v36, v2, v3 offset1:1 |
| ; GFX942-SDAG-NEXT: ds_write2_b32 v36, v32, v33 offset0:62 offset1:63 |
| ; GFX942-SDAG-NEXT: ds_write2_b32 v36, v30, v31 offset0:60 offset1:61 |
| ; GFX942-SDAG-NEXT: ds_write2_b32 v36, v28, v29 offset0:58 offset1:59 |
| ; GFX942-SDAG-NEXT: ds_write2_b32 v36, v26, v27 offset0:56 offset1:57 |
| ; GFX942-SDAG-NEXT: ds_write2_b32 v36, v24, v25 offset0:54 offset1:55 |
| ; GFX942-SDAG-NEXT: ds_write2_b32 v36, v22, v23 offset0:52 offset1:53 |
| ; GFX942-SDAG-NEXT: ds_write2_b32 v36, v20, v21 offset0:50 offset1:51 |
| ; GFX942-SDAG-NEXT: ds_write2_b32 v36, v18, v19 offset0:48 offset1:49 |
| ; GFX942-SDAG-NEXT: ds_write2_b32 v36, v16, v17 offset0:46 offset1:47 |
| ; GFX942-SDAG-NEXT: ds_write2_b32 v36, v14, v15 offset0:44 offset1:45 |
| ; GFX942-SDAG-NEXT: ds_write2_b32 v36, v12, v13 offset0:42 offset1:43 |
| ; GFX942-SDAG-NEXT: ds_write2_b32 v36, v10, v11 offset0:40 offset1:41 |
| ; GFX942-SDAG-NEXT: ds_write2_b32 v36, v8, v9 offset0:38 offset1:39 |
| ; GFX942-SDAG-NEXT: ds_write2_b32 v36, v6, v7 offset0:36 offset1:37 |
| ; GFX942-SDAG-NEXT: ds_write2_b32 v36, v4, v5 offset0:34 offset1:35 |
| ; GFX942-SDAG-NEXT: ds_write2_b32 v36, v2, v3 offset0:32 offset1:33 |
| ; GFX942-SDAG-NEXT: v_add_u32_e32 v36, 0x100, v36 |
| ; GFX942-SDAG-NEXT: s_cbranch_vccnz .LBB8_1 |
| ; GFX942-SDAG-NEXT: ; %bb.2: ; %static-memset-post-expansion |
| ; GFX942-SDAG-NEXT: s_mov_b32 s0, 0x4040404 |
| ; GFX942-SDAG-NEXT: v_add_u32_e32 v2, 0x400, v0 |
| ; GFX942-SDAG-NEXT: v_add_u32_e32 v3, 0x408, v0 |
| ; GFX942-SDAG-NEXT: v_perm_b32 v4, v1, v1, s0 |
| ; GFX942-SDAG-NEXT: ds_write2_b32 v3, v4, v4 offset1:1 |
| ; GFX942-SDAG-NEXT: ds_write2_b32 v2, v4, v4 offset1:1 |
| ; GFX942-SDAG-NEXT: v_add_u32_e32 v2, 0x410, v0 |
| ; GFX942-SDAG-NEXT: ds_write2_b32 v2, v4, v4 offset1:1 |
| ; GFX942-SDAG-NEXT: ds_write_b32 v0, v4 offset:1048 |
| ; GFX942-SDAG-NEXT: v_lshlrev_b16_e32 v2, 8, v1 |
| ; GFX942-SDAG-NEXT: v_or_b32_sdwa v2, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD |
| ; GFX942-SDAG-NEXT: ds_write_b16 v0, v2 offset:1052 |
| ; GFX942-SDAG-NEXT: ds_write_b8 v0, v1 offset:1054 |
| ; GFX942-SDAG-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-GISEL-LABEL: memset_p3_sz1055_align_4_varsetval: |
| ; GFX942-GISEL: ; %bb.0: ; %entry |
| ; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-GISEL-NEXT: v_and_b32_e32 v2, 0xff, v1 |
| ; GFX942-GISEL-NEXT: v_lshl_or_b32 v3, v2, 8, v2 |
| ; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v4, 16, v2 |
| ; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v2, 24, v2 |
| ; GFX942-GISEL-NEXT: v_or3_b32 v2, v3, v4, v2 |
| ; GFX942-GISEL-NEXT: s_mov_b64 s[0:1], 0 |
| ; GFX942-GISEL-NEXT: v_mov_b32_e32 v3, v2 |
| ; GFX942-GISEL-NEXT: v_mov_b32_e32 v4, v2 |
| ; GFX942-GISEL-NEXT: v_mov_b32_e32 v5, v2 |
| ; GFX942-GISEL-NEXT: v_mov_b64_e32 v[6:7], 0x400 |
| ; GFX942-GISEL-NEXT: v_mov_b32_e32 v10, v0 |
| ; GFX942-GISEL-NEXT: v_mov_b64_e32 v[8:9], s[0:1] |
| ; GFX942-GISEL-NEXT: .LBB8_1: ; %static-memset-expansion-main-body |
| ; GFX942-GISEL-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; GFX942-GISEL-NEXT: v_add_co_u32_e32 v8, vcc, 0x100, v8 |
| ; GFX942-GISEL-NEXT: ds_write2_b64 v10, v[2:3], v[4:5] offset1:1 |
| ; GFX942-GISEL-NEXT: ds_write2_b64 v10, v[2:3], v[4:5] offset0:2 offset1:3 |
| ; GFX942-GISEL-NEXT: ds_write2_b64 v10, v[2:3], v[4:5] offset0:4 offset1:5 |
| ; GFX942-GISEL-NEXT: ds_write2_b64 v10, v[2:3], v[4:5] offset0:6 offset1:7 |
| ; GFX942-GISEL-NEXT: ds_write2_b64 v10, v[2:3], v[4:5] offset0:8 offset1:9 |
| ; GFX942-GISEL-NEXT: ds_write2_b64 v10, v[2:3], v[4:5] offset0:10 offset1:11 |
| ; GFX942-GISEL-NEXT: ds_write2_b64 v10, v[2:3], v[4:5] offset0:12 offset1:13 |
| ; GFX942-GISEL-NEXT: ds_write2_b64 v10, v[2:3], v[4:5] offset0:14 offset1:15 |
| ; GFX942-GISEL-NEXT: ds_write2_b64 v10, v[2:3], v[4:5] offset0:16 offset1:17 |
| ; GFX942-GISEL-NEXT: ds_write2_b64 v10, v[2:3], v[4:5] offset0:18 offset1:19 |
| ; GFX942-GISEL-NEXT: ds_write2_b64 v10, v[2:3], v[4:5] offset0:20 offset1:21 |
| ; GFX942-GISEL-NEXT: ds_write2_b64 v10, v[2:3], v[4:5] offset0:22 offset1:23 |
| ; GFX942-GISEL-NEXT: ds_write2_b64 v10, v[2:3], v[4:5] offset0:24 offset1:25 |
| ; GFX942-GISEL-NEXT: ds_write2_b64 v10, v[2:3], v[4:5] offset0:26 offset1:27 |
| ; GFX942-GISEL-NEXT: ds_write2_b64 v10, v[2:3], v[4:5] offset0:28 offset1:29 |
| ; GFX942-GISEL-NEXT: ds_write2_b64 v10, v[2:3], v[4:5] offset0:30 offset1:31 |
| ; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v9, vcc, 0, v9, vcc |
| ; GFX942-GISEL-NEXT: v_add_u32_e32 v10, 0x100, v10 |
| ; GFX942-GISEL-NEXT: v_cmp_lt_u64_e32 vcc, v[8:9], v[6:7] |
| ; GFX942-GISEL-NEXT: s_cbranch_vccnz .LBB8_1 |
| ; GFX942-GISEL-NEXT: ; %bb.2: ; %static-memset-post-expansion |
| ; GFX942-GISEL-NEXT: v_and_b32_e32 v2, 0xff, v1 |
| ; GFX942-GISEL-NEXT: v_lshl_or_b32 v3, v2, 8, v2 |
| ; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v4, 16, v2 |
| ; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v2, 24, v2 |
| ; GFX942-GISEL-NEXT: v_or3_b32 v2, v3, v4, v2 |
| ; GFX942-GISEL-NEXT: v_mov_b32_e32 v3, v2 |
| ; GFX942-GISEL-NEXT: v_mov_b32_e32 v4, v2 |
| ; GFX942-GISEL-NEXT: v_mov_b32_e32 v5, v2 |
| ; GFX942-GISEL-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset0:128 offset1:129 |
| ; GFX942-GISEL-NEXT: v_mov_b32_e32 v3, 8 |
| ; GFX942-GISEL-NEXT: v_lshlrev_b16_sdwa v3, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 |
| ; GFX942-GISEL-NEXT: v_or_b32_sdwa v3, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD |
| ; GFX942-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v3 |
| ; GFX942-GISEL-NEXT: v_lshl_or_b32 v4, v4, 16, v4 |
| ; GFX942-GISEL-NEXT: v_add_u32_e32 v5, 0x410, v0 |
| ; GFX942-GISEL-NEXT: ds_write2_b32 v5, v4, v4 offset1:1 |
| ; GFX942-GISEL-NEXT: ds_write_b32 v0, v2 offset:1048 |
| ; GFX942-GISEL-NEXT: ds_write_b16 v0, v3 offset:1052 |
| ; GFX942-GISEL-NEXT: ds_write_b8 v0, v1 offset:1054 |
| ; GFX942-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX942-GISEL-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memset.p3.i64(ptr addrspace(3) noundef nonnull align 4 %dst, i8 %setval, i64 1055, i1 false) |
| ret void |
| } |
| |
| ; Constant-size memset of 2048 bytes through a 4-byte-aligned addrspace(3) |
| ; pointer, with a byte value that is only known at run time (%setval). |
| ; The expected code for both instruction selectors is a single loop that |
| ; advances the destination by 0x100 bytes per iteration until the byte counter |
| ; reaches 0x800; since 2048 is a multiple of 0x100, no residual stores follow |
| ; the loop. The SelectionDAG output uses 32 ds_write2_b32 per iteration, the |
| ; GlobalISel output uses 16 ds_write2_b64 per iteration. |
| ; The check lines below are autogenerated (see the note at the top of the |
| ; file); regenerate them with the update script instead of editing by hand. |
| define void @memset_p3_sz2048_align_4_varsetval(ptr addrspace(3) align 4 %dst, i8 %setval) { |
| ; GFX942-SDAG-LABEL: memset_p3_sz2048_align_4_varsetval: |
| ; GFX942-SDAG: ; %bb.0: ; %entry |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: s_mov_b32 s0, 0x4040404 |
| ; GFX942-SDAG-NEXT: v_perm_b32 v2, v1, v1, s0 |
| ; GFX942-SDAG-NEXT: s_mov_b64 s[0:1], 0 |
| ; GFX942-SDAG-NEXT: v_mov_b64_e32 v[34:35], 0x800 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v1, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v4, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v6, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v5, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v8, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v7, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v10, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v9, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v12, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v11, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v14, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v13, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v16, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v15, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v18, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v17, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v20, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v19, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v22, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v21, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v24, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v23, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v26, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v25, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v28, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v27, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v30, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v29, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v32, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v31, v2 |
| ; GFX942-SDAG-NEXT: .LBB9_1: ; %static-memset-expansion-main-body |
| ; GFX942-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; GFX942-SDAG-NEXT: s_add_u32 s0, s0, 0x100 |
| ; GFX942-SDAG-NEXT: s_addc_u32 s1, s1, 0 |
| ; GFX942-SDAG-NEXT: v_cmp_lt_u64_e32 vcc, s[0:1], v[34:35] |
| ; GFX942-SDAG-NEXT: ds_write2_b32 v0, v32, v31 offset0:30 offset1:31 |
| ; GFX942-SDAG-NEXT: ds_write2_b32 v0, v30, v29 offset0:28 offset1:29 |
| ; GFX942-SDAG-NEXT: ds_write2_b32 v0, v28, v27 offset0:26 offset1:27 |
| ; GFX942-SDAG-NEXT: ds_write2_b32 v0, v26, v25 offset0:24 offset1:25 |
| ; GFX942-SDAG-NEXT: ds_write2_b32 v0, v24, v23 offset0:22 offset1:23 |
| ; GFX942-SDAG-NEXT: ds_write2_b32 v0, v22, v21 offset0:20 offset1:21 |
| ; GFX942-SDAG-NEXT: ds_write2_b32 v0, v20, v19 offset0:18 offset1:19 |
| ; GFX942-SDAG-NEXT: ds_write2_b32 v0, v18, v17 offset0:16 offset1:17 |
| ; GFX942-SDAG-NEXT: ds_write2_b32 v0, v16, v15 offset0:14 offset1:15 |
| ; GFX942-SDAG-NEXT: ds_write2_b32 v0, v14, v13 offset0:12 offset1:13 |
| ; GFX942-SDAG-NEXT: ds_write2_b32 v0, v12, v11 offset0:10 offset1:11 |
| ; GFX942-SDAG-NEXT: ds_write2_b32 v0, v10, v9 offset0:8 offset1:9 |
| ; GFX942-SDAG-NEXT: ds_write2_b32 v0, v8, v7 offset0:6 offset1:7 |
| ; GFX942-SDAG-NEXT: ds_write2_b32 v0, v6, v5 offset0:4 offset1:5 |
| ; GFX942-SDAG-NEXT: ds_write2_b32 v0, v4, v3 offset0:2 offset1:3 |
| ; GFX942-SDAG-NEXT: ds_write2_b32 v0, v2, v1 offset1:1 |
| ; GFX942-SDAG-NEXT: ds_write2_b32 v0, v32, v31 offset0:62 offset1:63 |
| ; GFX942-SDAG-NEXT: ds_write2_b32 v0, v30, v29 offset0:60 offset1:61 |
| ; GFX942-SDAG-NEXT: ds_write2_b32 v0, v28, v27 offset0:58 offset1:59 |
| ; GFX942-SDAG-NEXT: ds_write2_b32 v0, v26, v25 offset0:56 offset1:57 |
| ; GFX942-SDAG-NEXT: ds_write2_b32 v0, v24, v23 offset0:54 offset1:55 |
| ; GFX942-SDAG-NEXT: ds_write2_b32 v0, v22, v21 offset0:52 offset1:53 |
| ; GFX942-SDAG-NEXT: ds_write2_b32 v0, v20, v19 offset0:50 offset1:51 |
| ; GFX942-SDAG-NEXT: ds_write2_b32 v0, v18, v17 offset0:48 offset1:49 |
| ; GFX942-SDAG-NEXT: ds_write2_b32 v0, v16, v15 offset0:46 offset1:47 |
| ; GFX942-SDAG-NEXT: ds_write2_b32 v0, v14, v13 offset0:44 offset1:45 |
| ; GFX942-SDAG-NEXT: ds_write2_b32 v0, v12, v11 offset0:42 offset1:43 |
| ; GFX942-SDAG-NEXT: ds_write2_b32 v0, v10, v9 offset0:40 offset1:41 |
| ; GFX942-SDAG-NEXT: ds_write2_b32 v0, v8, v7 offset0:38 offset1:39 |
| ; GFX942-SDAG-NEXT: ds_write2_b32 v0, v6, v5 offset0:36 offset1:37 |
| ; GFX942-SDAG-NEXT: ds_write2_b32 v0, v4, v3 offset0:34 offset1:35 |
| ; GFX942-SDAG-NEXT: ds_write2_b32 v0, v2, v1 offset0:32 offset1:33 |
| ; GFX942-SDAG-NEXT: v_add_u32_e32 v0, 0x100, v0 |
| ; GFX942-SDAG-NEXT: s_cbranch_vccnz .LBB9_1 |
| ; GFX942-SDAG-NEXT: ; %bb.2: ; %static-memset-post-expansion |
| ; GFX942-SDAG-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-GISEL-LABEL: memset_p3_sz2048_align_4_varsetval: |
| ; GFX942-GISEL: ; %bb.0: ; %entry |
| ; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1 |
| ; GFX942-GISEL-NEXT: v_lshl_or_b32 v2, v1, 8, v1 |
| ; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v1 |
| ; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v1, 24, v1 |
| ; GFX942-GISEL-NEXT: v_or3_b32 v2, v2, v3, v1 |
| ; GFX942-GISEL-NEXT: s_mov_b64 s[0:1], 0 |
| ; GFX942-GISEL-NEXT: v_mov_b32_e32 v3, v2 |
| ; GFX942-GISEL-NEXT: v_mov_b32_e32 v4, v2 |
| ; GFX942-GISEL-NEXT: v_mov_b32_e32 v5, v2 |
| ; GFX942-GISEL-NEXT: v_mov_b64_e32 v[6:7], 0x800 |
| ; GFX942-GISEL-NEXT: v_mov_b64_e32 v[8:9], s[0:1] |
| ; GFX942-GISEL-NEXT: .LBB9_1: ; %static-memset-expansion-main-body |
| ; GFX942-GISEL-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; GFX942-GISEL-NEXT: v_add_co_u32_e32 v8, vcc, 0x100, v8 |
| ; GFX942-GISEL-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset1:1 |
| ; GFX942-GISEL-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset0:2 offset1:3 |
| ; GFX942-GISEL-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset0:4 offset1:5 |
| ; GFX942-GISEL-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset0:6 offset1:7 |
| ; GFX942-GISEL-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset0:8 offset1:9 |
| ; GFX942-GISEL-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset0:10 offset1:11 |
| ; GFX942-GISEL-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset0:12 offset1:13 |
| ; GFX942-GISEL-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset0:14 offset1:15 |
| ; GFX942-GISEL-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset0:16 offset1:17 |
| ; GFX942-GISEL-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset0:18 offset1:19 |
| ; GFX942-GISEL-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset0:20 offset1:21 |
| ; GFX942-GISEL-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset0:22 offset1:23 |
| ; GFX942-GISEL-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset0:24 offset1:25 |
| ; GFX942-GISEL-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset0:26 offset1:27 |
| ; GFX942-GISEL-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset0:28 offset1:29 |
| ; GFX942-GISEL-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset0:30 offset1:31 |
| ; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v9, vcc, 0, v9, vcc |
| ; GFX942-GISEL-NEXT: v_add_u32_e32 v0, 0x100, v0 |
| ; GFX942-GISEL-NEXT: v_cmp_lt_u64_e32 vcc, v[8:9], v[6:7] |
| ; GFX942-GISEL-NEXT: s_cbranch_vccnz .LBB9_1 |
| ; GFX942-GISEL-NEXT: ; %bb.2: ; %static-memset-post-expansion |
| ; GFX942-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX942-GISEL-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memset.p3.i64(ptr addrspace(3) noundef nonnull align 4 %dst, i8 %setval, i64 2048, i1 false) |
| ret void |
| } |
| |
| ; Constant-size memset of 1055 bytes through a 4-byte-aligned addrspace(5) |
| ; pointer, with a byte value that is only known at run time (%setval). |
| ; 1055 is not a multiple of the 0x100-byte loop step, so the expected code has |
| ; two parts for both instruction selectors: |
| ;   * a loop of sixteen scratch_store_dwordx4 per iteration that runs until |
| ;     the byte counter reaches 0x400 (1024), and |
| ;   * straight-line residual stores for the remaining 31 bytes: dwordx4 at |
| ;     offset 1024, dwordx2 at 1040, dword at 1048, short at 1052, byte at 1054. |
| ; The check lines below are autogenerated (see the note at the top of the |
| ; file); regenerate them with the update script instead of editing by hand. |
| define void @memset_p5_sz1055_align_4_varsetval(ptr addrspace(5) align 4 %dst, i8 %setval) { |
| ; GFX942-SDAG-LABEL: memset_p5_sz1055_align_4_varsetval: |
| ; GFX942-SDAG: ; %bb.0: ; %entry |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: s_mov_b32 s0, 0x4040404 |
| ; GFX942-SDAG-NEXT: v_perm_b32 v2, v1, v1, s0 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v4, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v5, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v6, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v7, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v8, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v9, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v10, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v11, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v12, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v13, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v14, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v15, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v16, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v17, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v18, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v19, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v20, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v21, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v22, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v23, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v24, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v25, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v26, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v27, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v28, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v29, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v30, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v31, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v32, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v33, v2 |
| ; GFX942-SDAG-NEXT: s_mov_b64 s[0:1], 0 |
| ; GFX942-SDAG-NEXT: v_mov_b64_e32 v[34:35], 0x400 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v36, v0 |
| ; GFX942-SDAG-NEXT: .LBB10_1: ; %static-memset-expansion-main-body |
| ; GFX942-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; GFX942-SDAG-NEXT: s_add_u32 s0, s0, 0x100 |
| ; GFX942-SDAG-NEXT: s_addc_u32 s1, s1, 0 |
| ; GFX942-SDAG-NEXT: v_cmp_lt_u64_e32 vcc, s[0:1], v[34:35] |
| ; GFX942-SDAG-NEXT: scratch_store_dwordx4 v36, v[30:33], off offset:112 |
| ; GFX942-SDAG-NEXT: scratch_store_dwordx4 v36, v[26:29], off offset:96 |
| ; GFX942-SDAG-NEXT: scratch_store_dwordx4 v36, v[22:25], off offset:80 |
| ; GFX942-SDAG-NEXT: scratch_store_dwordx4 v36, v[18:21], off offset:64 |
| ; GFX942-SDAG-NEXT: scratch_store_dwordx4 v36, v[14:17], off offset:48 |
| ; GFX942-SDAG-NEXT: scratch_store_dwordx4 v36, v[10:13], off offset:32 |
| ; GFX942-SDAG-NEXT: scratch_store_dwordx4 v36, v[6:9], off offset:16 |
| ; GFX942-SDAG-NEXT: scratch_store_dwordx4 v36, v[2:5], off |
| ; GFX942-SDAG-NEXT: scratch_store_dwordx4 v36, v[30:33], off offset:240 |
| ; GFX942-SDAG-NEXT: scratch_store_dwordx4 v36, v[26:29], off offset:224 |
| ; GFX942-SDAG-NEXT: scratch_store_dwordx4 v36, v[22:25], off offset:208 |
| ; GFX942-SDAG-NEXT: scratch_store_dwordx4 v36, v[18:21], off offset:192 |
| ; GFX942-SDAG-NEXT: scratch_store_dwordx4 v36, v[14:17], off offset:176 |
| ; GFX942-SDAG-NEXT: scratch_store_dwordx4 v36, v[10:13], off offset:160 |
| ; GFX942-SDAG-NEXT: scratch_store_dwordx4 v36, v[6:9], off offset:144 |
| ; GFX942-SDAG-NEXT: scratch_store_dwordx4 v36, v[2:5], off offset:128 |
| ; GFX942-SDAG-NEXT: v_add_u32_e32 v36, 0x100, v36 |
| ; GFX942-SDAG-NEXT: s_cbranch_vccnz .LBB10_1 |
| ; GFX942-SDAG-NEXT: ; %bb.2: ; %static-memset-post-expansion |
| ; GFX942-SDAG-NEXT: s_mov_b32 s0, 0x4040404 |
| ; GFX942-SDAG-NEXT: v_perm_b32 v2, v1, v1, s0 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v4, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v5, v2 |
| ; GFX942-SDAG-NEXT: scratch_store_dwordx4 v0, v[2:5], off offset:1024 |
| ; GFX942-SDAG-NEXT: scratch_store_dwordx2 v0, v[2:3], off offset:1040 |
| ; GFX942-SDAG-NEXT: scratch_store_dword v0, v2, off offset:1048 |
| ; GFX942-SDAG-NEXT: v_lshlrev_b16_e32 v2, 8, v1 |
| ; GFX942-SDAG-NEXT: v_or_b32_sdwa v2, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD |
| ; GFX942-SDAG-NEXT: scratch_store_short v0, v2, off offset:1052 |
| ; GFX942-SDAG-NEXT: scratch_store_byte v0, v1, off offset:1054 |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-GISEL-LABEL: memset_p5_sz1055_align_4_varsetval: |
| ; GFX942-GISEL: ; %bb.0: ; %entry |
| ; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-GISEL-NEXT: v_and_b32_e32 v2, 0xff, v1 |
| ; GFX942-GISEL-NEXT: v_lshl_or_b32 v3, v2, 8, v2 |
| ; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v4, 16, v2 |
| ; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v2, 24, v2 |
| ; GFX942-GISEL-NEXT: v_or3_b32 v2, v3, v4, v2 |
| ; GFX942-GISEL-NEXT: s_mov_b64 s[0:1], 0 |
| ; GFX942-GISEL-NEXT: v_mov_b32_e32 v3, v2 |
| ; GFX942-GISEL-NEXT: v_mov_b32_e32 v4, v2 |
| ; GFX942-GISEL-NEXT: v_mov_b32_e32 v5, v2 |
| ; GFX942-GISEL-NEXT: v_mov_b64_e32 v[6:7], 0x400 |
| ; GFX942-GISEL-NEXT: v_mov_b32_e32 v10, v0 |
| ; GFX942-GISEL-NEXT: v_mov_b64_e32 v[8:9], s[0:1] |
| ; GFX942-GISEL-NEXT: .LBB10_1: ; %static-memset-expansion-main-body |
| ; GFX942-GISEL-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; GFX942-GISEL-NEXT: v_add_co_u32_e32 v8, vcc, 0x100, v8 |
| ; GFX942-GISEL-NEXT: scratch_store_dwordx4 v10, v[2:5], off |
| ; GFX942-GISEL-NEXT: scratch_store_dwordx4 v10, v[2:5], off offset:16 |
| ; GFX942-GISEL-NEXT: scratch_store_dwordx4 v10, v[2:5], off offset:32 |
| ; GFX942-GISEL-NEXT: scratch_store_dwordx4 v10, v[2:5], off offset:48 |
| ; GFX942-GISEL-NEXT: scratch_store_dwordx4 v10, v[2:5], off offset:64 |
| ; GFX942-GISEL-NEXT: scratch_store_dwordx4 v10, v[2:5], off offset:80 |
| ; GFX942-GISEL-NEXT: scratch_store_dwordx4 v10, v[2:5], off offset:96 |
| ; GFX942-GISEL-NEXT: scratch_store_dwordx4 v10, v[2:5], off offset:112 |
| ; GFX942-GISEL-NEXT: scratch_store_dwordx4 v10, v[2:5], off offset:128 |
| ; GFX942-GISEL-NEXT: scratch_store_dwordx4 v10, v[2:5], off offset:144 |
| ; GFX942-GISEL-NEXT: scratch_store_dwordx4 v10, v[2:5], off offset:160 |
| ; GFX942-GISEL-NEXT: scratch_store_dwordx4 v10, v[2:5], off offset:176 |
| ; GFX942-GISEL-NEXT: scratch_store_dwordx4 v10, v[2:5], off offset:192 |
| ; GFX942-GISEL-NEXT: scratch_store_dwordx4 v10, v[2:5], off offset:208 |
| ; GFX942-GISEL-NEXT: scratch_store_dwordx4 v10, v[2:5], off offset:224 |
| ; GFX942-GISEL-NEXT: scratch_store_dwordx4 v10, v[2:5], off offset:240 |
| ; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v9, vcc, 0, v9, vcc |
| ; GFX942-GISEL-NEXT: v_add_u32_e32 v10, 0x100, v10 |
| ; GFX942-GISEL-NEXT: v_cmp_lt_u64_e32 vcc, v[8:9], v[6:7] |
| ; GFX942-GISEL-NEXT: s_cbranch_vccnz .LBB10_1 |
| ; GFX942-GISEL-NEXT: ; %bb.2: ; %static-memset-post-expansion |
| ; GFX942-GISEL-NEXT: v_and_b32_e32 v2, 0xff, v1 |
| ; GFX942-GISEL-NEXT: v_lshl_or_b32 v3, v2, 8, v2 |
| ; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v4, 16, v2 |
| ; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v2, 24, v2 |
| ; GFX942-GISEL-NEXT: v_or3_b32 v2, v3, v4, v2 |
| ; GFX942-GISEL-NEXT: v_mov_b32_e32 v3, v2 |
| ; GFX942-GISEL-NEXT: v_mov_b32_e32 v4, v2 |
| ; GFX942-GISEL-NEXT: v_mov_b32_e32 v5, v2 |
| ; GFX942-GISEL-NEXT: scratch_store_dwordx4 v0, v[2:5], off offset:1024 |
| ; GFX942-GISEL-NEXT: s_nop 1 |
| ; GFX942-GISEL-NEXT: v_mov_b32_e32 v3, 8 |
| ; GFX942-GISEL-NEXT: v_lshlrev_b16_sdwa v3, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 |
| ; GFX942-GISEL-NEXT: v_or_b32_sdwa v3, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD |
| ; GFX942-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v3 |
| ; GFX942-GISEL-NEXT: v_lshl_or_b32 v4, v4, 16, v4 |
| ; GFX942-GISEL-NEXT: v_mov_b32_e32 v5, v4 |
| ; GFX942-GISEL-NEXT: scratch_store_dwordx2 v0, v[4:5], off offset:1040 |
| ; GFX942-GISEL-NEXT: scratch_store_dword v0, v2, off offset:1048 |
| ; GFX942-GISEL-NEXT: scratch_store_short v0, v3, off offset:1052 |
| ; GFX942-GISEL-NEXT: scratch_store_byte v0, v1, off offset:1054 |
| ; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-GISEL-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memset.p5.i64(ptr addrspace(5) noundef nonnull align 4 %dst, i8 %setval, i64 1055, i1 false) |
| ret void |
| } |
| |
| ; Constant-size memset of 2048 bytes through a 4-byte-aligned addrspace(5) |
| ; pointer, with a byte value that is only known at run time (%setval). |
| ; The expected code for both instruction selectors is a single loop of sixteen |
| ; scratch_store_dwordx4 per iteration (0x100 bytes) that runs until the byte |
| ; counter reaches 0x800; since 2048 is a multiple of 0x100, the block after |
| ; the loop contains no residual stores, only the wait and the return. |
| ; The check lines below are autogenerated (see the note at the top of the |
| ; file); regenerate them with the update script instead of editing by hand. |
| define void @memset_p5_sz2048_align_4_varsetval(ptr addrspace(5) align 4 %dst, i8 %setval) { |
| ; GFX942-SDAG-LABEL: memset_p5_sz2048_align_4_varsetval: |
| ; GFX942-SDAG: ; %bb.0: ; %entry |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: s_mov_b32 s0, 0x4040404 |
| ; GFX942-SDAG-NEXT: v_perm_b32 v2, v1, v1, s0 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v4, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v5, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v6, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v7, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v8, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v9, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v10, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v11, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v12, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v13, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v14, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v15, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v16, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v17, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v18, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v19, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v20, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v21, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v22, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v23, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v24, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v25, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v26, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v27, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v28, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v29, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v30, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v31, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v32, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v33, v2 |
| ; GFX942-SDAG-NEXT: s_mov_b64 s[0:1], 0 |
| ; GFX942-SDAG-NEXT: v_mov_b64_e32 v[34:35], 0x800 |
| ; GFX942-SDAG-NEXT: .LBB11_1: ; %static-memset-expansion-main-body |
| ; GFX942-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; GFX942-SDAG-NEXT: s_add_u32 s0, s0, 0x100 |
| ; GFX942-SDAG-NEXT: s_addc_u32 s1, s1, 0 |
| ; GFX942-SDAG-NEXT: v_cmp_lt_u64_e32 vcc, s[0:1], v[34:35] |
| ; GFX942-SDAG-NEXT: scratch_store_dwordx4 v0, v[30:33], off offset:112 |
| ; GFX942-SDAG-NEXT: scratch_store_dwordx4 v0, v[26:29], off offset:96 |
| ; GFX942-SDAG-NEXT: scratch_store_dwordx4 v0, v[22:25], off offset:80 |
| ; GFX942-SDAG-NEXT: scratch_store_dwordx4 v0, v[18:21], off offset:64 |
| ; GFX942-SDAG-NEXT: scratch_store_dwordx4 v0, v[14:17], off offset:48 |
| ; GFX942-SDAG-NEXT: scratch_store_dwordx4 v0, v[10:13], off offset:32 |
| ; GFX942-SDAG-NEXT: scratch_store_dwordx4 v0, v[6:9], off offset:16 |
| ; GFX942-SDAG-NEXT: scratch_store_dwordx4 v0, v[2:5], off |
| ; GFX942-SDAG-NEXT: scratch_store_dwordx4 v0, v[30:33], off offset:240 |
| ; GFX942-SDAG-NEXT: scratch_store_dwordx4 v0, v[26:29], off offset:224 |
| ; GFX942-SDAG-NEXT: scratch_store_dwordx4 v0, v[22:25], off offset:208 |
| ; GFX942-SDAG-NEXT: scratch_store_dwordx4 v0, v[18:21], off offset:192 |
| ; GFX942-SDAG-NEXT: scratch_store_dwordx4 v0, v[14:17], off offset:176 |
| ; GFX942-SDAG-NEXT: scratch_store_dwordx4 v0, v[10:13], off offset:160 |
| ; GFX942-SDAG-NEXT: scratch_store_dwordx4 v0, v[6:9], off offset:144 |
| ; GFX942-SDAG-NEXT: scratch_store_dwordx4 v0, v[2:5], off offset:128 |
| ; GFX942-SDAG-NEXT: v_add_u32_e32 v0, 0x100, v0 |
| ; GFX942-SDAG-NEXT: s_cbranch_vccnz .LBB11_1 |
| ; GFX942-SDAG-NEXT: ; %bb.2: ; %static-memset-post-expansion |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-GISEL-LABEL: memset_p5_sz2048_align_4_varsetval: |
| ; GFX942-GISEL: ; %bb.0: ; %entry |
| ; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1 |
| ; GFX942-GISEL-NEXT: v_lshl_or_b32 v2, v1, 8, v1 |
| ; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v1 |
| ; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v1, 24, v1 |
| ; GFX942-GISEL-NEXT: v_or3_b32 v2, v2, v3, v1 |
| ; GFX942-GISEL-NEXT: s_mov_b64 s[0:1], 0 |
| ; GFX942-GISEL-NEXT: v_mov_b32_e32 v3, v2 |
| ; GFX942-GISEL-NEXT: v_mov_b32_e32 v4, v2 |
| ; GFX942-GISEL-NEXT: v_mov_b32_e32 v5, v2 |
| ; GFX942-GISEL-NEXT: v_mov_b64_e32 v[6:7], 0x800 |
| ; GFX942-GISEL-NEXT: v_mov_b64_e32 v[8:9], s[0:1] |
| ; GFX942-GISEL-NEXT: .LBB11_1: ; %static-memset-expansion-main-body |
| ; GFX942-GISEL-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; GFX942-GISEL-NEXT: v_add_co_u32_e32 v8, vcc, 0x100, v8 |
| ; GFX942-GISEL-NEXT: scratch_store_dwordx4 v0, v[2:5], off |
| ; GFX942-GISEL-NEXT: scratch_store_dwordx4 v0, v[2:5], off offset:16 |
| ; GFX942-GISEL-NEXT: scratch_store_dwordx4 v0, v[2:5], off offset:32 |
| ; GFX942-GISEL-NEXT: scratch_store_dwordx4 v0, v[2:5], off offset:48 |
| ; GFX942-GISEL-NEXT: scratch_store_dwordx4 v0, v[2:5], off offset:64 |
| ; GFX942-GISEL-NEXT: scratch_store_dwordx4 v0, v[2:5], off offset:80 |
| ; GFX942-GISEL-NEXT: scratch_store_dwordx4 v0, v[2:5], off offset:96 |
| ; GFX942-GISEL-NEXT: scratch_store_dwordx4 v0, v[2:5], off offset:112 |
| ; GFX942-GISEL-NEXT: scratch_store_dwordx4 v0, v[2:5], off offset:128 |
| ; GFX942-GISEL-NEXT: scratch_store_dwordx4 v0, v[2:5], off offset:144 |
| ; GFX942-GISEL-NEXT: scratch_store_dwordx4 v0, v[2:5], off offset:160 |
| ; GFX942-GISEL-NEXT: scratch_store_dwordx4 v0, v[2:5], off offset:176 |
| ; GFX942-GISEL-NEXT: scratch_store_dwordx4 v0, v[2:5], off offset:192 |
| ; GFX942-GISEL-NEXT: scratch_store_dwordx4 v0, v[2:5], off offset:208 |
| ; GFX942-GISEL-NEXT: scratch_store_dwordx4 v0, v[2:5], off offset:224 |
| ; GFX942-GISEL-NEXT: scratch_store_dwordx4 v0, v[2:5], off offset:240 |
| ; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v9, vcc, 0, v9, vcc |
| ; GFX942-GISEL-NEXT: v_add_u32_e32 v0, 0x100, v0 |
| ; GFX942-GISEL-NEXT: v_cmp_lt_u64_e32 vcc, v[8:9], v[6:7] |
| ; GFX942-GISEL-NEXT: s_cbranch_vccnz .LBB11_1 |
| ; GFX942-GISEL-NEXT: ; %bb.2: ; %static-memset-post-expansion |
| ; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-GISEL-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memset.p5.i64(ptr addrspace(5) noundef nonnull align 4 %dst, i8 %setval, i64 2048, i1 false) |
| ret void |
| } |
| |
| define void @memset_p1_varsz_align_4_set40(ptr addrspace(1) align 4 %dst, i64 %size) { |
| ; GFX942-SDAG-LABEL: memset_p1_varsz_align_4_set40: |
| ; GFX942-SDAG: ; %bb.0: ; %entry |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: v_and_b32_e32 v10, -16, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v11, v3 |
| ; GFX942-SDAG-NEXT: v_and_b32_e32 v8, 15, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v9, 0 |
| ; GFX942-SDAG-NEXT: s_mov_b64 s[0:1], 0 |
| ; GFX942-SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[10:11] |
| ; GFX942-SDAG-NEXT: s_and_saveexec_b64 s[2:3], vcc |
| ; GFX942-SDAG-NEXT: s_cbranch_execz .LBB12_3 |
| ; GFX942-SDAG-NEXT: ; %bb.1: ; %dynamic-memset-expansion-main-body.preheader |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v4, 0x28282828 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v5, v4 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v6, v4 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v7, v4 |
| ; GFX942-SDAG-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX942-SDAG-NEXT: .LBB12_2: ; %dynamic-memset-expansion-main-body |
| ; GFX942-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[12:13], v[0:1], 0, s[4:5] |
| ; GFX942-SDAG-NEXT: s_add_u32 s4, s4, 16 |
| ; GFX942-SDAG-NEXT: s_addc_u32 s5, s5, 0 |
| ; GFX942-SDAG-NEXT: v_cmp_ge_u64_e32 vcc, s[4:5], v[10:11] |
| ; GFX942-SDAG-NEXT: s_or_b64 s[0:1], vcc, s[0:1] |
| ; GFX942-SDAG-NEXT: global_store_dwordx4 v[12:13], v[4:7], off |
| ; GFX942-SDAG-NEXT: s_andn2_b64 exec, exec, s[0:1] |
| ; GFX942-SDAG-NEXT: s_cbranch_execnz .LBB12_2 |
| ; GFX942-SDAG-NEXT: .LBB12_3: ; %Flow4 |
| ; GFX942-SDAG-NEXT: s_or_b64 exec, exec, s[2:3] |
| ; GFX942-SDAG-NEXT: s_mov_b64 s[0:1], 0 |
| ; GFX942-SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[8:9] |
| ; GFX942-SDAG-NEXT: s_and_saveexec_b64 s[2:3], vcc |
| ; GFX942-SDAG-NEXT: s_cbranch_execz .LBB12_6 |
| ; GFX942-SDAG-NEXT: ; %bb.4: ; %dynamic-memset-expansion-residual-body.preheader |
| ; GFX942-SDAG-NEXT: v_and_b32_e32 v2, -16, v2 |
| ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3] |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v2, 40 |
| ; GFX942-SDAG-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX942-SDAG-NEXT: .LBB12_5: ; %dynamic-memset-expansion-residual-body |
| ; GFX942-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[4:5] |
| ; GFX942-SDAG-NEXT: s_add_u32 s4, s4, 1 |
| ; GFX942-SDAG-NEXT: s_addc_u32 s5, s5, 0 |
| ; GFX942-SDAG-NEXT: v_cmp_ge_u64_e32 vcc, s[4:5], v[8:9] |
| ; GFX942-SDAG-NEXT: s_or_b64 s[0:1], vcc, s[0:1] |
| ; GFX942-SDAG-NEXT: global_store_byte v[4:5], v2, off |
| ; GFX942-SDAG-NEXT: s_andn2_b64 exec, exec, s[0:1] |
| ; GFX942-SDAG-NEXT: s_cbranch_execnz .LBB12_5 |
| ; GFX942-SDAG-NEXT: .LBB12_6: ; %Flow2 |
| ; GFX942-SDAG-NEXT: s_or_b64 exec, exec, s[2:3] |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-GISEL-LABEL: memset_p1_varsz_align_4_set40: |
| ; GFX942-GISEL: ; %bb.0: ; %entry |
| ; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-GISEL-NEXT: v_and_b32_e32 v8, 15, v2 |
| ; GFX942-GISEL-NEXT: v_sub_co_u32_e32 v10, vcc, v2, v8 |
| ; GFX942-GISEL-NEXT: v_mov_b32_e32 v9, 0 |
| ; GFX942-GISEL-NEXT: s_nop 0 |
| ; GFX942-GISEL-NEXT: v_subbrev_co_u32_e32 v11, vcc, 0, v3, vcc |
| ; GFX942-GISEL-NEXT: s_mov_b64 s[0:1], 0 |
| ; GFX942-GISEL-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[10:11] |
| ; GFX942-GISEL-NEXT: s_and_saveexec_b64 s[2:3], vcc |
| ; GFX942-GISEL-NEXT: s_cbranch_execz .LBB12_3 |
| ; GFX942-GISEL-NEXT: ; %bb.1: ; %dynamic-memset-expansion-main-body.preheader |
| ; GFX942-GISEL-NEXT: s_mov_b32 s4, 0x28282828 |
| ; GFX942-GISEL-NEXT: s_mov_b32 s5, s4 |
| ; GFX942-GISEL-NEXT: s_mov_b32 s6, s4 |
| ; GFX942-GISEL-NEXT: s_mov_b32 s7, s4 |
| ; GFX942-GISEL-NEXT: v_mov_b64_e32 v[4:5], s[4:5] |
| ; GFX942-GISEL-NEXT: v_mov_b64_e32 v[6:7], s[6:7] |
| ; GFX942-GISEL-NEXT: v_mov_b64_e32 v[12:13], s[0:1] |
| ; GFX942-GISEL-NEXT: .LBB12_2: ; %dynamic-memset-expansion-main-body |
| ; GFX942-GISEL-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; GFX942-GISEL-NEXT: v_add_co_u32_e32 v14, vcc, v0, v12 |
| ; GFX942-GISEL-NEXT: s_nop 1 |
| ; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v15, vcc, v1, v13, vcc |
| ; GFX942-GISEL-NEXT: v_add_co_u32_e32 v12, vcc, 16, v12 |
| ; GFX942-GISEL-NEXT: global_store_dwordx4 v[14:15], v[4:7], off |
| ; GFX942-GISEL-NEXT: s_nop 0 |
| ; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v13, vcc, 0, v13, vcc |
| ; GFX942-GISEL-NEXT: v_cmp_ge_u64_e32 vcc, v[12:13], v[10:11] |
| ; GFX942-GISEL-NEXT: s_or_b64 s[0:1], vcc, s[0:1] |
| ; GFX942-GISEL-NEXT: s_andn2_b64 exec, exec, s[0:1] |
| ; GFX942-GISEL-NEXT: s_cbranch_execnz .LBB12_2 |
| ; GFX942-GISEL-NEXT: .LBB12_3: ; %Flow4 |
| ; GFX942-GISEL-NEXT: s_or_b64 exec, exec, s[2:3] |
| ; GFX942-GISEL-NEXT: s_mov_b64 s[0:1], 0 |
| ; GFX942-GISEL-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[8:9] |
| ; GFX942-GISEL-NEXT: s_and_saveexec_b64 s[2:3], vcc |
| ; GFX942-GISEL-NEXT: s_cbranch_execz .LBB12_6 |
| ; GFX942-GISEL-NEXT: ; %bb.4: ; %dynamic-memset-expansion-residual-body.preheader |
| ; GFX942-GISEL-NEXT: v_lshrrev_b64 v[2:3], 4, v[2:3] |
| ; GFX942-GISEL-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 4, v[0:1] |
| ; GFX942-GISEL-NEXT: v_mov_b32_e32 v4, 40 |
| ; GFX942-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1] |
| ; GFX942-GISEL-NEXT: .LBB12_5: ; %dynamic-memset-expansion-residual-body |
| ; GFX942-GISEL-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; GFX942-GISEL-NEXT: v_add_co_u32_e32 v6, vcc, v0, v2 |
| ; GFX942-GISEL-NEXT: s_nop 1 |
| ; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v7, vcc, v1, v3, vcc |
| ; GFX942-GISEL-NEXT: v_add_co_u32_e32 v2, vcc, 1, v2 |
| ; GFX942-GISEL-NEXT: global_store_byte v[6:7], v4, off |
| ; GFX942-GISEL-NEXT: s_nop 0 |
| ; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc |
| ; GFX942-GISEL-NEXT: v_cmp_ge_u64_e32 vcc, v[2:3], v[8:9] |
| ; GFX942-GISEL-NEXT: s_or_b64 s[0:1], vcc, s[0:1] |
| ; GFX942-GISEL-NEXT: s_andn2_b64 exec, exec, s[0:1] |
| ; GFX942-GISEL-NEXT: s_cbranch_execnz .LBB12_5 |
| ; GFX942-GISEL-NEXT: .LBB12_6: ; %Flow2 |
| ; GFX942-GISEL-NEXT: s_or_b64 exec, exec, s[2:3] |
| ; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-GISEL-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memset.p1.i64(ptr addrspace(1) noundef nonnull align 4 %dst, i8 40, i64 %size, i1 false) |
| ret void |
| } |
| |
| ; Zero-fill (i8 0) of a runtime-sized region in addrspace(1) whose destination is |
| ; 4-byte aligned. The expected code for both instruction selectors is a two-stage |
| ; expansion: a main loop that advances 16 bytes per iteration using |
| ; global_store_dwordx4 over the 16-byte-multiple part of %size, followed by a |
| ; residual loop that writes the remaining (%size & 15) bytes one at a time using |
| ; global_store_byte. Each loop is skipped when its byte count is zero. |
| define void @memset_p1_varsz_align_4_set0(ptr addrspace(1) align 4 %dst, i64 %size) { |
| ; GFX942-SDAG-LABEL: memset_p1_varsz_align_4_set0: |
| ; GFX942-SDAG: ; %bb.0: ; %entry |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-SDAG-NEXT: v_and_b32_e32 v6, -16, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v7, v3 |
| ; GFX942-SDAG-NEXT: v_and_b32_e32 v4, 15, v2 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v5, 0 |
| ; GFX942-SDAG-NEXT: s_mov_b64 s[0:1], 0 |
| ; GFX942-SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[6:7] |
| ; GFX942-SDAG-NEXT: s_and_saveexec_b64 s[2:3], vcc |
| ; GFX942-SDAG-NEXT: s_cbranch_execz .LBB13_3 |
| ; GFX942-SDAG-NEXT: ; %bb.1: ; %dynamic-memset-expansion-main-body.preheader |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v8, v5 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v9, v5 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v10, v5 |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v11, v5 |
| ; GFX942-SDAG-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX942-SDAG-NEXT: .LBB13_2: ; %dynamic-memset-expansion-main-body |
| ; GFX942-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[12:13], v[0:1], 0, s[4:5] |
| ; GFX942-SDAG-NEXT: s_add_u32 s4, s4, 16 |
| ; GFX942-SDAG-NEXT: s_addc_u32 s5, s5, 0 |
| ; GFX942-SDAG-NEXT: v_cmp_ge_u64_e32 vcc, s[4:5], v[6:7] |
| ; GFX942-SDAG-NEXT: s_or_b64 s[0:1], vcc, s[0:1] |
| ; GFX942-SDAG-NEXT: global_store_dwordx4 v[12:13], v[8:11], off |
| ; GFX942-SDAG-NEXT: s_andn2_b64 exec, exec, s[0:1] |
| ; GFX942-SDAG-NEXT: s_cbranch_execnz .LBB13_2 |
| ; GFX942-SDAG-NEXT: .LBB13_3: ; %Flow4 |
| ; GFX942-SDAG-NEXT: s_or_b64 exec, exec, s[2:3] |
| ; GFX942-SDAG-NEXT: s_mov_b64 s[0:1], 0 |
| ; GFX942-SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] |
| ; GFX942-SDAG-NEXT: s_and_saveexec_b64 s[2:3], vcc |
| ; GFX942-SDAG-NEXT: s_cbranch_execz .LBB13_6 |
| ; GFX942-SDAG-NEXT: ; %bb.4: ; %dynamic-memset-expansion-residual-body.preheader |
| ; GFX942-SDAG-NEXT: v_and_b32_e32 v2, -16, v2 |
| ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3] |
| ; GFX942-SDAG-NEXT: v_mov_b32_e32 v2, 0 |
| ; GFX942-SDAG-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX942-SDAG-NEXT: .LBB13_5: ; %dynamic-memset-expansion-residual-body |
| ; GFX942-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; GFX942-SDAG-NEXT: v_lshl_add_u64 v[6:7], v[0:1], 0, s[4:5] |
| ; GFX942-SDAG-NEXT: s_add_u32 s4, s4, 1 |
| ; GFX942-SDAG-NEXT: s_addc_u32 s5, s5, 0 |
| ; GFX942-SDAG-NEXT: v_cmp_ge_u64_e32 vcc, s[4:5], v[4:5] |
| ; GFX942-SDAG-NEXT: s_or_b64 s[0:1], vcc, s[0:1] |
| ; GFX942-SDAG-NEXT: global_store_byte v[6:7], v2, off |
| ; GFX942-SDAG-NEXT: s_andn2_b64 exec, exec, s[0:1] |
| ; GFX942-SDAG-NEXT: s_cbranch_execnz .LBB13_5 |
| ; GFX942-SDAG-NEXT: .LBB13_6: ; %Flow2 |
| ; GFX942-SDAG-NEXT: s_or_b64 exec, exec, s[2:3] |
| ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-GISEL-LABEL: memset_p1_varsz_align_4_set0: |
| ; GFX942-GISEL: ; %bb.0: ; %entry |
| ; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-GISEL-NEXT: v_and_b32_e32 v8, 15, v2 |
| ; GFX942-GISEL-NEXT: v_sub_co_u32_e32 v10, vcc, v2, v8 |
| ; GFX942-GISEL-NEXT: s_mov_b32 s0, 0 |
| ; GFX942-GISEL-NEXT: s_nop 0 |
| ; GFX942-GISEL-NEXT: v_subbrev_co_u32_e32 v11, vcc, 0, v3, vcc |
| ; GFX942-GISEL-NEXT: v_mov_b32_e32 v9, 0 |
| ; GFX942-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX942-GISEL-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[10:11] |
| ; GFX942-GISEL-NEXT: s_and_saveexec_b64 s[6:7], vcc |
| ; GFX942-GISEL-NEXT: s_cbranch_execz .LBB13_3 |
| ; GFX942-GISEL-NEXT: ; %bb.1: ; %dynamic-memset-expansion-main-body.preheader |
| ; GFX942-GISEL-NEXT: s_mov_b32 s2, s0 |
| ; GFX942-GISEL-NEXT: s_mov_b32 s3, s0 |
| ; GFX942-GISEL-NEXT: s_mov_b32 s1, s0 |
| ; GFX942-GISEL-NEXT: v_mov_b64_e32 v[6:7], s[2:3] |
| ; GFX942-GISEL-NEXT: v_mov_b64_e32 v[4:5], s[0:1] |
| ; GFX942-GISEL-NEXT: v_mov_b64_e32 v[12:13], s[4:5] |
| ; GFX942-GISEL-NEXT: .LBB13_2: ; %dynamic-memset-expansion-main-body |
| ; GFX942-GISEL-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; GFX942-GISEL-NEXT: v_add_co_u32_e32 v14, vcc, v0, v12 |
| ; GFX942-GISEL-NEXT: s_nop 1 |
| ; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v15, vcc, v1, v13, vcc |
| ; GFX942-GISEL-NEXT: v_add_co_u32_e32 v12, vcc, 16, v12 |
| ; GFX942-GISEL-NEXT: global_store_dwordx4 v[14:15], v[4:7], off |
| ; GFX942-GISEL-NEXT: s_nop 0 |
| ; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v13, vcc, 0, v13, vcc |
| ; GFX942-GISEL-NEXT: v_cmp_ge_u64_e32 vcc, v[12:13], v[10:11] |
| ; GFX942-GISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5] |
| ; GFX942-GISEL-NEXT: s_andn2_b64 exec, exec, s[4:5] |
| ; GFX942-GISEL-NEXT: s_cbranch_execnz .LBB13_2 |
| ; GFX942-GISEL-NEXT: .LBB13_3: ; %Flow4 |
| ; GFX942-GISEL-NEXT: s_or_b64 exec, exec, s[6:7] |
| ; GFX942-GISEL-NEXT: s_mov_b64 s[0:1], 0 |
| ; GFX942-GISEL-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[8:9] |
| ; GFX942-GISEL-NEXT: s_and_saveexec_b64 s[2:3], vcc |
| ; GFX942-GISEL-NEXT: s_cbranch_execz .LBB13_6 |
| ; GFX942-GISEL-NEXT: ; %bb.4: ; %dynamic-memset-expansion-residual-body.preheader |
| ; GFX942-GISEL-NEXT: v_lshrrev_b64 v[2:3], 4, v[2:3] |
| ; GFX942-GISEL-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 4, v[0:1] |
| ; GFX942-GISEL-NEXT: v_mov_b32_e32 v4, 0 |
| ; GFX942-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1] |
| ; GFX942-GISEL-NEXT: .LBB13_5: ; %dynamic-memset-expansion-residual-body |
| ; GFX942-GISEL-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; GFX942-GISEL-NEXT: v_add_co_u32_e32 v6, vcc, v0, v2 |
| ; GFX942-GISEL-NEXT: s_nop 1 |
| ; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v7, vcc, v1, v3, vcc |
| ; GFX942-GISEL-NEXT: v_add_co_u32_e32 v2, vcc, 1, v2 |
| ; GFX942-GISEL-NEXT: global_store_byte v[6:7], v4, off |
| ; GFX942-GISEL-NEXT: s_nop 0 |
| ; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc |
| ; GFX942-GISEL-NEXT: v_cmp_ge_u64_e32 vcc, v[2:3], v[8:9] |
| ; GFX942-GISEL-NEXT: s_or_b64 s[0:1], vcc, s[0:1] |
| ; GFX942-GISEL-NEXT: s_andn2_b64 exec, exec, s[0:1] |
| ; GFX942-GISEL-NEXT: s_cbranch_execnz .LBB13_5 |
| ; GFX942-GISEL-NEXT: .LBB13_6: ; %Flow2 |
| ; GFX942-GISEL-NEXT: s_or_b64 exec, exec, s[2:3] |
| ; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-GISEL-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memset.p1.i64(ptr addrspace(1) noundef nonnull align 4 %dst, i8 0, i64 %size, i1 false) |
| ret void |
| } |
| |
| ; Declarations of the llvm.memset intrinsic with an i64 length operand, one per |
| ; destination address space (0, 1, 3 and 5). |
| declare void @llvm.memset.p0.i64(ptr addrspace(0) noalias nocapture writeonly, i8, i64, i1 immarg) |
| declare void @llvm.memset.p1.i64(ptr addrspace(1) noalias nocapture writeonly, i8, i64, i1 immarg) |
| declare void @llvm.memset.p3.i64(ptr addrspace(3) noalias nocapture writeonly, i8, i64, i1 immarg) |
| declare void @llvm.memset.p5.i64(ptr addrspace(5) noalias nocapture writeonly, i8, i64, i1 immarg) |
| |
| |
| ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: |
| ; GFX942: {{.*}} |