; blob: 990a986ffab75e3e3786ae2932dabfc1cf7554b5 [file] [log] [blame] [edit]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 %s -o - | FileCheck -check-prefixes=GFX942,GFX942-SDAG %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 %s -o - | FileCheck -check-prefixes=GFX942,GFX942-GISEL %s
; Runtime-sized memset with a runtime fill byte through an addrspace(0) pointer
; that is 4-byte aligned. The expected gfx942 output for both instruction
; selectors consists of two exec-masked loops:
;   * a main loop that advances the byte offset by 16 per iteration and writes
;     the replicated fill value with flat_store_dwordx4 until the offset
;     reaches the size rounded down to a multiple of 16;
;   * a residual loop that writes the remaining (size & 15) bytes one at a
;     time with flat_store_byte, starting right after the main-loop region.
; Each loop is skipped entirely (s_cbranch_execz) when its trip count is zero.
define void @memset_p0_varsize_align_4_varsetval(ptr addrspace(0) align 4 %dst, i8 %setval, i64 %size) {
; GFX942-SDAG-LABEL: memset_p0_varsize_align_4_varsetval:
; GFX942-SDAG: ; %bb.0: ; %entry
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: v_mov_b32_e32 v10, v3
; GFX942-SDAG-NEXT: v_and_b32_e32 v12, -16, v10
; GFX942-SDAG-NEXT: v_mov_b32_e32 v13, v4
; GFX942-SDAG-NEXT: v_mov_b32_e32 v11, v4
; GFX942-SDAG-NEXT: v_and_b32_e32 v8, 15, v10
; GFX942-SDAG-NEXT: v_mov_b32_e32 v9, 0
; GFX942-SDAG-NEXT: s_mov_b64 s[0:1], 0
; GFX942-SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[12:13]
; GFX942-SDAG-NEXT: s_and_saveexec_b64 s[2:3], vcc
; GFX942-SDAG-NEXT: s_cbranch_execz .LBB0_3
; GFX942-SDAG-NEXT: ; %bb.1: ; %dynamic-memset-expansion-main-body.preheader
; GFX942-SDAG-NEXT: s_mov_b32 s4, 0x4040404
; GFX942-SDAG-NEXT: v_perm_b32 v4, v2, v2, s4
; GFX942-SDAG-NEXT: v_mov_b32_e32 v5, v4
; GFX942-SDAG-NEXT: v_mov_b32_e32 v6, v4
; GFX942-SDAG-NEXT: v_mov_b32_e32 v7, v4
; GFX942-SDAG-NEXT: s_mov_b64 s[4:5], 0
; GFX942-SDAG-NEXT: .LBB0_2: ; %dynamic-memset-expansion-main-body
; GFX942-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX942-SDAG-NEXT: v_lshl_add_u64 v[14:15], v[0:1], 0, s[4:5]
; GFX942-SDAG-NEXT: s_add_u32 s4, s4, 16
; GFX942-SDAG-NEXT: s_addc_u32 s5, s5, 0
; GFX942-SDAG-NEXT: v_cmp_ge_u64_e32 vcc, s[4:5], v[12:13]
; GFX942-SDAG-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
; GFX942-SDAG-NEXT: flat_store_dwordx4 v[14:15], v[4:7]
; GFX942-SDAG-NEXT: s_andn2_b64 exec, exec, s[0:1]
; GFX942-SDAG-NEXT: s_cbranch_execnz .LBB0_2
; GFX942-SDAG-NEXT: .LBB0_3: ; %Flow4
; GFX942-SDAG-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX942-SDAG-NEXT: s_mov_b64 s[0:1], 0
; GFX942-SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[8:9]
; GFX942-SDAG-NEXT: s_and_saveexec_b64 s[2:3], vcc
; GFX942-SDAG-NEXT: s_cbranch_execz .LBB0_6
; GFX942-SDAG-NEXT: ; %bb.4: ; %dynamic-memset-expansion-residual-body.preheader
; GFX942-SDAG-NEXT: v_and_b32_e32 v10, -16, v10
; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[10:11]
; GFX942-SDAG-NEXT: s_mov_b64 s[4:5], 0
; GFX942-SDAG-NEXT: .LBB0_5: ; %dynamic-memset-expansion-residual-body
; GFX942-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX942-SDAG-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[4:5]
; GFX942-SDAG-NEXT: s_add_u32 s4, s4, 1
; GFX942-SDAG-NEXT: s_addc_u32 s5, s5, 0
; GFX942-SDAG-NEXT: v_cmp_ge_u64_e32 vcc, s[4:5], v[8:9]
; GFX942-SDAG-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
; GFX942-SDAG-NEXT: flat_store_byte v[4:5], v2
; GFX942-SDAG-NEXT: s_andn2_b64 exec, exec, s[0:1]
; GFX942-SDAG-NEXT: s_cbranch_execnz .LBB0_5
; GFX942-SDAG-NEXT: .LBB0_6: ; %Flow2
; GFX942-SDAG-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-GISEL-LABEL: memset_p0_varsize_align_4_varsetval:
; GFX942-GISEL: ; %bb.0: ; %entry
; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-GISEL-NEXT: v_mov_b32_e32 v10, v3
; GFX942-GISEL-NEXT: v_and_b32_e32 v8, 15, v10
; GFX942-GISEL-NEXT: v_mov_b32_e32 v11, v4
; GFX942-GISEL-NEXT: v_sub_co_u32_e32 v12, vcc, v10, v8
; GFX942-GISEL-NEXT: v_mov_b32_e32 v9, 0
; GFX942-GISEL-NEXT: s_nop 0
; GFX942-GISEL-NEXT: v_subbrev_co_u32_e32 v13, vcc, 0, v11, vcc
; GFX942-GISEL-NEXT: s_mov_b64 s[0:1], 0
; GFX942-GISEL-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[12:13]
; GFX942-GISEL-NEXT: s_and_saveexec_b64 s[2:3], vcc
; GFX942-GISEL-NEXT: s_cbranch_execz .LBB0_3
; GFX942-GISEL-NEXT: ; %bb.1: ; %dynamic-memset-expansion-main-body.preheader
; GFX942-GISEL-NEXT: v_and_b32_e32 v3, 0xff, v2
; GFX942-GISEL-NEXT: v_lshl_or_b32 v4, v3, 8, v3
; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v3
; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v3, 24, v3
; GFX942-GISEL-NEXT: v_or3_b32 v4, v4, v5, v3
; GFX942-GISEL-NEXT: v_mov_b32_e32 v5, v4
; GFX942-GISEL-NEXT: v_mov_b32_e32 v6, v4
; GFX942-GISEL-NEXT: v_mov_b32_e32 v7, v4
; GFX942-GISEL-NEXT: v_mov_b64_e32 v[14:15], s[0:1]
; GFX942-GISEL-NEXT: .LBB0_2: ; %dynamic-memset-expansion-main-body
; GFX942-GISEL-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX942-GISEL-NEXT: v_add_co_u32_e32 v16, vcc, v0, v14
; GFX942-GISEL-NEXT: s_nop 1
; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v17, vcc, v1, v15, vcc
; GFX942-GISEL-NEXT: v_add_co_u32_e32 v14, vcc, 16, v14
; GFX942-GISEL-NEXT: flat_store_dwordx4 v[16:17], v[4:7]
; GFX942-GISEL-NEXT: s_nop 0
; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v15, vcc, 0, v15, vcc
; GFX942-GISEL-NEXT: v_cmp_ge_u64_e32 vcc, v[14:15], v[12:13]
; GFX942-GISEL-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
; GFX942-GISEL-NEXT: s_andn2_b64 exec, exec, s[0:1]
; GFX942-GISEL-NEXT: s_cbranch_execnz .LBB0_2
; GFX942-GISEL-NEXT: .LBB0_3: ; %Flow4
; GFX942-GISEL-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX942-GISEL-NEXT: s_mov_b64 s[0:1], 0
; GFX942-GISEL-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[8:9]
; GFX942-GISEL-NEXT: s_and_saveexec_b64 s[2:3], vcc
; GFX942-GISEL-NEXT: s_cbranch_execz .LBB0_6
; GFX942-GISEL-NEXT: ; %bb.4: ; %dynamic-memset-expansion-residual-body.preheader
; GFX942-GISEL-NEXT: v_lshrrev_b64 v[4:5], 4, v[10:11]
; GFX942-GISEL-NEXT: v_lshl_add_u64 v[0:1], v[4:5], 4, v[0:1]
; GFX942-GISEL-NEXT: v_mov_b64_e32 v[4:5], s[0:1]
; GFX942-GISEL-NEXT: .LBB0_5: ; %dynamic-memset-expansion-residual-body
; GFX942-GISEL-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX942-GISEL-NEXT: v_add_co_u32_e32 v6, vcc, v0, v4
; GFX942-GISEL-NEXT: s_nop 1
; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v7, vcc, v1, v5, vcc
; GFX942-GISEL-NEXT: v_add_co_u32_e32 v4, vcc, 1, v4
; GFX942-GISEL-NEXT: flat_store_byte v[6:7], v2
; GFX942-GISEL-NEXT: s_nop 0
; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v5, vcc
; GFX942-GISEL-NEXT: v_cmp_ge_u64_e32 vcc, v[4:5], v[8:9]
; GFX942-GISEL-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
; GFX942-GISEL-NEXT: s_andn2_b64 exec, exec, s[0:1]
; GFX942-GISEL-NEXT: s_cbranch_execnz .LBB0_5
; GFX942-GISEL-NEXT: .LBB0_6: ; %Flow2
; GFX942-GISEL-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX942-GISEL-NEXT: s_setpc_b64 s[30:31]
entry:
; Both the length (%size) and the fill byte (%setval) are function arguments,
; so neither is a compile-time constant. The trailing i1 false is the
; intrinsic's volatile flag, i.e. this is a non-volatile memset.
tail call void @llvm.memset.p0.i64(ptr addrspace(0) noundef nonnull align 4 %dst, i8 %setval, i64 %size, i1 false)
ret void
}
; Runtime-sized memset with a runtime fill byte through an addrspace(1) pointer
; that is 4-byte aligned. The expected gfx942 output for both instruction
; selectors consists of two exec-masked loops:
;   * a main loop that advances the byte offset by 16 per iteration and writes
;     the replicated fill value with global_store_dwordx4 until the offset
;     reaches the size rounded down to a multiple of 16;
;   * a residual loop that writes the remaining (size & 15) bytes one at a
;     time with global_store_byte, starting right after the main-loop region.
; Each loop is skipped entirely (s_cbranch_execz) when its trip count is zero.
define void @memset_p1_varsize_align_4_varsetval(ptr addrspace(1) align 4 %dst, i8 %setval, i64 %size) {
; GFX942-SDAG-LABEL: memset_p1_varsize_align_4_varsetval:
; GFX942-SDAG: ; %bb.0: ; %entry
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: v_mov_b32_e32 v10, v3
; GFX942-SDAG-NEXT: v_and_b32_e32 v12, -16, v10
; GFX942-SDAG-NEXT: v_mov_b32_e32 v13, v4
; GFX942-SDAG-NEXT: v_mov_b32_e32 v11, v4
; GFX942-SDAG-NEXT: v_and_b32_e32 v8, 15, v10
; GFX942-SDAG-NEXT: v_mov_b32_e32 v9, 0
; GFX942-SDAG-NEXT: s_mov_b64 s[0:1], 0
; GFX942-SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[12:13]
; GFX942-SDAG-NEXT: s_and_saveexec_b64 s[2:3], vcc
; GFX942-SDAG-NEXT: s_cbranch_execz .LBB1_3
; GFX942-SDAG-NEXT: ; %bb.1: ; %dynamic-memset-expansion-main-body.preheader
; GFX942-SDAG-NEXT: s_mov_b32 s4, 0x4040404
; GFX942-SDAG-NEXT: v_perm_b32 v4, v2, v2, s4
; GFX942-SDAG-NEXT: v_mov_b32_e32 v5, v4
; GFX942-SDAG-NEXT: v_mov_b32_e32 v6, v4
; GFX942-SDAG-NEXT: v_mov_b32_e32 v7, v4
; GFX942-SDAG-NEXT: s_mov_b64 s[4:5], 0
; GFX942-SDAG-NEXT: .LBB1_2: ; %dynamic-memset-expansion-main-body
; GFX942-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX942-SDAG-NEXT: v_lshl_add_u64 v[14:15], v[0:1], 0, s[4:5]
; GFX942-SDAG-NEXT: s_add_u32 s4, s4, 16
; GFX942-SDAG-NEXT: s_addc_u32 s5, s5, 0
; GFX942-SDAG-NEXT: v_cmp_ge_u64_e32 vcc, s[4:5], v[12:13]
; GFX942-SDAG-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
; GFX942-SDAG-NEXT: global_store_dwordx4 v[14:15], v[4:7], off
; GFX942-SDAG-NEXT: s_andn2_b64 exec, exec, s[0:1]
; GFX942-SDAG-NEXT: s_cbranch_execnz .LBB1_2
; GFX942-SDAG-NEXT: .LBB1_3: ; %Flow4
; GFX942-SDAG-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX942-SDAG-NEXT: s_mov_b64 s[0:1], 0
; GFX942-SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[8:9]
; GFX942-SDAG-NEXT: s_and_saveexec_b64 s[2:3], vcc
; GFX942-SDAG-NEXT: s_cbranch_execz .LBB1_6
; GFX942-SDAG-NEXT: ; %bb.4: ; %dynamic-memset-expansion-residual-body.preheader
; GFX942-SDAG-NEXT: v_and_b32_e32 v10, -16, v10
; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[10:11]
; GFX942-SDAG-NEXT: s_mov_b64 s[4:5], 0
; GFX942-SDAG-NEXT: .LBB1_5: ; %dynamic-memset-expansion-residual-body
; GFX942-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX942-SDAG-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[4:5]
; GFX942-SDAG-NEXT: s_add_u32 s4, s4, 1
; GFX942-SDAG-NEXT: s_addc_u32 s5, s5, 0
; GFX942-SDAG-NEXT: v_cmp_ge_u64_e32 vcc, s[4:5], v[8:9]
; GFX942-SDAG-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
; GFX942-SDAG-NEXT: global_store_byte v[4:5], v2, off
; GFX942-SDAG-NEXT: s_andn2_b64 exec, exec, s[0:1]
; GFX942-SDAG-NEXT: s_cbranch_execnz .LBB1_5
; GFX942-SDAG-NEXT: .LBB1_6: ; %Flow2
; GFX942-SDAG-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-GISEL-LABEL: memset_p1_varsize_align_4_varsetval:
; GFX942-GISEL: ; %bb.0: ; %entry
; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-GISEL-NEXT: v_mov_b32_e32 v10, v3
; GFX942-GISEL-NEXT: v_and_b32_e32 v8, 15, v10
; GFX942-GISEL-NEXT: v_mov_b32_e32 v11, v4
; GFX942-GISEL-NEXT: v_sub_co_u32_e32 v12, vcc, v10, v8
; GFX942-GISEL-NEXT: v_mov_b32_e32 v9, 0
; GFX942-GISEL-NEXT: s_nop 0
; GFX942-GISEL-NEXT: v_subbrev_co_u32_e32 v13, vcc, 0, v11, vcc
; GFX942-GISEL-NEXT: s_mov_b64 s[0:1], 0
; GFX942-GISEL-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[12:13]
; GFX942-GISEL-NEXT: s_and_saveexec_b64 s[2:3], vcc
; GFX942-GISEL-NEXT: s_cbranch_execz .LBB1_3
; GFX942-GISEL-NEXT: ; %bb.1: ; %dynamic-memset-expansion-main-body.preheader
; GFX942-GISEL-NEXT: v_and_b32_e32 v3, 0xff, v2
; GFX942-GISEL-NEXT: v_lshl_or_b32 v4, v3, 8, v3
; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v3
; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v3, 24, v3
; GFX942-GISEL-NEXT: v_or3_b32 v4, v4, v5, v3
; GFX942-GISEL-NEXT: v_mov_b32_e32 v5, v4
; GFX942-GISEL-NEXT: v_mov_b32_e32 v6, v4
; GFX942-GISEL-NEXT: v_mov_b32_e32 v7, v4
; GFX942-GISEL-NEXT: v_mov_b64_e32 v[14:15], s[0:1]
; GFX942-GISEL-NEXT: .LBB1_2: ; %dynamic-memset-expansion-main-body
; GFX942-GISEL-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX942-GISEL-NEXT: v_add_co_u32_e32 v16, vcc, v0, v14
; GFX942-GISEL-NEXT: s_nop 1
; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v17, vcc, v1, v15, vcc
; GFX942-GISEL-NEXT: v_add_co_u32_e32 v14, vcc, 16, v14
; GFX942-GISEL-NEXT: global_store_dwordx4 v[16:17], v[4:7], off
; GFX942-GISEL-NEXT: s_nop 0
; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v15, vcc, 0, v15, vcc
; GFX942-GISEL-NEXT: v_cmp_ge_u64_e32 vcc, v[14:15], v[12:13]
; GFX942-GISEL-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
; GFX942-GISEL-NEXT: s_andn2_b64 exec, exec, s[0:1]
; GFX942-GISEL-NEXT: s_cbranch_execnz .LBB1_2
; GFX942-GISEL-NEXT: .LBB1_3: ; %Flow4
; GFX942-GISEL-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX942-GISEL-NEXT: s_mov_b64 s[0:1], 0
; GFX942-GISEL-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[8:9]
; GFX942-GISEL-NEXT: s_and_saveexec_b64 s[2:3], vcc
; GFX942-GISEL-NEXT: s_cbranch_execz .LBB1_6
; GFX942-GISEL-NEXT: ; %bb.4: ; %dynamic-memset-expansion-residual-body.preheader
; GFX942-GISEL-NEXT: v_lshrrev_b64 v[4:5], 4, v[10:11]
; GFX942-GISEL-NEXT: v_lshl_add_u64 v[0:1], v[4:5], 4, v[0:1]
; GFX942-GISEL-NEXT: v_mov_b64_e32 v[4:5], s[0:1]
; GFX942-GISEL-NEXT: .LBB1_5: ; %dynamic-memset-expansion-residual-body
; GFX942-GISEL-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX942-GISEL-NEXT: v_add_co_u32_e32 v6, vcc, v0, v4
; GFX942-GISEL-NEXT: s_nop 1
; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v7, vcc, v1, v5, vcc
; GFX942-GISEL-NEXT: v_add_co_u32_e32 v4, vcc, 1, v4
; GFX942-GISEL-NEXT: global_store_byte v[6:7], v2, off
; GFX942-GISEL-NEXT: s_nop 0
; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v5, vcc
; GFX942-GISEL-NEXT: v_cmp_ge_u64_e32 vcc, v[4:5], v[8:9]
; GFX942-GISEL-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
; GFX942-GISEL-NEXT: s_andn2_b64 exec, exec, s[0:1]
; GFX942-GISEL-NEXT: s_cbranch_execnz .LBB1_5
; GFX942-GISEL-NEXT: .LBB1_6: ; %Flow2
; GFX942-GISEL-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-GISEL-NEXT: s_setpc_b64 s[30:31]
entry:
; Both the length (%size) and the fill byte (%setval) are function arguments,
; so neither is a compile-time constant. The trailing i1 false is the
; intrinsic's volatile flag, i.e. this is a non-volatile memset.
tail call void @llvm.memset.p1.i64(ptr addrspace(1) noundef nonnull align 4 %dst, i8 %setval, i64 %size, i1 false)
ret void
}
; Runtime-sized memset with a runtime fill byte through an addrspace(3) pointer
; that is 4-byte aligned. In the expected output the destination address is
; held in a single 32-bit VGPR that is bumped alongside the 64-bit byte
; counter. Both instruction selectors are expected to emit two exec-masked
; loops:
;   * a main loop covering 16 bytes per iteration, written as two
;     ds_write2_b32 instructions by SelectionDAG and as one ds_write2_b64 by
;     GlobalISel, running until the counter reaches the size rounded down to a
;     multiple of 16;
;   * a residual loop that writes the remaining (size & 15) bytes one at a
;     time with ds_write_b8.
; Each loop is skipped entirely (s_cbranch_execz) when its trip count is zero.
define void @memset_p3_varsize_align_4_varsetval(ptr addrspace(3) align 4 %dst, i8 %setval, i64 %size) {
; GFX942-SDAG-LABEL: memset_p3_varsize_align_4_varsetval:
; GFX942-SDAG: ; %bb.0: ; %entry
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: v_mov_b32_e32 v5, v3
; GFX942-SDAG-NEXT: v_and_b32_e32 v4, -16, v2
; GFX942-SDAG-NEXT: v_and_b32_e32 v10, 15, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v11, 0
; GFX942-SDAG-NEXT: s_mov_b64 s[0:1], 0
; GFX942-SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5]
; GFX942-SDAG-NEXT: s_and_saveexec_b64 s[2:3], vcc
; GFX942-SDAG-NEXT: s_cbranch_execz .LBB2_3
; GFX942-SDAG-NEXT: ; %bb.1: ; %dynamic-memset-expansion-main-body.preheader
; GFX942-SDAG-NEXT: s_mov_b32 s4, 0x4040404
; GFX942-SDAG-NEXT: v_perm_b32 v6, v1, v1, s4
; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, v6
; GFX942-SDAG-NEXT: v_mov_b32_e32 v8, v6
; GFX942-SDAG-NEXT: v_mov_b32_e32 v7, v6
; GFX942-SDAG-NEXT: v_mov_b32_e32 v9, v0
; GFX942-SDAG-NEXT: s_mov_b64 s[4:5], 0
; GFX942-SDAG-NEXT: .LBB2_2: ; %dynamic-memset-expansion-main-body
; GFX942-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX942-SDAG-NEXT: s_add_u32 s4, s4, 16
; GFX942-SDAG-NEXT: s_addc_u32 s5, s5, 0
; GFX942-SDAG-NEXT: v_cmp_ge_u64_e32 vcc, s[4:5], v[4:5]
; GFX942-SDAG-NEXT: ds_write2_b32 v9, v8, v7 offset0:2 offset1:3
; GFX942-SDAG-NEXT: ds_write2_b32 v9, v6, v3 offset1:1
; GFX942-SDAG-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
; GFX942-SDAG-NEXT: v_add_u32_e32 v9, 16, v9
; GFX942-SDAG-NEXT: s_andn2_b64 exec, exec, s[0:1]
; GFX942-SDAG-NEXT: s_cbranch_execnz .LBB2_2
; GFX942-SDAG-NEXT: .LBB2_3: ; %Flow7
; GFX942-SDAG-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX942-SDAG-NEXT: s_mov_b64 s[0:1], 0
; GFX942-SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[10:11]
; GFX942-SDAG-NEXT: s_and_saveexec_b64 s[2:3], vcc
; GFX942-SDAG-NEXT: s_cbranch_execz .LBB2_6
; GFX942-SDAG-NEXT: ; %bb.4: ; %dynamic-memset-expansion-residual-body.preheader
; GFX942-SDAG-NEXT: v_and_b32_e32 v2, -16, v2
; GFX942-SDAG-NEXT: v_add_u32_e32 v0, v0, v2
; GFX942-SDAG-NEXT: s_mov_b64 s[4:5], 0
; GFX942-SDAG-NEXT: .LBB2_5: ; %dynamic-memset-expansion-residual-body
; GFX942-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX942-SDAG-NEXT: s_add_u32 s4, s4, 1
; GFX942-SDAG-NEXT: s_addc_u32 s5, s5, 0
; GFX942-SDAG-NEXT: v_cmp_ge_u64_e32 vcc, s[4:5], v[10:11]
; GFX942-SDAG-NEXT: ds_write_b8 v0, v1
; GFX942-SDAG-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
; GFX942-SDAG-NEXT: v_add_u32_e32 v0, 1, v0
; GFX942-SDAG-NEXT: s_andn2_b64 exec, exec, s[0:1]
; GFX942-SDAG-NEXT: s_cbranch_execnz .LBB2_5
; GFX942-SDAG-NEXT: .LBB2_6: ; %Flow5
; GFX942-SDAG-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX942-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-GISEL-LABEL: memset_p3_varsize_align_4_varsetval:
; GFX942-GISEL: ; %bb.0: ; %entry
; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-GISEL-NEXT: v_and_b32_e32 v4, 15, v2
; GFX942-GISEL-NEXT: v_sub_co_u32_e32 v6, vcc, v2, v4
; GFX942-GISEL-NEXT: v_mov_b32_e32 v5, 0
; GFX942-GISEL-NEXT: s_nop 0
; GFX942-GISEL-NEXT: v_subbrev_co_u32_e32 v7, vcc, 0, v3, vcc
; GFX942-GISEL-NEXT: s_mov_b64 s[0:1], 0
; GFX942-GISEL-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[6:7]
; GFX942-GISEL-NEXT: s_and_saveexec_b64 s[2:3], vcc
; GFX942-GISEL-NEXT: s_cbranch_execz .LBB2_3
; GFX942-GISEL-NEXT: ; %bb.1: ; %dynamic-memset-expansion-main-body.preheader
; GFX942-GISEL-NEXT: v_and_b32_e32 v8, 0xff, v1
; GFX942-GISEL-NEXT: v_lshl_or_b32 v9, v8, 8, v8
; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v10, 16, v8
; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v8, 24, v8
; GFX942-GISEL-NEXT: v_or3_b32 v8, v9, v10, v8
; GFX942-GISEL-NEXT: v_mov_b32_e32 v9, v8
; GFX942-GISEL-NEXT: v_mov_b32_e32 v10, v8
; GFX942-GISEL-NEXT: v_mov_b32_e32 v11, v8
; GFX942-GISEL-NEXT: v_mov_b32_e32 v14, v0
; GFX942-GISEL-NEXT: v_mov_b64_e32 v[12:13], s[0:1]
; GFX942-GISEL-NEXT: .LBB2_2: ; %dynamic-memset-expansion-main-body
; GFX942-GISEL-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX942-GISEL-NEXT: v_add_co_u32_e32 v12, vcc, 16, v12
; GFX942-GISEL-NEXT: ds_write2_b64 v14, v[8:9], v[10:11] offset1:1
; GFX942-GISEL-NEXT: s_nop 0
; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v13, vcc, 0, v13, vcc
; GFX942-GISEL-NEXT: v_cmp_ge_u64_e32 vcc, v[12:13], v[6:7]
; GFX942-GISEL-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
; GFX942-GISEL-NEXT: v_add_u32_e32 v14, 16, v14
; GFX942-GISEL-NEXT: s_andn2_b64 exec, exec, s[0:1]
; GFX942-GISEL-NEXT: s_cbranch_execnz .LBB2_2
; GFX942-GISEL-NEXT: .LBB2_3: ; %Flow7
; GFX942-GISEL-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX942-GISEL-NEXT: s_mov_b64 s[0:1], 0
; GFX942-GISEL-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5]
; GFX942-GISEL-NEXT: s_and_saveexec_b64 s[2:3], vcc
; GFX942-GISEL-NEXT: s_cbranch_execz .LBB2_6
; GFX942-GISEL-NEXT: ; %bb.4: ; %dynamic-memset-expansion-residual-body.preheader
; GFX942-GISEL-NEXT: v_lshrrev_b64 v[2:3], 4, v[2:3]
; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v2, 4, v2
; GFX942-GISEL-NEXT: v_add_u32_e32 v0, v0, v2
; GFX942-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1]
; GFX942-GISEL-NEXT: .LBB2_5: ; %dynamic-memset-expansion-residual-body
; GFX942-GISEL-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX942-GISEL-NEXT: v_add_co_u32_e32 v2, vcc, 1, v2
; GFX942-GISEL-NEXT: ds_write_b8 v0, v1
; GFX942-GISEL-NEXT: s_nop 0
; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
; GFX942-GISEL-NEXT: v_cmp_ge_u64_e32 vcc, v[2:3], v[4:5]
; GFX942-GISEL-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
; GFX942-GISEL-NEXT: v_add_u32_e32 v0, 1, v0
; GFX942-GISEL-NEXT: s_andn2_b64 exec, exec, s[0:1]
; GFX942-GISEL-NEXT: s_cbranch_execnz .LBB2_5
; GFX942-GISEL-NEXT: .LBB2_6: ; %Flow5
; GFX942-GISEL-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX942-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-GISEL-NEXT: s_setpc_b64 s[30:31]
entry:
; Both the length (%size) and the fill byte (%setval) are function arguments,
; so neither is a compile-time constant. The trailing i1 false is the
; intrinsic's volatile flag, i.e. this is a non-volatile memset.
tail call void @llvm.memset.p3.i64(ptr addrspace(3) noundef nonnull align 4 %dst, i8 %setval, i64 %size, i1 false)
ret void
}
; Runtime-sized memset with a runtime fill byte through an addrspace(5) pointer
; that is 4-byte aligned. In the expected output the destination address is
; held in a single 32-bit VGPR that is bumped alongside the 64-bit byte
; counter. Both instruction selectors are expected to emit two exec-masked
; loops:
;   * a main loop that writes 16 bytes per iteration with
;     scratch_store_dwordx4 until the counter reaches the size rounded down to
;     a multiple of 16;
;   * a residual loop that writes the remaining (size & 15) bytes one at a
;     time with scratch_store_byte.
; Each loop is skipped entirely (s_cbranch_execz) when its trip count is zero.
define void @memset_p5_varsize_align_4_varsetval(ptr addrspace(5) align 4 %dst, i8 %setval, i64 %size) {
; GFX942-SDAG-LABEL: memset_p5_varsize_align_4_varsetval:
; GFX942-SDAG: ; %bb.0: ; %entry
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: v_mov_b32_e32 v5, v3
; GFX942-SDAG-NEXT: v_and_b32_e32 v4, -16, v2
; GFX942-SDAG-NEXT: v_and_b32_e32 v10, 15, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v11, 0
; GFX942-SDAG-NEXT: s_mov_b64 s[0:1], 0
; GFX942-SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5]
; GFX942-SDAG-NEXT: s_and_saveexec_b64 s[2:3], vcc
; GFX942-SDAG-NEXT: s_cbranch_execz .LBB3_3
; GFX942-SDAG-NEXT: ; %bb.1: ; %dynamic-memset-expansion-main-body.preheader
; GFX942-SDAG-NEXT: s_mov_b32 s4, 0x4040404
; GFX942-SDAG-NEXT: v_perm_b32 v6, v1, v1, s4
; GFX942-SDAG-NEXT: v_mov_b32_e32 v7, v6
; GFX942-SDAG-NEXT: v_mov_b32_e32 v8, v6
; GFX942-SDAG-NEXT: v_mov_b32_e32 v9, v6
; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, v0
; GFX942-SDAG-NEXT: s_mov_b64 s[4:5], 0
; GFX942-SDAG-NEXT: .LBB3_2: ; %dynamic-memset-expansion-main-body
; GFX942-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX942-SDAG-NEXT: s_add_u32 s4, s4, 16
; GFX942-SDAG-NEXT: s_addc_u32 s5, s5, 0
; GFX942-SDAG-NEXT: v_cmp_ge_u64_e32 vcc, s[4:5], v[4:5]
; GFX942-SDAG-NEXT: scratch_store_dwordx4 v3, v[6:9], off
; GFX942-SDAG-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
; GFX942-SDAG-NEXT: v_add_u32_e32 v3, 16, v3
; GFX942-SDAG-NEXT: s_andn2_b64 exec, exec, s[0:1]
; GFX942-SDAG-NEXT: s_cbranch_execnz .LBB3_2
; GFX942-SDAG-NEXT: .LBB3_3: ; %Flow7
; GFX942-SDAG-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX942-SDAG-NEXT: s_mov_b64 s[0:1], 0
; GFX942-SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[10:11]
; GFX942-SDAG-NEXT: s_and_saveexec_b64 s[2:3], vcc
; GFX942-SDAG-NEXT: s_cbranch_execz .LBB3_6
; GFX942-SDAG-NEXT: ; %bb.4: ; %dynamic-memset-expansion-residual-body.preheader
; GFX942-SDAG-NEXT: v_and_b32_e32 v2, -16, v2
; GFX942-SDAG-NEXT: v_add_u32_e32 v0, v0, v2
; GFX942-SDAG-NEXT: s_mov_b64 s[4:5], 0
; GFX942-SDAG-NEXT: .LBB3_5: ; %dynamic-memset-expansion-residual-body
; GFX942-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX942-SDAG-NEXT: s_add_u32 s4, s4, 1
; GFX942-SDAG-NEXT: s_addc_u32 s5, s5, 0
; GFX942-SDAG-NEXT: v_cmp_ge_u64_e32 vcc, s[4:5], v[10:11]
; GFX942-SDAG-NEXT: scratch_store_byte v0, v1, off
; GFX942-SDAG-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
; GFX942-SDAG-NEXT: v_add_u32_e32 v0, 1, v0
; GFX942-SDAG-NEXT: s_andn2_b64 exec, exec, s[0:1]
; GFX942-SDAG-NEXT: s_cbranch_execnz .LBB3_5
; GFX942-SDAG-NEXT: .LBB3_6: ; %Flow5
; GFX942-SDAG-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-GISEL-LABEL: memset_p5_varsize_align_4_varsetval:
; GFX942-GISEL: ; %bb.0: ; %entry
; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-GISEL-NEXT: v_and_b32_e32 v8, 15, v2
; GFX942-GISEL-NEXT: v_sub_co_u32_e32 v10, vcc, v2, v8
; GFX942-GISEL-NEXT: v_mov_b32_e32 v9, 0
; GFX942-GISEL-NEXT: s_nop 0
; GFX942-GISEL-NEXT: v_subbrev_co_u32_e32 v11, vcc, 0, v3, vcc
; GFX942-GISEL-NEXT: s_mov_b64 s[0:1], 0
; GFX942-GISEL-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[10:11]
; GFX942-GISEL-NEXT: s_and_saveexec_b64 s[2:3], vcc
; GFX942-GISEL-NEXT: s_cbranch_execz .LBB3_3
; GFX942-GISEL-NEXT: ; %bb.1: ; %dynamic-memset-expansion-main-body.preheader
; GFX942-GISEL-NEXT: v_and_b32_e32 v4, 0xff, v1
; GFX942-GISEL-NEXT: v_lshl_or_b32 v5, v4, 8, v4
; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v6, 16, v4
; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v4, 24, v4
; GFX942-GISEL-NEXT: v_or3_b32 v4, v5, v6, v4
; GFX942-GISEL-NEXT: v_mov_b32_e32 v5, v4
; GFX942-GISEL-NEXT: v_mov_b32_e32 v6, v4
; GFX942-GISEL-NEXT: v_mov_b32_e32 v7, v4
; GFX942-GISEL-NEXT: v_mov_b32_e32 v14, v0
; GFX942-GISEL-NEXT: v_mov_b64_e32 v[12:13], s[0:1]
; GFX942-GISEL-NEXT: .LBB3_2: ; %dynamic-memset-expansion-main-body
; GFX942-GISEL-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX942-GISEL-NEXT: v_add_co_u32_e32 v12, vcc, 16, v12
; GFX942-GISEL-NEXT: scratch_store_dwordx4 v14, v[4:7], off
; GFX942-GISEL-NEXT: s_nop 0
; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v13, vcc, 0, v13, vcc
; GFX942-GISEL-NEXT: v_cmp_ge_u64_e32 vcc, v[12:13], v[10:11]
; GFX942-GISEL-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
; GFX942-GISEL-NEXT: v_add_u32_e32 v14, 16, v14
; GFX942-GISEL-NEXT: s_andn2_b64 exec, exec, s[0:1]
; GFX942-GISEL-NEXT: s_cbranch_execnz .LBB3_2
; GFX942-GISEL-NEXT: .LBB3_3: ; %Flow7
; GFX942-GISEL-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX942-GISEL-NEXT: s_mov_b64 s[0:1], 0
; GFX942-GISEL-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[8:9]
; GFX942-GISEL-NEXT: s_and_saveexec_b64 s[2:3], vcc
; GFX942-GISEL-NEXT: s_cbranch_execz .LBB3_6
; GFX942-GISEL-NEXT: ; %bb.4: ; %dynamic-memset-expansion-residual-body.preheader
; GFX942-GISEL-NEXT: v_lshrrev_b64 v[2:3], 4, v[2:3]
; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v2, 4, v2
; GFX942-GISEL-NEXT: v_add_u32_e32 v0, v0, v2
; GFX942-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1]
; GFX942-GISEL-NEXT: .LBB3_5: ; %dynamic-memset-expansion-residual-body
; GFX942-GISEL-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX942-GISEL-NEXT: v_add_co_u32_e32 v2, vcc, 1, v2
; GFX942-GISEL-NEXT: scratch_store_byte v0, v1, off
; GFX942-GISEL-NEXT: s_nop 0
; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
; GFX942-GISEL-NEXT: v_cmp_ge_u64_e32 vcc, v[2:3], v[8:9]
; GFX942-GISEL-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
; GFX942-GISEL-NEXT: v_add_u32_e32 v0, 1, v0
; GFX942-GISEL-NEXT: s_andn2_b64 exec, exec, s[0:1]
; GFX942-GISEL-NEXT: s_cbranch_execnz .LBB3_5
; GFX942-GISEL-NEXT: .LBB3_6: ; %Flow5
; GFX942-GISEL-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-GISEL-NEXT: s_setpc_b64 s[30:31]
entry:
; Both the length (%size) and the fill byte (%setval) are function arguments,
; so neither is a compile-time constant. The trailing i1 false is the
; intrinsic's volatile flag, i.e. this is a non-volatile memset.
tail call void @llvm.memset.p5.i64(ptr addrspace(5) noundef nonnull align 4 %dst, i8 %setval, i64 %size, i1 false)
ret void
}
define void @memset_p0_sz1055_align_4_varsetval(ptr addrspace(0) align 4 %dst, i8 %setval) {
; GFX942-SDAG-LABEL: memset_p0_sz1055_align_4_varsetval:
; GFX942-SDAG: ; %bb.0: ; %entry
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: s_mov_b32 s0, 0x4040404
; GFX942-SDAG-NEXT: v_perm_b32 v4, v2, v2, s0
; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a0, v40 ; Reload Reuse
; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a1, v41 ; Reload Reuse
; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a2, v42 ; Reload Reuse
; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a3, v43 ; Reload Reuse
; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a4, v44 ; Reload Reuse
; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a5, v45 ; Reload Reuse
; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a6, v46 ; Reload Reuse
; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a7, v47 ; Reload Reuse
; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a8, v56 ; Reload Reuse
; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a9, v57 ; Reload Reuse
; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a10, v58 ; Reload Reuse
; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a11, v59 ; Reload Reuse
; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a12, v60 ; Reload Reuse
; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a13, v61 ; Reload Reuse
; GFX942-SDAG-NEXT: v_mov_b32_e32 v5, v4
; GFX942-SDAG-NEXT: v_mov_b32_e32 v6, v4
; GFX942-SDAG-NEXT: v_mov_b32_e32 v7, v4
; GFX942-SDAG-NEXT: v_mov_b32_e32 v8, v4
; GFX942-SDAG-NEXT: v_mov_b32_e32 v9, v4
; GFX942-SDAG-NEXT: v_mov_b32_e32 v10, v4
; GFX942-SDAG-NEXT: v_mov_b32_e32 v11, v4
; GFX942-SDAG-NEXT: v_mov_b32_e32 v12, v4
; GFX942-SDAG-NEXT: v_mov_b32_e32 v13, v4
; GFX942-SDAG-NEXT: v_mov_b32_e32 v14, v4
; GFX942-SDAG-NEXT: v_mov_b32_e32 v15, v4
; GFX942-SDAG-NEXT: v_mov_b32_e32 v16, v4
; GFX942-SDAG-NEXT: v_mov_b32_e32 v17, v4
; GFX942-SDAG-NEXT: v_mov_b32_e32 v18, v4
; GFX942-SDAG-NEXT: v_mov_b32_e32 v19, v4
; GFX942-SDAG-NEXT: v_mov_b32_e32 v20, v4
; GFX942-SDAG-NEXT: v_mov_b32_e32 v21, v4
; GFX942-SDAG-NEXT: v_mov_b32_e32 v22, v4
; GFX942-SDAG-NEXT: v_mov_b32_e32 v23, v4
; GFX942-SDAG-NEXT: v_mov_b32_e32 v24, v4
; GFX942-SDAG-NEXT: v_mov_b32_e32 v25, v4
; GFX942-SDAG-NEXT: v_mov_b32_e32 v26, v4
; GFX942-SDAG-NEXT: v_mov_b32_e32 v27, v4
; GFX942-SDAG-NEXT: v_mov_b32_e32 v28, v4
; GFX942-SDAG-NEXT: v_mov_b32_e32 v29, v4
; GFX942-SDAG-NEXT: v_mov_b32_e32 v30, v4
; GFX942-SDAG-NEXT: v_mov_b32_e32 v31, v4
; GFX942-SDAG-NEXT: v_mov_b32_e32 v32, v4
; GFX942-SDAG-NEXT: v_mov_b32_e32 v33, v4
; GFX942-SDAG-NEXT: v_mov_b32_e32 v34, v4
; GFX942-SDAG-NEXT: v_mov_b32_e32 v35, v4
; GFX942-SDAG-NEXT: s_mov_b64 s[0:1], 0
; GFX942-SDAG-NEXT: s_mov_b64 s[2:3], 0x70
; GFX942-SDAG-NEXT: s_mov_b64 s[4:5], 0x60
; GFX942-SDAG-NEXT: s_mov_b64 s[6:7], 0x50
; GFX942-SDAG-NEXT: s_mov_b64 s[8:9], 0xf0
; GFX942-SDAG-NEXT: s_mov_b64 s[10:11], 0xe0
; GFX942-SDAG-NEXT: s_mov_b64 s[12:13], 0xd0
; GFX942-SDAG-NEXT: s_mov_b64 s[14:15], 0xc0
; GFX942-SDAG-NEXT: s_mov_b64 s[16:17], 0xb0
; GFX942-SDAG-NEXT: s_mov_b64 s[18:19], 0xa0
; GFX942-SDAG-NEXT: s_mov_b64 s[20:21], 0x90
; GFX942-SDAG-NEXT: v_mov_b64_e32 v[36:37], 0x400
; GFX942-SDAG-NEXT: .LBB4_1: ; %static-memset-expansion-main-body
; GFX942-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX942-SDAG-NEXT: v_lshl_add_u64 v[38:39], v[0:1], 0, s[0:1]
; GFX942-SDAG-NEXT: s_add_u32 s0, s0, 0x100
; GFX942-SDAG-NEXT: s_addc_u32 s1, s1, 0
; GFX942-SDAG-NEXT: v_cmp_lt_u64_e32 vcc, s[0:1], v[36:37]
; GFX942-SDAG-NEXT: v_lshl_add_u64 v[48:49], v[38:39], 0, s[2:3]
; GFX942-SDAG-NEXT: s_and_b64 vcc, exec, vcc
; GFX942-SDAG-NEXT: v_lshl_add_u64 v[50:51], v[38:39], 0, s[4:5]
; GFX942-SDAG-NEXT: v_lshl_add_u64 v[52:53], v[38:39], 0, s[6:7]
; GFX942-SDAG-NEXT: flat_store_dwordx4 v[38:39], v[20:23] offset:64
; GFX942-SDAG-NEXT: v_lshl_add_u64 v[54:55], v[38:39], 0, 48
; GFX942-SDAG-NEXT: flat_store_dwordx4 v[38:39], v[12:15] offset:32
; GFX942-SDAG-NEXT: flat_store_dwordx4 v[38:39], v[8:11] offset:16
; GFX942-SDAG-NEXT: flat_store_dwordx4 v[38:39], v[4:7]
; GFX942-SDAG-NEXT: v_lshl_add_u64 v[40:41], v[38:39], 0, s[8:9]
; GFX942-SDAG-NEXT: v_lshl_add_u64 v[42:43], v[38:39], 0, s[10:11]
; GFX942-SDAG-NEXT: v_lshl_add_u64 v[44:45], v[38:39], 0, s[12:13]
; GFX942-SDAG-NEXT: v_lshl_add_u64 v[46:47], v[38:39], 0, s[14:15]
; GFX942-SDAG-NEXT: v_lshl_add_u64 v[56:57], v[38:39], 0, s[16:17]
; GFX942-SDAG-NEXT: v_lshl_add_u64 v[58:59], v[38:39], 0, s[18:19]
; GFX942-SDAG-NEXT: v_lshl_add_u64 v[60:61], v[38:39], 0, s[20:21]
; GFX942-SDAG-NEXT: flat_store_dwordx4 v[38:39], v[4:7] offset:128
; GFX942-SDAG-NEXT: flat_store_dwordx4 v[48:49], v[32:35]
; GFX942-SDAG-NEXT: flat_store_dwordx4 v[50:51], v[28:31]
; GFX942-SDAG-NEXT: flat_store_dwordx4 v[52:53], v[24:27]
; GFX942-SDAG-NEXT: flat_store_dwordx4 v[54:55], v[16:19]
; GFX942-SDAG-NEXT: flat_store_dwordx4 v[40:41], v[32:35]
; GFX942-SDAG-NEXT: flat_store_dwordx4 v[42:43], v[28:31]
; GFX942-SDAG-NEXT: flat_store_dwordx4 v[44:45], v[24:27]
; GFX942-SDAG-NEXT: flat_store_dwordx4 v[46:47], v[20:23]
; GFX942-SDAG-NEXT: flat_store_dwordx4 v[56:57], v[16:19]
; GFX942-SDAG-NEXT: flat_store_dwordx4 v[58:59], v[12:15]
; GFX942-SDAG-NEXT: flat_store_dwordx4 v[60:61], v[8:11]
; GFX942-SDAG-NEXT: s_cbranch_vccnz .LBB4_1
; GFX942-SDAG-NEXT: ; %bb.2: ; %static-memset-post-expansion
; GFX942-SDAG-NEXT: s_mov_b32 s0, 0x4040404
; GFX942-SDAG-NEXT: v_perm_b32 v4, v2, v2, s0
; GFX942-SDAG-NEXT: v_lshlrev_b16_e32 v3, 8, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v5, v4
; GFX942-SDAG-NEXT: v_mov_b32_e32 v6, v4
; GFX942-SDAG-NEXT: v_mov_b32_e32 v7, v4
; GFX942-SDAG-NEXT: v_or_b32_sdwa v3, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX942-SDAG-NEXT: flat_store_dwordx4 v[0:1], v[4:7] offset:1024
; GFX942-SDAG-NEXT: flat_store_dwordx3 v[0:1], v[4:6] offset:1040
; GFX942-SDAG-NEXT: flat_store_short v[0:1], v3 offset:1052
; GFX942-SDAG-NEXT: flat_store_byte v[0:1], v2 offset:1054
; GFX942-SDAG-NEXT: v_accvgpr_read_b32 v61, a13 ; Reload Reuse
; GFX942-SDAG-NEXT: v_accvgpr_read_b32 v60, a12 ; Reload Reuse
; GFX942-SDAG-NEXT: v_accvgpr_read_b32 v59, a11 ; Reload Reuse
; GFX942-SDAG-NEXT: v_accvgpr_read_b32 v58, a10 ; Reload Reuse
; GFX942-SDAG-NEXT: v_accvgpr_read_b32 v57, a9 ; Reload Reuse
; GFX942-SDAG-NEXT: v_accvgpr_read_b32 v56, a8 ; Reload Reuse
; GFX942-SDAG-NEXT: v_accvgpr_read_b32 v47, a7 ; Reload Reuse
; GFX942-SDAG-NEXT: v_accvgpr_read_b32 v46, a6 ; Reload Reuse
; GFX942-SDAG-NEXT: v_accvgpr_read_b32 v45, a5 ; Reload Reuse
; GFX942-SDAG-NEXT: v_accvgpr_read_b32 v44, a4 ; Reload Reuse
; GFX942-SDAG-NEXT: v_accvgpr_read_b32 v43, a3 ; Reload Reuse
; GFX942-SDAG-NEXT: v_accvgpr_read_b32 v42, a2 ; Reload Reuse
; GFX942-SDAG-NEXT: v_accvgpr_read_b32 v41, a1 ; Reload Reuse
; GFX942-SDAG-NEXT: v_accvgpr_read_b32 v40, a0 ; Reload Reuse
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-GISEL-LABEL: memset_p0_sz1055_align_4_varsetval:
; GFX942-GISEL: ; %bb.0: ; %entry
; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-GISEL-NEXT: v_and_b32_e32 v3, 0xff, v2
; GFX942-GISEL-NEXT: v_lshl_or_b32 v4, v3, 8, v3
; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v3
; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v3, 24, v3
; GFX942-GISEL-NEXT: v_or3_b32 v4, v4, v5, v3
; GFX942-GISEL-NEXT: s_mov_b64 s[0:1], 0
; GFX942-GISEL-NEXT: v_mov_b32_e32 v5, v4
; GFX942-GISEL-NEXT: v_mov_b32_e32 v6, v4
; GFX942-GISEL-NEXT: v_mov_b32_e32 v7, v4
; GFX942-GISEL-NEXT: v_mov_b64_e32 v[8:9], 0x400
; GFX942-GISEL-NEXT: v_mov_b64_e32 v[10:11], s[0:1]
; GFX942-GISEL-NEXT: .LBB4_1: ; %static-memset-expansion-main-body
; GFX942-GISEL-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX942-GISEL-NEXT: v_add_co_u32_e32 v12, vcc, v0, v10
; GFX942-GISEL-NEXT: s_nop 1
; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v13, vcc, v1, v11, vcc
; GFX942-GISEL-NEXT: v_add_co_u32_e32 v10, vcc, 0x100, v10
; GFX942-GISEL-NEXT: flat_store_dwordx4 v[12:13], v[4:7]
; GFX942-GISEL-NEXT: flat_store_dwordx4 v[12:13], v[4:7] offset:16
; GFX942-GISEL-NEXT: flat_store_dwordx4 v[12:13], v[4:7] offset:32
; GFX942-GISEL-NEXT: flat_store_dwordx4 v[12:13], v[4:7] offset:48
; GFX942-GISEL-NEXT: flat_store_dwordx4 v[12:13], v[4:7] offset:64
; GFX942-GISEL-NEXT: flat_store_dwordx4 v[12:13], v[4:7] offset:80
; GFX942-GISEL-NEXT: flat_store_dwordx4 v[12:13], v[4:7] offset:96
; GFX942-GISEL-NEXT: flat_store_dwordx4 v[12:13], v[4:7] offset:112
; GFX942-GISEL-NEXT: flat_store_dwordx4 v[12:13], v[4:7] offset:128
; GFX942-GISEL-NEXT: flat_store_dwordx4 v[12:13], v[4:7] offset:144
; GFX942-GISEL-NEXT: flat_store_dwordx4 v[12:13], v[4:7] offset:160
; GFX942-GISEL-NEXT: flat_store_dwordx4 v[12:13], v[4:7] offset:176
; GFX942-GISEL-NEXT: flat_store_dwordx4 v[12:13], v[4:7] offset:192
; GFX942-GISEL-NEXT: flat_store_dwordx4 v[12:13], v[4:7] offset:208
; GFX942-GISEL-NEXT: flat_store_dwordx4 v[12:13], v[4:7] offset:224
; GFX942-GISEL-NEXT: flat_store_dwordx4 v[12:13], v[4:7] offset:240
; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v11, vcc, 0, v11, vcc
; GFX942-GISEL-NEXT: v_cmp_lt_u64_e32 vcc, v[10:11], v[8:9]
; GFX942-GISEL-NEXT: s_cbranch_vccnz .LBB4_1
; GFX942-GISEL-NEXT: ; %bb.2: ; %static-memset-post-expansion
; GFX942-GISEL-NEXT: v_and_b32_e32 v3, 0xff, v2
; GFX942-GISEL-NEXT: v_lshl_or_b32 v4, v3, 8, v3
; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v3
; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v3, 24, v3
; GFX942-GISEL-NEXT: v_or3_b32 v6, v4, v5, v3
; GFX942-GISEL-NEXT: v_mov_b32_e32 v3, 8
; GFX942-GISEL-NEXT: v_lshlrev_b16_sdwa v3, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX942-GISEL-NEXT: v_or_b32_sdwa v3, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX942-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v3
; GFX942-GISEL-NEXT: v_lshl_or_b32 v4, v4, 16, v4
; GFX942-GISEL-NEXT: v_mov_b32_e32 v7, v6
; GFX942-GISEL-NEXT: v_mov_b32_e32 v8, v6
; GFX942-GISEL-NEXT: v_mov_b32_e32 v9, v6
; GFX942-GISEL-NEXT: v_mov_b32_e32 v5, v4
; GFX942-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[6:9] offset:1024
; GFX942-GISEL-NEXT: flat_store_dwordx3 v[0:1], v[4:6] offset:1040
; GFX942-GISEL-NEXT: flat_store_short v[0:1], v3 offset:1052
; GFX942-GISEL-NEXT: flat_store_byte v[0:1], v2 offset:1054
; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX942-GISEL-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memset.p0.i64(ptr addrspace(0) noundef nonnull align 4 %dst, i8 %setval, i64 1055, i1 false)
ret void
}
; Memset of a constant 2048 bytes through an addrspace(0) pointer with align 4
; and a runtime i8 fill value. Both instruction selectors are expected to emit
; one "static-memset-expansion-main-body" loop that issues sixteen
; flat_store_dwordx4 (256 bytes) per iteration until the offset reaches 0x800.
; No residual stores follow the loop, since 2048 is a multiple of 256.
; The SelectionDAG output also saves v40-v47 and v56-v59 into a0-a11 on entry
; and restores them before returning.
define void @memset_p0_sz2048_align_4_varsetval(ptr addrspace(0) align 4 %dst, i8 %setval) {
; GFX942-SDAG-LABEL: memset_p0_sz2048_align_4_varsetval:
; GFX942-SDAG: ; %bb.0: ; %entry
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: s_mov_b32 s0, 0x4040404
; GFX942-SDAG-NEXT: v_perm_b32 v2, v2, v2, s0
; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a0, v40 ; Reload Reuse
; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a1, v41 ; Reload Reuse
; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a2, v42 ; Reload Reuse
; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a3, v43 ; Reload Reuse
; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a4, v44 ; Reload Reuse
; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a5, v45 ; Reload Reuse
; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a6, v46 ; Reload Reuse
; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a7, v47 ; Reload Reuse
; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a8, v56 ; Reload Reuse
; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a9, v57 ; Reload Reuse
; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a10, v58 ; Reload Reuse
; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a11, v59 ; Reload Reuse
; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v4, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v5, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v6, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v7, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v8, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v9, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v10, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v11, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v12, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v13, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v14, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v15, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v16, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v17, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v18, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v19, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v20, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v21, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v22, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v23, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v24, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v25, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v26, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v27, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v28, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v29, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v30, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v31, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v32, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v33, v2
; GFX942-SDAG-NEXT: s_mov_b64 s[0:1], 0
; GFX942-SDAG-NEXT: s_mov_b64 s[2:3], 0x70
; GFX942-SDAG-NEXT: s_mov_b64 s[4:5], 0x60
; GFX942-SDAG-NEXT: s_mov_b64 s[6:7], 0x50
; GFX942-SDAG-NEXT: s_mov_b64 s[8:9], 0xf0
; GFX942-SDAG-NEXT: s_mov_b64 s[10:11], 0xe0
; GFX942-SDAG-NEXT: s_mov_b64 s[12:13], 0xd0
; GFX942-SDAG-NEXT: s_mov_b64 s[14:15], 0xc0
; GFX942-SDAG-NEXT: s_mov_b64 s[16:17], 0xb0
; GFX942-SDAG-NEXT: s_mov_b64 s[18:19], 0xa0
; GFX942-SDAG-NEXT: s_mov_b64 s[20:21], 0x90
; GFX942-SDAG-NEXT: v_mov_b64_e32 v[34:35], 0x800
; GFX942-SDAG-NEXT: .LBB5_1: ; %static-memset-expansion-main-body
; GFX942-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX942-SDAG-NEXT: v_lshl_add_u64 v[36:37], v[0:1], 0, s[0:1]
; GFX942-SDAG-NEXT: s_add_u32 s0, s0, 0x100
; GFX942-SDAG-NEXT: s_addc_u32 s1, s1, 0
; GFX942-SDAG-NEXT: v_cmp_lt_u64_e32 vcc, s[0:1], v[34:35]
; GFX942-SDAG-NEXT: v_lshl_add_u64 v[38:39], v[36:37], 0, s[2:3]
; GFX942-SDAG-NEXT: s_and_b64 vcc, exec, vcc
; GFX942-SDAG-NEXT: v_lshl_add_u64 v[48:49], v[36:37], 0, s[4:5]
; GFX942-SDAG-NEXT: v_lshl_add_u64 v[50:51], v[36:37], 0, s[6:7]
; GFX942-SDAG-NEXT: flat_store_dwordx4 v[36:37], v[18:21] offset:64
; GFX942-SDAG-NEXT: v_lshl_add_u64 v[52:53], v[36:37], 0, 48
; GFX942-SDAG-NEXT: flat_store_dwordx4 v[36:37], v[10:13] offset:32
; GFX942-SDAG-NEXT: flat_store_dwordx4 v[36:37], v[6:9] offset:16
; GFX942-SDAG-NEXT: flat_store_dwordx4 v[36:37], v[2:5]
; GFX942-SDAG-NEXT: v_lshl_add_u64 v[54:55], v[36:37], 0, s[8:9]
; GFX942-SDAG-NEXT: v_lshl_add_u64 v[40:41], v[36:37], 0, s[10:11]
; GFX942-SDAG-NEXT: v_lshl_add_u64 v[42:43], v[36:37], 0, s[12:13]
; GFX942-SDAG-NEXT: v_lshl_add_u64 v[44:45], v[36:37], 0, s[14:15]
; GFX942-SDAG-NEXT: v_lshl_add_u64 v[46:47], v[36:37], 0, s[16:17]
; GFX942-SDAG-NEXT: v_lshl_add_u64 v[56:57], v[36:37], 0, s[18:19]
; GFX942-SDAG-NEXT: v_lshl_add_u64 v[58:59], v[36:37], 0, s[20:21]
; GFX942-SDAG-NEXT: flat_store_dwordx4 v[36:37], v[2:5] offset:128
; GFX942-SDAG-NEXT: flat_store_dwordx4 v[38:39], v[30:33]
; GFX942-SDAG-NEXT: flat_store_dwordx4 v[48:49], v[26:29]
; GFX942-SDAG-NEXT: flat_store_dwordx4 v[50:51], v[22:25]
; GFX942-SDAG-NEXT: flat_store_dwordx4 v[52:53], v[14:17]
; GFX942-SDAG-NEXT: flat_store_dwordx4 v[54:55], v[30:33]
; GFX942-SDAG-NEXT: flat_store_dwordx4 v[40:41], v[26:29]
; GFX942-SDAG-NEXT: flat_store_dwordx4 v[42:43], v[22:25]
; GFX942-SDAG-NEXT: flat_store_dwordx4 v[44:45], v[18:21]
; GFX942-SDAG-NEXT: flat_store_dwordx4 v[46:47], v[14:17]
; GFX942-SDAG-NEXT: flat_store_dwordx4 v[56:57], v[10:13]
; GFX942-SDAG-NEXT: flat_store_dwordx4 v[58:59], v[6:9]
; GFX942-SDAG-NEXT: s_cbranch_vccnz .LBB5_1
; GFX942-SDAG-NEXT: ; %bb.2: ; %static-memset-post-expansion
; GFX942-SDAG-NEXT: v_accvgpr_read_b32 v59, a11 ; Reload Reuse
; GFX942-SDAG-NEXT: v_accvgpr_read_b32 v58, a10 ; Reload Reuse
; GFX942-SDAG-NEXT: v_accvgpr_read_b32 v57, a9 ; Reload Reuse
; GFX942-SDAG-NEXT: v_accvgpr_read_b32 v56, a8 ; Reload Reuse
; GFX942-SDAG-NEXT: v_accvgpr_read_b32 v47, a7 ; Reload Reuse
; GFX942-SDAG-NEXT: v_accvgpr_read_b32 v46, a6 ; Reload Reuse
; GFX942-SDAG-NEXT: v_accvgpr_read_b32 v45, a5 ; Reload Reuse
; GFX942-SDAG-NEXT: v_accvgpr_read_b32 v44, a4 ; Reload Reuse
; GFX942-SDAG-NEXT: v_accvgpr_read_b32 v43, a3 ; Reload Reuse
; GFX942-SDAG-NEXT: v_accvgpr_read_b32 v42, a2 ; Reload Reuse
; GFX942-SDAG-NEXT: v_accvgpr_read_b32 v41, a1 ; Reload Reuse
; GFX942-SDAG-NEXT: v_accvgpr_read_b32 v40, a0 ; Reload Reuse
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-GISEL-LABEL: memset_p0_sz2048_align_4_varsetval:
; GFX942-GISEL: ; %bb.0: ; %entry
; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-GISEL-NEXT: v_and_b32_e32 v2, 0xff, v2
; GFX942-GISEL-NEXT: v_lshl_or_b32 v3, v2, 8, v2
; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v4, 16, v2
; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v2, 24, v2
; GFX942-GISEL-NEXT: v_or3_b32 v2, v3, v4, v2
; GFX942-GISEL-NEXT: s_mov_b64 s[0:1], 0
; GFX942-GISEL-NEXT: v_mov_b32_e32 v3, v2
; GFX942-GISEL-NEXT: v_mov_b32_e32 v4, v2
; GFX942-GISEL-NEXT: v_mov_b32_e32 v5, v2
; GFX942-GISEL-NEXT: v_mov_b64_e32 v[6:7], 0x800
; GFX942-GISEL-NEXT: v_mov_b64_e32 v[8:9], s[0:1]
; GFX942-GISEL-NEXT: .LBB5_1: ; %static-memset-expansion-main-body
; GFX942-GISEL-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX942-GISEL-NEXT: v_add_co_u32_e32 v10, vcc, v0, v8
; GFX942-GISEL-NEXT: s_nop 1
; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v11, vcc, v1, v9, vcc
; GFX942-GISEL-NEXT: v_add_co_u32_e32 v8, vcc, 0x100, v8
; GFX942-GISEL-NEXT: flat_store_dwordx4 v[10:11], v[2:5]
; GFX942-GISEL-NEXT: flat_store_dwordx4 v[10:11], v[2:5] offset:16
; GFX942-GISEL-NEXT: flat_store_dwordx4 v[10:11], v[2:5] offset:32
; GFX942-GISEL-NEXT: flat_store_dwordx4 v[10:11], v[2:5] offset:48
; GFX942-GISEL-NEXT: flat_store_dwordx4 v[10:11], v[2:5] offset:64
; GFX942-GISEL-NEXT: flat_store_dwordx4 v[10:11], v[2:5] offset:80
; GFX942-GISEL-NEXT: flat_store_dwordx4 v[10:11], v[2:5] offset:96
; GFX942-GISEL-NEXT: flat_store_dwordx4 v[10:11], v[2:5] offset:112
; GFX942-GISEL-NEXT: flat_store_dwordx4 v[10:11], v[2:5] offset:128
; GFX942-GISEL-NEXT: flat_store_dwordx4 v[10:11], v[2:5] offset:144
; GFX942-GISEL-NEXT: flat_store_dwordx4 v[10:11], v[2:5] offset:160
; GFX942-GISEL-NEXT: flat_store_dwordx4 v[10:11], v[2:5] offset:176
; GFX942-GISEL-NEXT: flat_store_dwordx4 v[10:11], v[2:5] offset:192
; GFX942-GISEL-NEXT: flat_store_dwordx4 v[10:11], v[2:5] offset:208
; GFX942-GISEL-NEXT: flat_store_dwordx4 v[10:11], v[2:5] offset:224
; GFX942-GISEL-NEXT: flat_store_dwordx4 v[10:11], v[2:5] offset:240
; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v9, vcc, 0, v9, vcc
; GFX942-GISEL-NEXT: v_cmp_lt_u64_e32 vcc, v[8:9], v[6:7]
; GFX942-GISEL-NEXT: s_cbranch_vccnz .LBB5_1
; GFX942-GISEL-NEXT: ; %bb.2: ; %static-memset-post-expansion
; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX942-GISEL-NEXT: s_setpc_b64 s[30:31]
entry:
; Constant length 2048; the trailing i1 false is the intrinsic's volatile flag.
tail call void @llvm.memset.p0.i64(ptr addrspace(0) noundef nonnull align 4 %dst, i8 %setval, i64 2048, i1 false)
ret void
}
; Memset of a constant 1055 bytes through an addrspace(1) pointer with align 4
; and a runtime i8 fill value. Both instruction selectors are expected to emit
; a "static-memset-expansion-main-body" loop that issues sixteen
; global_store_dwordx4 (256 bytes) per iteration until the offset reaches 0x400.
; The remaining 31 bytes are written after the loop as a dwordx4 at offset 1024,
; a dwordx3 at 1040, a short at 1052 and a byte at 1054.
define void @memset_p1_sz1055_align_4_varsetval(ptr addrspace(1) align 4 %dst, i8 %setval) {
; GFX942-SDAG-LABEL: memset_p1_sz1055_align_4_varsetval:
; GFX942-SDAG: ; %bb.0: ; %entry
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: s_mov_b32 s0, 0x4040404
; GFX942-SDAG-NEXT: v_perm_b32 v4, v2, v2, s0
; GFX942-SDAG-NEXT: v_mov_b32_e32 v5, v4
; GFX942-SDAG-NEXT: v_mov_b32_e32 v6, v4
; GFX942-SDAG-NEXT: v_mov_b32_e32 v7, v4
; GFX942-SDAG-NEXT: v_mov_b32_e32 v8, v4
; GFX942-SDAG-NEXT: v_mov_b32_e32 v9, v4
; GFX942-SDAG-NEXT: v_mov_b32_e32 v10, v4
; GFX942-SDAG-NEXT: v_mov_b32_e32 v11, v4
; GFX942-SDAG-NEXT: v_mov_b32_e32 v12, v4
; GFX942-SDAG-NEXT: v_mov_b32_e32 v13, v4
; GFX942-SDAG-NEXT: v_mov_b32_e32 v14, v4
; GFX942-SDAG-NEXT: v_mov_b32_e32 v15, v4
; GFX942-SDAG-NEXT: v_mov_b32_e32 v16, v4
; GFX942-SDAG-NEXT: v_mov_b32_e32 v17, v4
; GFX942-SDAG-NEXT: v_mov_b32_e32 v18, v4
; GFX942-SDAG-NEXT: v_mov_b32_e32 v19, v4
; GFX942-SDAG-NEXT: v_mov_b32_e32 v20, v4
; GFX942-SDAG-NEXT: v_mov_b32_e32 v21, v4
; GFX942-SDAG-NEXT: v_mov_b32_e32 v22, v4
; GFX942-SDAG-NEXT: v_mov_b32_e32 v23, v4
; GFX942-SDAG-NEXT: v_mov_b32_e32 v24, v4
; GFX942-SDAG-NEXT: v_mov_b32_e32 v25, v4
; GFX942-SDAG-NEXT: v_mov_b32_e32 v26, v4
; GFX942-SDAG-NEXT: v_mov_b32_e32 v27, v4
; GFX942-SDAG-NEXT: v_mov_b32_e32 v28, v4
; GFX942-SDAG-NEXT: v_mov_b32_e32 v29, v4
; GFX942-SDAG-NEXT: v_mov_b32_e32 v30, v4
; GFX942-SDAG-NEXT: v_mov_b32_e32 v31, v4
; GFX942-SDAG-NEXT: v_mov_b32_e32 v32, v4
; GFX942-SDAG-NEXT: v_mov_b32_e32 v33, v4
; GFX942-SDAG-NEXT: v_mov_b32_e32 v34, v4
; GFX942-SDAG-NEXT: v_mov_b32_e32 v35, v4
; GFX942-SDAG-NEXT: s_mov_b64 s[0:1], 0
; GFX942-SDAG-NEXT: v_mov_b64_e32 v[36:37], 0x400
; GFX942-SDAG-NEXT: .LBB6_1: ; %static-memset-expansion-main-body
; GFX942-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX942-SDAG-NEXT: v_lshl_add_u64 v[38:39], v[0:1], 0, s[0:1]
; GFX942-SDAG-NEXT: s_add_u32 s0, s0, 0x100
; GFX942-SDAG-NEXT: s_addc_u32 s1, s1, 0
; GFX942-SDAG-NEXT: v_cmp_lt_u64_e32 vcc, s[0:1], v[36:37]
; GFX942-SDAG-NEXT: s_and_b64 vcc, exec, vcc
; GFX942-SDAG-NEXT: global_store_dwordx4 v[38:39], v[32:35], off offset:112
; GFX942-SDAG-NEXT: global_store_dwordx4 v[38:39], v[28:31], off offset:96
; GFX942-SDAG-NEXT: global_store_dwordx4 v[38:39], v[24:27], off offset:80
; GFX942-SDAG-NEXT: global_store_dwordx4 v[38:39], v[20:23], off offset:64
; GFX942-SDAG-NEXT: global_store_dwordx4 v[38:39], v[16:19], off offset:48
; GFX942-SDAG-NEXT: global_store_dwordx4 v[38:39], v[12:15], off offset:32
; GFX942-SDAG-NEXT: global_store_dwordx4 v[38:39], v[8:11], off offset:16
; GFX942-SDAG-NEXT: global_store_dwordx4 v[38:39], v[4:7], off
; GFX942-SDAG-NEXT: global_store_dwordx4 v[38:39], v[32:35], off offset:240
; GFX942-SDAG-NEXT: global_store_dwordx4 v[38:39], v[28:31], off offset:224
; GFX942-SDAG-NEXT: global_store_dwordx4 v[38:39], v[24:27], off offset:208
; GFX942-SDAG-NEXT: global_store_dwordx4 v[38:39], v[20:23], off offset:192
; GFX942-SDAG-NEXT: global_store_dwordx4 v[38:39], v[16:19], off offset:176
; GFX942-SDAG-NEXT: global_store_dwordx4 v[38:39], v[12:15], off offset:160
; GFX942-SDAG-NEXT: global_store_dwordx4 v[38:39], v[8:11], off offset:144
; GFX942-SDAG-NEXT: global_store_dwordx4 v[38:39], v[4:7], off offset:128
; GFX942-SDAG-NEXT: s_cbranch_vccnz .LBB6_1
; GFX942-SDAG-NEXT: ; %bb.2: ; %static-memset-post-expansion
; GFX942-SDAG-NEXT: s_mov_b32 s0, 0x4040404
; GFX942-SDAG-NEXT: v_perm_b32 v4, v2, v2, s0
; GFX942-SDAG-NEXT: v_lshlrev_b16_e32 v3, 8, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v5, v4
; GFX942-SDAG-NEXT: v_mov_b32_e32 v6, v4
; GFX942-SDAG-NEXT: v_mov_b32_e32 v7, v4
; GFX942-SDAG-NEXT: v_or_b32_sdwa v3, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX942-SDAG-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:1024
; GFX942-SDAG-NEXT: global_store_dwordx3 v[0:1], v[4:6], off offset:1040
; GFX942-SDAG-NEXT: global_store_short v[0:1], v3, off offset:1052
; GFX942-SDAG-NEXT: global_store_byte v[0:1], v2, off offset:1054
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-GISEL-LABEL: memset_p1_sz1055_align_4_varsetval:
; GFX942-GISEL: ; %bb.0: ; %entry
; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-GISEL-NEXT: v_and_b32_e32 v3, 0xff, v2
; GFX942-GISEL-NEXT: v_lshl_or_b32 v4, v3, 8, v3
; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v3
; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v3, 24, v3
; GFX942-GISEL-NEXT: v_or3_b32 v4, v4, v5, v3
; GFX942-GISEL-NEXT: s_mov_b64 s[0:1], 0
; GFX942-GISEL-NEXT: v_mov_b32_e32 v5, v4
; GFX942-GISEL-NEXT: v_mov_b32_e32 v6, v4
; GFX942-GISEL-NEXT: v_mov_b32_e32 v7, v4
; GFX942-GISEL-NEXT: v_mov_b64_e32 v[8:9], 0x400
; GFX942-GISEL-NEXT: v_mov_b64_e32 v[10:11], s[0:1]
; GFX942-GISEL-NEXT: .LBB6_1: ; %static-memset-expansion-main-body
; GFX942-GISEL-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX942-GISEL-NEXT: v_add_co_u32_e32 v12, vcc, v0, v10
; GFX942-GISEL-NEXT: s_nop 1
; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v13, vcc, v1, v11, vcc
; GFX942-GISEL-NEXT: v_add_co_u32_e32 v10, vcc, 0x100, v10
; GFX942-GISEL-NEXT: global_store_dwordx4 v[12:13], v[4:7], off
; GFX942-GISEL-NEXT: global_store_dwordx4 v[12:13], v[4:7], off offset:16
; GFX942-GISEL-NEXT: global_store_dwordx4 v[12:13], v[4:7], off offset:32
; GFX942-GISEL-NEXT: global_store_dwordx4 v[12:13], v[4:7], off offset:48
; GFX942-GISEL-NEXT: global_store_dwordx4 v[12:13], v[4:7], off offset:64
; GFX942-GISEL-NEXT: global_store_dwordx4 v[12:13], v[4:7], off offset:80
; GFX942-GISEL-NEXT: global_store_dwordx4 v[12:13], v[4:7], off offset:96
; GFX942-GISEL-NEXT: global_store_dwordx4 v[12:13], v[4:7], off offset:112
; GFX942-GISEL-NEXT: global_store_dwordx4 v[12:13], v[4:7], off offset:128
; GFX942-GISEL-NEXT: global_store_dwordx4 v[12:13], v[4:7], off offset:144
; GFX942-GISEL-NEXT: global_store_dwordx4 v[12:13], v[4:7], off offset:160
; GFX942-GISEL-NEXT: global_store_dwordx4 v[12:13], v[4:7], off offset:176
; GFX942-GISEL-NEXT: global_store_dwordx4 v[12:13], v[4:7], off offset:192
; GFX942-GISEL-NEXT: global_store_dwordx4 v[12:13], v[4:7], off offset:208
; GFX942-GISEL-NEXT: global_store_dwordx4 v[12:13], v[4:7], off offset:224
; GFX942-GISEL-NEXT: global_store_dwordx4 v[12:13], v[4:7], off offset:240
; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v11, vcc, 0, v11, vcc
; GFX942-GISEL-NEXT: v_cmp_lt_u64_e32 vcc, v[10:11], v[8:9]
; GFX942-GISEL-NEXT: s_cbranch_vccnz .LBB6_1
; GFX942-GISEL-NEXT: ; %bb.2: ; %static-memset-post-expansion
; GFX942-GISEL-NEXT: v_and_b32_e32 v3, 0xff, v2
; GFX942-GISEL-NEXT: v_lshl_or_b32 v4, v3, 8, v3
; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v3
; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v3, 24, v3
; GFX942-GISEL-NEXT: v_or3_b32 v6, v4, v5, v3
; GFX942-GISEL-NEXT: v_mov_b32_e32 v3, 8
; GFX942-GISEL-NEXT: v_lshlrev_b16_sdwa v3, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX942-GISEL-NEXT: v_or_b32_sdwa v3, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX942-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v3
; GFX942-GISEL-NEXT: v_lshl_or_b32 v4, v4, 16, v4
; GFX942-GISEL-NEXT: v_mov_b32_e32 v7, v6
; GFX942-GISEL-NEXT: v_mov_b32_e32 v8, v6
; GFX942-GISEL-NEXT: v_mov_b32_e32 v9, v6
; GFX942-GISEL-NEXT: v_mov_b32_e32 v5, v4
; GFX942-GISEL-NEXT: global_store_dwordx4 v[0:1], v[6:9], off offset:1024
; GFX942-GISEL-NEXT: global_store_dwordx3 v[0:1], v[4:6], off offset:1040
; GFX942-GISEL-NEXT: global_store_short v[0:1], v3, off offset:1052
; GFX942-GISEL-NEXT: global_store_byte v[0:1], v2, off offset:1054
; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-GISEL-NEXT: s_setpc_b64 s[30:31]
entry:
; Constant length 1055; the trailing i1 false is the intrinsic's volatile flag.
tail call void @llvm.memset.p1.i64(ptr addrspace(1) noundef nonnull align 4 %dst, i8 %setval, i64 1055, i1 false)
ret void
}
; Memset of a constant 2048 bytes through an addrspace(1) pointer with align 4
; and a runtime i8 fill value. Both instruction selectors are expected to emit
; one "static-memset-expansion-main-body" loop that issues sixteen
; global_store_dwordx4 (256 bytes) per iteration until the offset reaches 0x800.
; No residual stores follow the loop, since 2048 is a multiple of 256.
define void @memset_p1_sz2048_align_4_varsetval(ptr addrspace(1) align 4 %dst, i8 %setval) {
; GFX942-SDAG-LABEL: memset_p1_sz2048_align_4_varsetval:
; GFX942-SDAG: ; %bb.0: ; %entry
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: s_mov_b32 s0, 0x4040404
; GFX942-SDAG-NEXT: v_perm_b32 v2, v2, v2, s0
; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v4, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v5, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v6, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v7, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v8, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v9, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v10, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v11, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v12, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v13, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v14, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v15, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v16, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v17, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v18, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v19, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v20, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v21, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v22, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v23, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v24, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v25, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v26, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v27, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v28, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v29, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v30, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v31, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v32, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v33, v2
; GFX942-SDAG-NEXT: s_mov_b64 s[0:1], 0
; GFX942-SDAG-NEXT: v_mov_b64_e32 v[34:35], 0x800
; GFX942-SDAG-NEXT: .LBB7_1: ; %static-memset-expansion-main-body
; GFX942-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX942-SDAG-NEXT: v_lshl_add_u64 v[36:37], v[0:1], 0, s[0:1]
; GFX942-SDAG-NEXT: s_add_u32 s0, s0, 0x100
; GFX942-SDAG-NEXT: s_addc_u32 s1, s1, 0
; GFX942-SDAG-NEXT: v_cmp_lt_u64_e32 vcc, s[0:1], v[34:35]
; GFX942-SDAG-NEXT: s_and_b64 vcc, exec, vcc
; GFX942-SDAG-NEXT: global_store_dwordx4 v[36:37], v[30:33], off offset:112
; GFX942-SDAG-NEXT: global_store_dwordx4 v[36:37], v[26:29], off offset:96
; GFX942-SDAG-NEXT: global_store_dwordx4 v[36:37], v[22:25], off offset:80
; GFX942-SDAG-NEXT: global_store_dwordx4 v[36:37], v[18:21], off offset:64
; GFX942-SDAG-NEXT: global_store_dwordx4 v[36:37], v[14:17], off offset:48
; GFX942-SDAG-NEXT: global_store_dwordx4 v[36:37], v[10:13], off offset:32
; GFX942-SDAG-NEXT: global_store_dwordx4 v[36:37], v[6:9], off offset:16
; GFX942-SDAG-NEXT: global_store_dwordx4 v[36:37], v[2:5], off
; GFX942-SDAG-NEXT: global_store_dwordx4 v[36:37], v[30:33], off offset:240
; GFX942-SDAG-NEXT: global_store_dwordx4 v[36:37], v[26:29], off offset:224
; GFX942-SDAG-NEXT: global_store_dwordx4 v[36:37], v[22:25], off offset:208
; GFX942-SDAG-NEXT: global_store_dwordx4 v[36:37], v[18:21], off offset:192
; GFX942-SDAG-NEXT: global_store_dwordx4 v[36:37], v[14:17], off offset:176
; GFX942-SDAG-NEXT: global_store_dwordx4 v[36:37], v[10:13], off offset:160
; GFX942-SDAG-NEXT: global_store_dwordx4 v[36:37], v[6:9], off offset:144
; GFX942-SDAG-NEXT: global_store_dwordx4 v[36:37], v[2:5], off offset:128
; GFX942-SDAG-NEXT: s_cbranch_vccnz .LBB7_1
; GFX942-SDAG-NEXT: ; %bb.2: ; %static-memset-post-expansion
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-GISEL-LABEL: memset_p1_sz2048_align_4_varsetval:
; GFX942-GISEL: ; %bb.0: ; %entry
; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-GISEL-NEXT: v_and_b32_e32 v2, 0xff, v2
; GFX942-GISEL-NEXT: v_lshl_or_b32 v3, v2, 8, v2
; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v4, 16, v2
; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v2, 24, v2
; GFX942-GISEL-NEXT: v_or3_b32 v2, v3, v4, v2
; GFX942-GISEL-NEXT: s_mov_b64 s[0:1], 0
; GFX942-GISEL-NEXT: v_mov_b32_e32 v3, v2
; GFX942-GISEL-NEXT: v_mov_b32_e32 v4, v2
; GFX942-GISEL-NEXT: v_mov_b32_e32 v5, v2
; GFX942-GISEL-NEXT: v_mov_b64_e32 v[6:7], 0x800
; GFX942-GISEL-NEXT: v_mov_b64_e32 v[8:9], s[0:1]
; GFX942-GISEL-NEXT: .LBB7_1: ; %static-memset-expansion-main-body
; GFX942-GISEL-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX942-GISEL-NEXT: v_add_co_u32_e32 v10, vcc, v0, v8
; GFX942-GISEL-NEXT: s_nop 1
; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v11, vcc, v1, v9, vcc
; GFX942-GISEL-NEXT: v_add_co_u32_e32 v8, vcc, 0x100, v8
; GFX942-GISEL-NEXT: global_store_dwordx4 v[10:11], v[2:5], off
; GFX942-GISEL-NEXT: global_store_dwordx4 v[10:11], v[2:5], off offset:16
; GFX942-GISEL-NEXT: global_store_dwordx4 v[10:11], v[2:5], off offset:32
; GFX942-GISEL-NEXT: global_store_dwordx4 v[10:11], v[2:5], off offset:48
; GFX942-GISEL-NEXT: global_store_dwordx4 v[10:11], v[2:5], off offset:64
; GFX942-GISEL-NEXT: global_store_dwordx4 v[10:11], v[2:5], off offset:80
; GFX942-GISEL-NEXT: global_store_dwordx4 v[10:11], v[2:5], off offset:96
; GFX942-GISEL-NEXT: global_store_dwordx4 v[10:11], v[2:5], off offset:112
; GFX942-GISEL-NEXT: global_store_dwordx4 v[10:11], v[2:5], off offset:128
; GFX942-GISEL-NEXT: global_store_dwordx4 v[10:11], v[2:5], off offset:144
; GFX942-GISEL-NEXT: global_store_dwordx4 v[10:11], v[2:5], off offset:160
; GFX942-GISEL-NEXT: global_store_dwordx4 v[10:11], v[2:5], off offset:176
; GFX942-GISEL-NEXT: global_store_dwordx4 v[10:11], v[2:5], off offset:192
; GFX942-GISEL-NEXT: global_store_dwordx4 v[10:11], v[2:5], off offset:208
; GFX942-GISEL-NEXT: global_store_dwordx4 v[10:11], v[2:5], off offset:224
; GFX942-GISEL-NEXT: global_store_dwordx4 v[10:11], v[2:5], off offset:240
; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v9, vcc, 0, v9, vcc
; GFX942-GISEL-NEXT: v_cmp_lt_u64_e32 vcc, v[8:9], v[6:7]
; GFX942-GISEL-NEXT: s_cbranch_vccnz .LBB7_1
; GFX942-GISEL-NEXT: ; %bb.2: ; %static-memset-post-expansion
; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-GISEL-NEXT: s_setpc_b64 s[30:31]
entry:
; Constant length 2048; the trailing i1 false is the intrinsic's volatile flag.
tail call void @llvm.memset.p1.i64(ptr addrspace(1) noundef nonnull align 4 %dst, i8 %setval, i64 2048, i1 false)
ret void
}
; Memset of a constant 1055 bytes through an addrspace(3) pointer with align 4
; and a runtime i8 fill value. Both instruction selectors are expected to emit
; a "static-memset-expansion-main-body" loop that writes 256 bytes per
; iteration until the offset reaches 0x400: the SelectionDAG output uses
; thirty-two ds_write2_b32, the GlobalISel output sixteen ds_write2_b64.
; The remaining 31 bytes (offsets 1024..1054) are written after the loop with
; progressively narrower ds_write instructions, ending with a b16 at 1052 and a
; b8 at 1054.
define void @memset_p3_sz1055_align_4_varsetval(ptr addrspace(3) align 4 %dst, i8 %setval) {
; GFX942-SDAG-LABEL: memset_p3_sz1055_align_4_varsetval:
; GFX942-SDAG: ; %bb.0: ; %entry
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: s_mov_b32 s0, 0x4040404
; GFX942-SDAG-NEXT: v_perm_b32 v2, v1, v1, s0
; GFX942-SDAG-NEXT: s_mov_b64 s[0:1], 0
; GFX942-SDAG-NEXT: v_mov_b64_e32 v[34:35], 0x400
; GFX942-SDAG-NEXT: v_mov_b32_e32 v36, v0
; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v4, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v5, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v6, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v7, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v8, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v9, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v10, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v11, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v12, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v13, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v14, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v15, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v16, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v17, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v18, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v19, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v20, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v21, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v22, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v23, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v24, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v25, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v26, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v27, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v28, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v29, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v30, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v31, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v32, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v33, v2
; GFX942-SDAG-NEXT: .LBB8_1: ; %static-memset-expansion-main-body
; GFX942-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX942-SDAG-NEXT: s_add_u32 s0, s0, 0x100
; GFX942-SDAG-NEXT: s_addc_u32 s1, s1, 0
; GFX942-SDAG-NEXT: v_cmp_lt_u64_e32 vcc, s[0:1], v[34:35]
; GFX942-SDAG-NEXT: ds_write2_b32 v36, v32, v33 offset0:30 offset1:31
; GFX942-SDAG-NEXT: ds_write2_b32 v36, v30, v31 offset0:28 offset1:29
; GFX942-SDAG-NEXT: ds_write2_b32 v36, v28, v29 offset0:26 offset1:27
; GFX942-SDAG-NEXT: ds_write2_b32 v36, v26, v27 offset0:24 offset1:25
; GFX942-SDAG-NEXT: ds_write2_b32 v36, v24, v25 offset0:22 offset1:23
; GFX942-SDAG-NEXT: ds_write2_b32 v36, v22, v23 offset0:20 offset1:21
; GFX942-SDAG-NEXT: ds_write2_b32 v36, v20, v21 offset0:18 offset1:19
; GFX942-SDAG-NEXT: ds_write2_b32 v36, v18, v19 offset0:16 offset1:17
; GFX942-SDAG-NEXT: ds_write2_b32 v36, v16, v17 offset0:14 offset1:15
; GFX942-SDAG-NEXT: ds_write2_b32 v36, v14, v15 offset0:12 offset1:13
; GFX942-SDAG-NEXT: ds_write2_b32 v36, v12, v13 offset0:10 offset1:11
; GFX942-SDAG-NEXT: ds_write2_b32 v36, v10, v11 offset0:8 offset1:9
; GFX942-SDAG-NEXT: ds_write2_b32 v36, v8, v9 offset0:6 offset1:7
; GFX942-SDAG-NEXT: ds_write2_b32 v36, v6, v7 offset0:4 offset1:5
; GFX942-SDAG-NEXT: ds_write2_b32 v36, v4, v5 offset0:2 offset1:3
; GFX942-SDAG-NEXT: ds_write2_b32 v36, v2, v3 offset1:1
; GFX942-SDAG-NEXT: ds_write2_b32 v36, v32, v33 offset0:62 offset1:63
; GFX942-SDAG-NEXT: ds_write2_b32 v36, v30, v31 offset0:60 offset1:61
; GFX942-SDAG-NEXT: ds_write2_b32 v36, v28, v29 offset0:58 offset1:59
; GFX942-SDAG-NEXT: ds_write2_b32 v36, v26, v27 offset0:56 offset1:57
; GFX942-SDAG-NEXT: ds_write2_b32 v36, v24, v25 offset0:54 offset1:55
; GFX942-SDAG-NEXT: ds_write2_b32 v36, v22, v23 offset0:52 offset1:53
; GFX942-SDAG-NEXT: ds_write2_b32 v36, v20, v21 offset0:50 offset1:51
; GFX942-SDAG-NEXT: ds_write2_b32 v36, v18, v19 offset0:48 offset1:49
; GFX942-SDAG-NEXT: ds_write2_b32 v36, v16, v17 offset0:46 offset1:47
; GFX942-SDAG-NEXT: ds_write2_b32 v36, v14, v15 offset0:44 offset1:45
; GFX942-SDAG-NEXT: ds_write2_b32 v36, v12, v13 offset0:42 offset1:43
; GFX942-SDAG-NEXT: ds_write2_b32 v36, v10, v11 offset0:40 offset1:41
; GFX942-SDAG-NEXT: ds_write2_b32 v36, v8, v9 offset0:38 offset1:39
; GFX942-SDAG-NEXT: ds_write2_b32 v36, v6, v7 offset0:36 offset1:37
; GFX942-SDAG-NEXT: ds_write2_b32 v36, v4, v5 offset0:34 offset1:35
; GFX942-SDAG-NEXT: ds_write2_b32 v36, v2, v3 offset0:32 offset1:33
; GFX942-SDAG-NEXT: v_add_u32_e32 v36, 0x100, v36
; GFX942-SDAG-NEXT: s_cbranch_vccnz .LBB8_1
; GFX942-SDAG-NEXT: ; %bb.2: ; %static-memset-post-expansion
; GFX942-SDAG-NEXT: s_mov_b32 s0, 0x4040404
; GFX942-SDAG-NEXT: v_add_u32_e32 v2, 0x400, v0
; GFX942-SDAG-NEXT: v_add_u32_e32 v3, 0x408, v0
; GFX942-SDAG-NEXT: v_perm_b32 v4, v1, v1, s0
; GFX942-SDAG-NEXT: ds_write2_b32 v3, v4, v4 offset1:1
; GFX942-SDAG-NEXT: ds_write2_b32 v2, v4, v4 offset1:1
; GFX942-SDAG-NEXT: v_add_u32_e32 v2, 0x410, v0
; GFX942-SDAG-NEXT: ds_write2_b32 v2, v4, v4 offset1:1
; GFX942-SDAG-NEXT: ds_write_b32 v0, v4 offset:1048
; GFX942-SDAG-NEXT: v_lshlrev_b16_e32 v2, 8, v1
; GFX942-SDAG-NEXT: v_or_b32_sdwa v2, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX942-SDAG-NEXT: ds_write_b16 v0, v2 offset:1052
; GFX942-SDAG-NEXT: ds_write_b8 v0, v1 offset:1054
; GFX942-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-GISEL-LABEL: memset_p3_sz1055_align_4_varsetval:
; GFX942-GISEL: ; %bb.0: ; %entry
; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-GISEL-NEXT: v_and_b32_e32 v2, 0xff, v1
; GFX942-GISEL-NEXT: v_lshl_or_b32 v3, v2, 8, v2
; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v4, 16, v2
; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v2, 24, v2
; GFX942-GISEL-NEXT: v_or3_b32 v2, v3, v4, v2
; GFX942-GISEL-NEXT: s_mov_b64 s[0:1], 0
; GFX942-GISEL-NEXT: v_mov_b32_e32 v3, v2
; GFX942-GISEL-NEXT: v_mov_b32_e32 v4, v2
; GFX942-GISEL-NEXT: v_mov_b32_e32 v5, v2
; GFX942-GISEL-NEXT: v_mov_b64_e32 v[6:7], 0x400
; GFX942-GISEL-NEXT: v_mov_b32_e32 v10, v0
; GFX942-GISEL-NEXT: v_mov_b64_e32 v[8:9], s[0:1]
; GFX942-GISEL-NEXT: .LBB8_1: ; %static-memset-expansion-main-body
; GFX942-GISEL-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX942-GISEL-NEXT: v_add_co_u32_e32 v8, vcc, 0x100, v8
; GFX942-GISEL-NEXT: ds_write2_b64 v10, v[2:3], v[4:5] offset1:1
; GFX942-GISEL-NEXT: ds_write2_b64 v10, v[2:3], v[4:5] offset0:2 offset1:3
; GFX942-GISEL-NEXT: ds_write2_b64 v10, v[2:3], v[4:5] offset0:4 offset1:5
; GFX942-GISEL-NEXT: ds_write2_b64 v10, v[2:3], v[4:5] offset0:6 offset1:7
; GFX942-GISEL-NEXT: ds_write2_b64 v10, v[2:3], v[4:5] offset0:8 offset1:9
; GFX942-GISEL-NEXT: ds_write2_b64 v10, v[2:3], v[4:5] offset0:10 offset1:11
; GFX942-GISEL-NEXT: ds_write2_b64 v10, v[2:3], v[4:5] offset0:12 offset1:13
; GFX942-GISEL-NEXT: ds_write2_b64 v10, v[2:3], v[4:5] offset0:14 offset1:15
; GFX942-GISEL-NEXT: ds_write2_b64 v10, v[2:3], v[4:5] offset0:16 offset1:17
; GFX942-GISEL-NEXT: ds_write2_b64 v10, v[2:3], v[4:5] offset0:18 offset1:19
; GFX942-GISEL-NEXT: ds_write2_b64 v10, v[2:3], v[4:5] offset0:20 offset1:21
; GFX942-GISEL-NEXT: ds_write2_b64 v10, v[2:3], v[4:5] offset0:22 offset1:23
; GFX942-GISEL-NEXT: ds_write2_b64 v10, v[2:3], v[4:5] offset0:24 offset1:25
; GFX942-GISEL-NEXT: ds_write2_b64 v10, v[2:3], v[4:5] offset0:26 offset1:27
; GFX942-GISEL-NEXT: ds_write2_b64 v10, v[2:3], v[4:5] offset0:28 offset1:29
; GFX942-GISEL-NEXT: ds_write2_b64 v10, v[2:3], v[4:5] offset0:30 offset1:31
; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v9, vcc, 0, v9, vcc
; GFX942-GISEL-NEXT: v_add_u32_e32 v10, 0x100, v10
; GFX942-GISEL-NEXT: v_cmp_lt_u64_e32 vcc, v[8:9], v[6:7]
; GFX942-GISEL-NEXT: s_cbranch_vccnz .LBB8_1
; GFX942-GISEL-NEXT: ; %bb.2: ; %static-memset-post-expansion
; GFX942-GISEL-NEXT: v_and_b32_e32 v2, 0xff, v1
; GFX942-GISEL-NEXT: v_lshl_or_b32 v3, v2, 8, v2
; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v4, 16, v2
; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v2, 24, v2
; GFX942-GISEL-NEXT: v_or3_b32 v2, v3, v4, v2
; GFX942-GISEL-NEXT: v_mov_b32_e32 v3, v2
; GFX942-GISEL-NEXT: v_mov_b32_e32 v4, v2
; GFX942-GISEL-NEXT: v_mov_b32_e32 v5, v2
; GFX942-GISEL-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset0:128 offset1:129
; GFX942-GISEL-NEXT: v_mov_b32_e32 v3, 8
; GFX942-GISEL-NEXT: v_lshlrev_b16_sdwa v3, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX942-GISEL-NEXT: v_or_b32_sdwa v3, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX942-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v3
; GFX942-GISEL-NEXT: v_lshl_or_b32 v4, v4, 16, v4
; GFX942-GISEL-NEXT: v_add_u32_e32 v5, 0x410, v0
; GFX942-GISEL-NEXT: ds_write2_b32 v5, v4, v4 offset1:1
; GFX942-GISEL-NEXT: ds_write_b32 v0, v2 offset:1048
; GFX942-GISEL-NEXT: ds_write_b16 v0, v3 offset:1052
; GFX942-GISEL-NEXT: ds_write_b8 v0, v1 offset:1054
; GFX942-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-GISEL-NEXT: s_setpc_b64 s[30:31]
entry:
; Constant length 1055; the trailing i1 false is the intrinsic's volatile flag.
tail call void @llvm.memset.p3.i64(ptr addrspace(3) noundef nonnull align 4 %dst, i8 %setval, i64 1055, i1 false)
ret void
}
; Test: memset of a constant 2048 bytes through an align-4 addrspace(3) pointer
; with a run-time i8 fill value (%setval).
;
; Expected lowering (SelectionDAG and GlobalISel runs alike): one
; "static-memset-expansion-main-body" loop whose stores are selected as
; ds_write2_* instructions. Each iteration covers 0x100 bytes (the destination
; register is advanced by 0x100) and the loop runs while the byte counter is
; below 0x800 (2048). The "static-memset-post-expansion" block holds no
; residual stores, only the wait and the return.
define void @memset_p3_sz2048_align_4_varsetval(ptr addrspace(3) align 4 %dst, i8 %setval) {
; GFX942-SDAG-LABEL: memset_p3_sz2048_align_4_varsetval:
; GFX942-SDAG: ; %bb.0: ; %entry
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: s_mov_b32 s0, 0x4040404
; GFX942-SDAG-NEXT: v_perm_b32 v2, v1, v1, s0
; GFX942-SDAG-NEXT: s_mov_b64 s[0:1], 0
; GFX942-SDAG-NEXT: v_mov_b64_e32 v[34:35], 0x800
; GFX942-SDAG-NEXT: v_mov_b32_e32 v1, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v4, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v6, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v5, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v8, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v7, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v10, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v9, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v12, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v11, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v14, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v13, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v16, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v15, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v18, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v17, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v20, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v19, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v22, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v21, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v24, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v23, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v26, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v25, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v28, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v27, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v30, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v29, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v32, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v31, v2
; GFX942-SDAG-NEXT: .LBB9_1: ; %static-memset-expansion-main-body
; GFX942-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX942-SDAG-NEXT: s_add_u32 s0, s0, 0x100
; GFX942-SDAG-NEXT: s_addc_u32 s1, s1, 0
; GFX942-SDAG-NEXT: v_cmp_lt_u64_e32 vcc, s[0:1], v[34:35]
; GFX942-SDAG-NEXT: ds_write2_b32 v0, v32, v31 offset0:30 offset1:31
; GFX942-SDAG-NEXT: ds_write2_b32 v0, v30, v29 offset0:28 offset1:29
; GFX942-SDAG-NEXT: ds_write2_b32 v0, v28, v27 offset0:26 offset1:27
; GFX942-SDAG-NEXT: ds_write2_b32 v0, v26, v25 offset0:24 offset1:25
; GFX942-SDAG-NEXT: ds_write2_b32 v0, v24, v23 offset0:22 offset1:23
; GFX942-SDAG-NEXT: ds_write2_b32 v0, v22, v21 offset0:20 offset1:21
; GFX942-SDAG-NEXT: ds_write2_b32 v0, v20, v19 offset0:18 offset1:19
; GFX942-SDAG-NEXT: ds_write2_b32 v0, v18, v17 offset0:16 offset1:17
; GFX942-SDAG-NEXT: ds_write2_b32 v0, v16, v15 offset0:14 offset1:15
; GFX942-SDAG-NEXT: ds_write2_b32 v0, v14, v13 offset0:12 offset1:13
; GFX942-SDAG-NEXT: ds_write2_b32 v0, v12, v11 offset0:10 offset1:11
; GFX942-SDAG-NEXT: ds_write2_b32 v0, v10, v9 offset0:8 offset1:9
; GFX942-SDAG-NEXT: ds_write2_b32 v0, v8, v7 offset0:6 offset1:7
; GFX942-SDAG-NEXT: ds_write2_b32 v0, v6, v5 offset0:4 offset1:5
; GFX942-SDAG-NEXT: ds_write2_b32 v0, v4, v3 offset0:2 offset1:3
; GFX942-SDAG-NEXT: ds_write2_b32 v0, v2, v1 offset1:1
; GFX942-SDAG-NEXT: ds_write2_b32 v0, v32, v31 offset0:62 offset1:63
; GFX942-SDAG-NEXT: ds_write2_b32 v0, v30, v29 offset0:60 offset1:61
; GFX942-SDAG-NEXT: ds_write2_b32 v0, v28, v27 offset0:58 offset1:59
; GFX942-SDAG-NEXT: ds_write2_b32 v0, v26, v25 offset0:56 offset1:57
; GFX942-SDAG-NEXT: ds_write2_b32 v0, v24, v23 offset0:54 offset1:55
; GFX942-SDAG-NEXT: ds_write2_b32 v0, v22, v21 offset0:52 offset1:53
; GFX942-SDAG-NEXT: ds_write2_b32 v0, v20, v19 offset0:50 offset1:51
; GFX942-SDAG-NEXT: ds_write2_b32 v0, v18, v17 offset0:48 offset1:49
; GFX942-SDAG-NEXT: ds_write2_b32 v0, v16, v15 offset0:46 offset1:47
; GFX942-SDAG-NEXT: ds_write2_b32 v0, v14, v13 offset0:44 offset1:45
; GFX942-SDAG-NEXT: ds_write2_b32 v0, v12, v11 offset0:42 offset1:43
; GFX942-SDAG-NEXT: ds_write2_b32 v0, v10, v9 offset0:40 offset1:41
; GFX942-SDAG-NEXT: ds_write2_b32 v0, v8, v7 offset0:38 offset1:39
; GFX942-SDAG-NEXT: ds_write2_b32 v0, v6, v5 offset0:36 offset1:37
; GFX942-SDAG-NEXT: ds_write2_b32 v0, v4, v3 offset0:34 offset1:35
; GFX942-SDAG-NEXT: ds_write2_b32 v0, v2, v1 offset0:32 offset1:33
; GFX942-SDAG-NEXT: v_add_u32_e32 v0, 0x100, v0
; GFX942-SDAG-NEXT: s_cbranch_vccnz .LBB9_1
; GFX942-SDAG-NEXT: ; %bb.2: ; %static-memset-post-expansion
; GFX942-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-GISEL-LABEL: memset_p3_sz2048_align_4_varsetval:
; GFX942-GISEL: ; %bb.0: ; %entry
; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1
; GFX942-GISEL-NEXT: v_lshl_or_b32 v2, v1, 8, v1
; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v1
; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v1, 24, v1
; GFX942-GISEL-NEXT: v_or3_b32 v2, v2, v3, v1
; GFX942-GISEL-NEXT: s_mov_b64 s[0:1], 0
; GFX942-GISEL-NEXT: v_mov_b32_e32 v3, v2
; GFX942-GISEL-NEXT: v_mov_b32_e32 v4, v2
; GFX942-GISEL-NEXT: v_mov_b32_e32 v5, v2
; GFX942-GISEL-NEXT: v_mov_b64_e32 v[6:7], 0x800
; GFX942-GISEL-NEXT: v_mov_b64_e32 v[8:9], s[0:1]
; GFX942-GISEL-NEXT: .LBB9_1: ; %static-memset-expansion-main-body
; GFX942-GISEL-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX942-GISEL-NEXT: v_add_co_u32_e32 v8, vcc, 0x100, v8
; GFX942-GISEL-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset1:1
; GFX942-GISEL-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset0:2 offset1:3
; GFX942-GISEL-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset0:4 offset1:5
; GFX942-GISEL-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset0:6 offset1:7
; GFX942-GISEL-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset0:8 offset1:9
; GFX942-GISEL-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset0:10 offset1:11
; GFX942-GISEL-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset0:12 offset1:13
; GFX942-GISEL-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset0:14 offset1:15
; GFX942-GISEL-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset0:16 offset1:17
; GFX942-GISEL-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset0:18 offset1:19
; GFX942-GISEL-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset0:20 offset1:21
; GFX942-GISEL-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset0:22 offset1:23
; GFX942-GISEL-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset0:24 offset1:25
; GFX942-GISEL-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset0:26 offset1:27
; GFX942-GISEL-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset0:28 offset1:29
; GFX942-GISEL-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset0:30 offset1:31
; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v9, vcc, 0, v9, vcc
; GFX942-GISEL-NEXT: v_add_u32_e32 v0, 0x100, v0
; GFX942-GISEL-NEXT: v_cmp_lt_u64_e32 vcc, v[8:9], v[6:7]
; GFX942-GISEL-NEXT: s_cbranch_vccnz .LBB9_1
; GFX942-GISEL-NEXT: ; %bb.2: ; %static-memset-post-expansion
; GFX942-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-GISEL-NEXT: s_setpc_b64 s[30:31]
entry:
; Constant length 2048; the trailing i1 false is the intrinsic's is-volatile flag.
tail call void @llvm.memset.p3.i64(ptr addrspace(3) noundef nonnull align 4 %dst, i8 %setval, i64 2048, i1 false)
ret void
}
; Test: memset of a constant 1055 bytes through an align-4 addrspace(5) pointer
; with a run-time i8 fill value (%setval).
;
; Expected lowering (SelectionDAG and GlobalISel runs alike):
;   * a "static-memset-expansion-main-body" loop of scratch_store_dwordx4
;     instructions covering 0x100 bytes per iteration, running while the byte
;     counter is below 0x400 (1024);
;   * a "static-memset-post-expansion" block that writes the remaining 31 bytes
;     relative to the original %dst with progressively narrower stores:
;     16 bytes at offset 1024, 8 at 1040, 4 at 1048, 2 at 1052 and 1 at 1054.
define void @memset_p5_sz1055_align_4_varsetval(ptr addrspace(5) align 4 %dst, i8 %setval) {
; GFX942-SDAG-LABEL: memset_p5_sz1055_align_4_varsetval:
; GFX942-SDAG: ; %bb.0: ; %entry
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: s_mov_b32 s0, 0x4040404
; GFX942-SDAG-NEXT: v_perm_b32 v2, v1, v1, s0
; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v4, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v5, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v6, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v7, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v8, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v9, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v10, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v11, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v12, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v13, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v14, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v15, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v16, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v17, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v18, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v19, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v20, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v21, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v22, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v23, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v24, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v25, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v26, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v27, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v28, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v29, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v30, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v31, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v32, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v33, v2
; GFX942-SDAG-NEXT: s_mov_b64 s[0:1], 0
; GFX942-SDAG-NEXT: v_mov_b64_e32 v[34:35], 0x400
; GFX942-SDAG-NEXT: v_mov_b32_e32 v36, v0
; GFX942-SDAG-NEXT: .LBB10_1: ; %static-memset-expansion-main-body
; GFX942-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX942-SDAG-NEXT: s_add_u32 s0, s0, 0x100
; GFX942-SDAG-NEXT: s_addc_u32 s1, s1, 0
; GFX942-SDAG-NEXT: v_cmp_lt_u64_e32 vcc, s[0:1], v[34:35]
; GFX942-SDAG-NEXT: scratch_store_dwordx4 v36, v[30:33], off offset:112
; GFX942-SDAG-NEXT: scratch_store_dwordx4 v36, v[26:29], off offset:96
; GFX942-SDAG-NEXT: scratch_store_dwordx4 v36, v[22:25], off offset:80
; GFX942-SDAG-NEXT: scratch_store_dwordx4 v36, v[18:21], off offset:64
; GFX942-SDAG-NEXT: scratch_store_dwordx4 v36, v[14:17], off offset:48
; GFX942-SDAG-NEXT: scratch_store_dwordx4 v36, v[10:13], off offset:32
; GFX942-SDAG-NEXT: scratch_store_dwordx4 v36, v[6:9], off offset:16
; GFX942-SDAG-NEXT: scratch_store_dwordx4 v36, v[2:5], off
; GFX942-SDAG-NEXT: scratch_store_dwordx4 v36, v[30:33], off offset:240
; GFX942-SDAG-NEXT: scratch_store_dwordx4 v36, v[26:29], off offset:224
; GFX942-SDAG-NEXT: scratch_store_dwordx4 v36, v[22:25], off offset:208
; GFX942-SDAG-NEXT: scratch_store_dwordx4 v36, v[18:21], off offset:192
; GFX942-SDAG-NEXT: scratch_store_dwordx4 v36, v[14:17], off offset:176
; GFX942-SDAG-NEXT: scratch_store_dwordx4 v36, v[10:13], off offset:160
; GFX942-SDAG-NEXT: scratch_store_dwordx4 v36, v[6:9], off offset:144
; GFX942-SDAG-NEXT: scratch_store_dwordx4 v36, v[2:5], off offset:128
; GFX942-SDAG-NEXT: v_add_u32_e32 v36, 0x100, v36
; GFX942-SDAG-NEXT: s_cbranch_vccnz .LBB10_1
; GFX942-SDAG-NEXT: ; %bb.2: ; %static-memset-post-expansion
; GFX942-SDAG-NEXT: s_mov_b32 s0, 0x4040404
; GFX942-SDAG-NEXT: v_perm_b32 v2, v1, v1, s0
; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v4, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v5, v2
; GFX942-SDAG-NEXT: scratch_store_dwordx4 v0, v[2:5], off offset:1024
; GFX942-SDAG-NEXT: scratch_store_dwordx2 v0, v[2:3], off offset:1040
; GFX942-SDAG-NEXT: scratch_store_dword v0, v2, off offset:1048
; GFX942-SDAG-NEXT: v_lshlrev_b16_e32 v2, 8, v1
; GFX942-SDAG-NEXT: v_or_b32_sdwa v2, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX942-SDAG-NEXT: scratch_store_short v0, v2, off offset:1052
; GFX942-SDAG-NEXT: scratch_store_byte v0, v1, off offset:1054
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-GISEL-LABEL: memset_p5_sz1055_align_4_varsetval:
; GFX942-GISEL: ; %bb.0: ; %entry
; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-GISEL-NEXT: v_and_b32_e32 v2, 0xff, v1
; GFX942-GISEL-NEXT: v_lshl_or_b32 v3, v2, 8, v2
; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v4, 16, v2
; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v2, 24, v2
; GFX942-GISEL-NEXT: v_or3_b32 v2, v3, v4, v2
; GFX942-GISEL-NEXT: s_mov_b64 s[0:1], 0
; GFX942-GISEL-NEXT: v_mov_b32_e32 v3, v2
; GFX942-GISEL-NEXT: v_mov_b32_e32 v4, v2
; GFX942-GISEL-NEXT: v_mov_b32_e32 v5, v2
; GFX942-GISEL-NEXT: v_mov_b64_e32 v[6:7], 0x400
; GFX942-GISEL-NEXT: v_mov_b32_e32 v10, v0
; GFX942-GISEL-NEXT: v_mov_b64_e32 v[8:9], s[0:1]
; GFX942-GISEL-NEXT: .LBB10_1: ; %static-memset-expansion-main-body
; GFX942-GISEL-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX942-GISEL-NEXT: v_add_co_u32_e32 v8, vcc, 0x100, v8
; GFX942-GISEL-NEXT: scratch_store_dwordx4 v10, v[2:5], off
; GFX942-GISEL-NEXT: scratch_store_dwordx4 v10, v[2:5], off offset:16
; GFX942-GISEL-NEXT: scratch_store_dwordx4 v10, v[2:5], off offset:32
; GFX942-GISEL-NEXT: scratch_store_dwordx4 v10, v[2:5], off offset:48
; GFX942-GISEL-NEXT: scratch_store_dwordx4 v10, v[2:5], off offset:64
; GFX942-GISEL-NEXT: scratch_store_dwordx4 v10, v[2:5], off offset:80
; GFX942-GISEL-NEXT: scratch_store_dwordx4 v10, v[2:5], off offset:96
; GFX942-GISEL-NEXT: scratch_store_dwordx4 v10, v[2:5], off offset:112
; GFX942-GISEL-NEXT: scratch_store_dwordx4 v10, v[2:5], off offset:128
; GFX942-GISEL-NEXT: scratch_store_dwordx4 v10, v[2:5], off offset:144
; GFX942-GISEL-NEXT: scratch_store_dwordx4 v10, v[2:5], off offset:160
; GFX942-GISEL-NEXT: scratch_store_dwordx4 v10, v[2:5], off offset:176
; GFX942-GISEL-NEXT: scratch_store_dwordx4 v10, v[2:5], off offset:192
; GFX942-GISEL-NEXT: scratch_store_dwordx4 v10, v[2:5], off offset:208
; GFX942-GISEL-NEXT: scratch_store_dwordx4 v10, v[2:5], off offset:224
; GFX942-GISEL-NEXT: scratch_store_dwordx4 v10, v[2:5], off offset:240
; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v9, vcc, 0, v9, vcc
; GFX942-GISEL-NEXT: v_add_u32_e32 v10, 0x100, v10
; GFX942-GISEL-NEXT: v_cmp_lt_u64_e32 vcc, v[8:9], v[6:7]
; GFX942-GISEL-NEXT: s_cbranch_vccnz .LBB10_1
; GFX942-GISEL-NEXT: ; %bb.2: ; %static-memset-post-expansion
; GFX942-GISEL-NEXT: v_and_b32_e32 v2, 0xff, v1
; GFX942-GISEL-NEXT: v_lshl_or_b32 v3, v2, 8, v2
; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v4, 16, v2
; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v2, 24, v2
; GFX942-GISEL-NEXT: v_or3_b32 v2, v3, v4, v2
; GFX942-GISEL-NEXT: v_mov_b32_e32 v3, v2
; GFX942-GISEL-NEXT: v_mov_b32_e32 v4, v2
; GFX942-GISEL-NEXT: v_mov_b32_e32 v5, v2
; GFX942-GISEL-NEXT: scratch_store_dwordx4 v0, v[2:5], off offset:1024
; GFX942-GISEL-NEXT: s_nop 1
; GFX942-GISEL-NEXT: v_mov_b32_e32 v3, 8
; GFX942-GISEL-NEXT: v_lshlrev_b16_sdwa v3, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX942-GISEL-NEXT: v_or_b32_sdwa v3, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX942-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v3
; GFX942-GISEL-NEXT: v_lshl_or_b32 v4, v4, 16, v4
; GFX942-GISEL-NEXT: v_mov_b32_e32 v5, v4
; GFX942-GISEL-NEXT: scratch_store_dwordx2 v0, v[4:5], off offset:1040
; GFX942-GISEL-NEXT: scratch_store_dword v0, v2, off offset:1048
; GFX942-GISEL-NEXT: scratch_store_short v0, v3, off offset:1052
; GFX942-GISEL-NEXT: scratch_store_byte v0, v1, off offset:1054
; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-GISEL-NEXT: s_setpc_b64 s[30:31]
entry:
; Constant length 1055 (= 1024 + 31); the trailing i1 false is the intrinsic's is-volatile flag.
tail call void @llvm.memset.p5.i64(ptr addrspace(5) noundef nonnull align 4 %dst, i8 %setval, i64 1055, i1 false)
ret void
}
; Test: memset of a constant 2048 bytes through an align-4 addrspace(5) pointer
; with a run-time i8 fill value (%setval).
;
; Expected lowering (SelectionDAG and GlobalISel runs alike): one
; "static-memset-expansion-main-body" loop of sixteen scratch_store_dwordx4
; instructions per iteration (0x100 bytes), running while the byte counter is
; below 0x800 (2048). The "static-memset-post-expansion" block holds no
; residual stores, only the wait and the return.
define void @memset_p5_sz2048_align_4_varsetval(ptr addrspace(5) align 4 %dst, i8 %setval) {
; GFX942-SDAG-LABEL: memset_p5_sz2048_align_4_varsetval:
; GFX942-SDAG: ; %bb.0: ; %entry
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: s_mov_b32 s0, 0x4040404
; GFX942-SDAG-NEXT: v_perm_b32 v2, v1, v1, s0
; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v4, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v5, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v6, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v7, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v8, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v9, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v10, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v11, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v12, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v13, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v14, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v15, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v16, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v17, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v18, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v19, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v20, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v21, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v22, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v23, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v24, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v25, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v26, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v27, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v28, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v29, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v30, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v31, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v32, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v33, v2
; GFX942-SDAG-NEXT: s_mov_b64 s[0:1], 0
; GFX942-SDAG-NEXT: v_mov_b64_e32 v[34:35], 0x800
; GFX942-SDAG-NEXT: .LBB11_1: ; %static-memset-expansion-main-body
; GFX942-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX942-SDAG-NEXT: s_add_u32 s0, s0, 0x100
; GFX942-SDAG-NEXT: s_addc_u32 s1, s1, 0
; GFX942-SDAG-NEXT: v_cmp_lt_u64_e32 vcc, s[0:1], v[34:35]
; GFX942-SDAG-NEXT: scratch_store_dwordx4 v0, v[30:33], off offset:112
; GFX942-SDAG-NEXT: scratch_store_dwordx4 v0, v[26:29], off offset:96
; GFX942-SDAG-NEXT: scratch_store_dwordx4 v0, v[22:25], off offset:80
; GFX942-SDAG-NEXT: scratch_store_dwordx4 v0, v[18:21], off offset:64
; GFX942-SDAG-NEXT: scratch_store_dwordx4 v0, v[14:17], off offset:48
; GFX942-SDAG-NEXT: scratch_store_dwordx4 v0, v[10:13], off offset:32
; GFX942-SDAG-NEXT: scratch_store_dwordx4 v0, v[6:9], off offset:16
; GFX942-SDAG-NEXT: scratch_store_dwordx4 v0, v[2:5], off
; GFX942-SDAG-NEXT: scratch_store_dwordx4 v0, v[30:33], off offset:240
; GFX942-SDAG-NEXT: scratch_store_dwordx4 v0, v[26:29], off offset:224
; GFX942-SDAG-NEXT: scratch_store_dwordx4 v0, v[22:25], off offset:208
; GFX942-SDAG-NEXT: scratch_store_dwordx4 v0, v[18:21], off offset:192
; GFX942-SDAG-NEXT: scratch_store_dwordx4 v0, v[14:17], off offset:176
; GFX942-SDAG-NEXT: scratch_store_dwordx4 v0, v[10:13], off offset:160
; GFX942-SDAG-NEXT: scratch_store_dwordx4 v0, v[6:9], off offset:144
; GFX942-SDAG-NEXT: scratch_store_dwordx4 v0, v[2:5], off offset:128
; GFX942-SDAG-NEXT: v_add_u32_e32 v0, 0x100, v0
; GFX942-SDAG-NEXT: s_cbranch_vccnz .LBB11_1
; GFX942-SDAG-NEXT: ; %bb.2: ; %static-memset-post-expansion
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-GISEL-LABEL: memset_p5_sz2048_align_4_varsetval:
; GFX942-GISEL: ; %bb.0: ; %entry
; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1
; GFX942-GISEL-NEXT: v_lshl_or_b32 v2, v1, 8, v1
; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v1
; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v1, 24, v1
; GFX942-GISEL-NEXT: v_or3_b32 v2, v2, v3, v1
; GFX942-GISEL-NEXT: s_mov_b64 s[0:1], 0
; GFX942-GISEL-NEXT: v_mov_b32_e32 v3, v2
; GFX942-GISEL-NEXT: v_mov_b32_e32 v4, v2
; GFX942-GISEL-NEXT: v_mov_b32_e32 v5, v2
; GFX942-GISEL-NEXT: v_mov_b64_e32 v[6:7], 0x800
; GFX942-GISEL-NEXT: v_mov_b64_e32 v[8:9], s[0:1]
; GFX942-GISEL-NEXT: .LBB11_1: ; %static-memset-expansion-main-body
; GFX942-GISEL-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX942-GISEL-NEXT: v_add_co_u32_e32 v8, vcc, 0x100, v8
; GFX942-GISEL-NEXT: scratch_store_dwordx4 v0, v[2:5], off
; GFX942-GISEL-NEXT: scratch_store_dwordx4 v0, v[2:5], off offset:16
; GFX942-GISEL-NEXT: scratch_store_dwordx4 v0, v[2:5], off offset:32
; GFX942-GISEL-NEXT: scratch_store_dwordx4 v0, v[2:5], off offset:48
; GFX942-GISEL-NEXT: scratch_store_dwordx4 v0, v[2:5], off offset:64
; GFX942-GISEL-NEXT: scratch_store_dwordx4 v0, v[2:5], off offset:80
; GFX942-GISEL-NEXT: scratch_store_dwordx4 v0, v[2:5], off offset:96
; GFX942-GISEL-NEXT: scratch_store_dwordx4 v0, v[2:5], off offset:112
; GFX942-GISEL-NEXT: scratch_store_dwordx4 v0, v[2:5], off offset:128
; GFX942-GISEL-NEXT: scratch_store_dwordx4 v0, v[2:5], off offset:144
; GFX942-GISEL-NEXT: scratch_store_dwordx4 v0, v[2:5], off offset:160
; GFX942-GISEL-NEXT: scratch_store_dwordx4 v0, v[2:5], off offset:176
; GFX942-GISEL-NEXT: scratch_store_dwordx4 v0, v[2:5], off offset:192
; GFX942-GISEL-NEXT: scratch_store_dwordx4 v0, v[2:5], off offset:208
; GFX942-GISEL-NEXT: scratch_store_dwordx4 v0, v[2:5], off offset:224
; GFX942-GISEL-NEXT: scratch_store_dwordx4 v0, v[2:5], off offset:240
; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v9, vcc, 0, v9, vcc
; GFX942-GISEL-NEXT: v_add_u32_e32 v0, 0x100, v0
; GFX942-GISEL-NEXT: v_cmp_lt_u64_e32 vcc, v[8:9], v[6:7]
; GFX942-GISEL-NEXT: s_cbranch_vccnz .LBB11_1
; GFX942-GISEL-NEXT: ; %bb.2: ; %static-memset-post-expansion
; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-GISEL-NEXT: s_setpc_b64 s[30:31]
entry:
; Constant length 2048; the trailing i1 false is the intrinsic's is-volatile flag.
tail call void @llvm.memset.p5.i64(ptr addrspace(5) noundef nonnull align 4 %dst, i8 %setval, i64 2048, i1 false)
ret void
}
; Test: memset with a run-time length (%size) and the constant fill byte 40
; (0x28) through an align-4 addrspace(1) pointer.
;
; Expected lowering (SelectionDAG and GlobalISel runs alike) is two loops,
; each skipped with s_cbranch_execz when its byte count is zero:
;   * "dynamic-memset-expansion-main-body" issues one 16-byte
;     global_store_dwordx4 of the splatted value 0x28282828 per iteration and
;     covers %size rounded down to a multiple of 16;
;   * "dynamic-memset-expansion-residual-body" issues one global_store_byte of
;     40 per iteration for the remaining (%size & 15) bytes, starting at
;     %dst plus the rounded-down size.
define void @memset_p1_varsz_align_4_set40(ptr addrspace(1) align 4 %dst, i64 %size) {
; GFX942-SDAG-LABEL: memset_p1_varsz_align_4_set40:
; GFX942-SDAG: ; %bb.0: ; %entry
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: v_and_b32_e32 v10, -16, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v11, v3
; GFX942-SDAG-NEXT: v_and_b32_e32 v8, 15, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v9, 0
; GFX942-SDAG-NEXT: s_mov_b64 s[0:1], 0
; GFX942-SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[10:11]
; GFX942-SDAG-NEXT: s_and_saveexec_b64 s[2:3], vcc
; GFX942-SDAG-NEXT: s_cbranch_execz .LBB12_3
; GFX942-SDAG-NEXT: ; %bb.1: ; %dynamic-memset-expansion-main-body.preheader
; GFX942-SDAG-NEXT: v_mov_b32_e32 v4, 0x28282828
; GFX942-SDAG-NEXT: v_mov_b32_e32 v5, v4
; GFX942-SDAG-NEXT: v_mov_b32_e32 v6, v4
; GFX942-SDAG-NEXT: v_mov_b32_e32 v7, v4
; GFX942-SDAG-NEXT: s_mov_b64 s[4:5], 0
; GFX942-SDAG-NEXT: .LBB12_2: ; %dynamic-memset-expansion-main-body
; GFX942-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX942-SDAG-NEXT: v_lshl_add_u64 v[12:13], v[0:1], 0, s[4:5]
; GFX942-SDAG-NEXT: s_add_u32 s4, s4, 16
; GFX942-SDAG-NEXT: s_addc_u32 s5, s5, 0
; GFX942-SDAG-NEXT: v_cmp_ge_u64_e32 vcc, s[4:5], v[10:11]
; GFX942-SDAG-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
; GFX942-SDAG-NEXT: global_store_dwordx4 v[12:13], v[4:7], off
; GFX942-SDAG-NEXT: s_andn2_b64 exec, exec, s[0:1]
; GFX942-SDAG-NEXT: s_cbranch_execnz .LBB12_2
; GFX942-SDAG-NEXT: .LBB12_3: ; %Flow4
; GFX942-SDAG-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX942-SDAG-NEXT: s_mov_b64 s[0:1], 0
; GFX942-SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[8:9]
; GFX942-SDAG-NEXT: s_and_saveexec_b64 s[2:3], vcc
; GFX942-SDAG-NEXT: s_cbranch_execz .LBB12_6
; GFX942-SDAG-NEXT: ; %bb.4: ; %dynamic-memset-expansion-residual-body.preheader
; GFX942-SDAG-NEXT: v_and_b32_e32 v2, -16, v2
; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
; GFX942-SDAG-NEXT: v_mov_b32_e32 v2, 40
; GFX942-SDAG-NEXT: s_mov_b64 s[4:5], 0
; GFX942-SDAG-NEXT: .LBB12_5: ; %dynamic-memset-expansion-residual-body
; GFX942-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX942-SDAG-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[4:5]
; GFX942-SDAG-NEXT: s_add_u32 s4, s4, 1
; GFX942-SDAG-NEXT: s_addc_u32 s5, s5, 0
; GFX942-SDAG-NEXT: v_cmp_ge_u64_e32 vcc, s[4:5], v[8:9]
; GFX942-SDAG-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
; GFX942-SDAG-NEXT: global_store_byte v[4:5], v2, off
; GFX942-SDAG-NEXT: s_andn2_b64 exec, exec, s[0:1]
; GFX942-SDAG-NEXT: s_cbranch_execnz .LBB12_5
; GFX942-SDAG-NEXT: .LBB12_6: ; %Flow2
; GFX942-SDAG-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-GISEL-LABEL: memset_p1_varsz_align_4_set40:
; GFX942-GISEL: ; %bb.0: ; %entry
; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-GISEL-NEXT: v_and_b32_e32 v8, 15, v2
; GFX942-GISEL-NEXT: v_sub_co_u32_e32 v10, vcc, v2, v8
; GFX942-GISEL-NEXT: v_mov_b32_e32 v9, 0
; GFX942-GISEL-NEXT: s_nop 0
; GFX942-GISEL-NEXT: v_subbrev_co_u32_e32 v11, vcc, 0, v3, vcc
; GFX942-GISEL-NEXT: s_mov_b64 s[0:1], 0
; GFX942-GISEL-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[10:11]
; GFX942-GISEL-NEXT: s_and_saveexec_b64 s[2:3], vcc
; GFX942-GISEL-NEXT: s_cbranch_execz .LBB12_3
; GFX942-GISEL-NEXT: ; %bb.1: ; %dynamic-memset-expansion-main-body.preheader
; GFX942-GISEL-NEXT: s_mov_b32 s4, 0x28282828
; GFX942-GISEL-NEXT: s_mov_b32 s5, s4
; GFX942-GISEL-NEXT: s_mov_b32 s6, s4
; GFX942-GISEL-NEXT: s_mov_b32 s7, s4
; GFX942-GISEL-NEXT: v_mov_b64_e32 v[4:5], s[4:5]
; GFX942-GISEL-NEXT: v_mov_b64_e32 v[6:7], s[6:7]
; GFX942-GISEL-NEXT: v_mov_b64_e32 v[12:13], s[0:1]
; GFX942-GISEL-NEXT: .LBB12_2: ; %dynamic-memset-expansion-main-body
; GFX942-GISEL-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX942-GISEL-NEXT: v_add_co_u32_e32 v14, vcc, v0, v12
; GFX942-GISEL-NEXT: s_nop 1
; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v15, vcc, v1, v13, vcc
; GFX942-GISEL-NEXT: v_add_co_u32_e32 v12, vcc, 16, v12
; GFX942-GISEL-NEXT: global_store_dwordx4 v[14:15], v[4:7], off
; GFX942-GISEL-NEXT: s_nop 0
; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v13, vcc, 0, v13, vcc
; GFX942-GISEL-NEXT: v_cmp_ge_u64_e32 vcc, v[12:13], v[10:11]
; GFX942-GISEL-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
; GFX942-GISEL-NEXT: s_andn2_b64 exec, exec, s[0:1]
; GFX942-GISEL-NEXT: s_cbranch_execnz .LBB12_2
; GFX942-GISEL-NEXT: .LBB12_3: ; %Flow4
; GFX942-GISEL-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX942-GISEL-NEXT: s_mov_b64 s[0:1], 0
; GFX942-GISEL-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[8:9]
; GFX942-GISEL-NEXT: s_and_saveexec_b64 s[2:3], vcc
; GFX942-GISEL-NEXT: s_cbranch_execz .LBB12_6
; GFX942-GISEL-NEXT: ; %bb.4: ; %dynamic-memset-expansion-residual-body.preheader
; GFX942-GISEL-NEXT: v_lshrrev_b64 v[2:3], 4, v[2:3]
; GFX942-GISEL-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 4, v[0:1]
; GFX942-GISEL-NEXT: v_mov_b32_e32 v4, 40
; GFX942-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1]
; GFX942-GISEL-NEXT: .LBB12_5: ; %dynamic-memset-expansion-residual-body
; GFX942-GISEL-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX942-GISEL-NEXT: v_add_co_u32_e32 v6, vcc, v0, v2
; GFX942-GISEL-NEXT: s_nop 1
; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v7, vcc, v1, v3, vcc
; GFX942-GISEL-NEXT: v_add_co_u32_e32 v2, vcc, 1, v2
; GFX942-GISEL-NEXT: global_store_byte v[6:7], v4, off
; GFX942-GISEL-NEXT: s_nop 0
; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
; GFX942-GISEL-NEXT: v_cmp_ge_u64_e32 vcc, v[2:3], v[8:9]
; GFX942-GISEL-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
; GFX942-GISEL-NEXT: s_andn2_b64 exec, exec, s[0:1]
; GFX942-GISEL-NEXT: s_cbranch_execnz .LBB12_5
; GFX942-GISEL-NEXT: .LBB12_6: ; %Flow2
; GFX942-GISEL-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-GISEL-NEXT: s_setpc_b64 s[30:31]
entry:
; Fill value is the immediate 40 and the length is the %size argument; the
; trailing i1 false is the intrinsic's is-volatile flag.
tail call void @llvm.memset.p1.i64(ptr addrspace(1) noundef nonnull align 4 %dst, i8 40, i64 %size, i1 false)
ret void
}
; Test: memset with a run-time length (%size) and the constant fill byte 0
; through an align-4 addrspace(1) pointer.
;
; Expected lowering (SelectionDAG and GlobalISel runs alike) is two loops,
; each skipped with s_cbranch_execz when its byte count is zero:
;   * "dynamic-memset-expansion-main-body" issues one 16-byte
;     global_store_dwordx4 of zero-filled registers per iteration and covers
;     %size rounded down to a multiple of 16;
;   * "dynamic-memset-expansion-residual-body" issues one global_store_byte of
;     0 per iteration for the remaining (%size & 15) bytes, starting at
;     %dst plus the rounded-down size.
define void @memset_p1_varsz_align_4_set0(ptr addrspace(1) align 4 %dst, i64 %size) {
; GFX942-SDAG-LABEL: memset_p1_varsz_align_4_set0:
; GFX942-SDAG: ; %bb.0: ; %entry
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT: v_and_b32_e32 v6, -16, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v7, v3
; GFX942-SDAG-NEXT: v_and_b32_e32 v4, 15, v2
; GFX942-SDAG-NEXT: v_mov_b32_e32 v5, 0
; GFX942-SDAG-NEXT: s_mov_b64 s[0:1], 0
; GFX942-SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[6:7]
; GFX942-SDAG-NEXT: s_and_saveexec_b64 s[2:3], vcc
; GFX942-SDAG-NEXT: s_cbranch_execz .LBB13_3
; GFX942-SDAG-NEXT: ; %bb.1: ; %dynamic-memset-expansion-main-body.preheader
; GFX942-SDAG-NEXT: v_mov_b32_e32 v8, v5
; GFX942-SDAG-NEXT: v_mov_b32_e32 v9, v5
; GFX942-SDAG-NEXT: v_mov_b32_e32 v10, v5
; GFX942-SDAG-NEXT: v_mov_b32_e32 v11, v5
; GFX942-SDAG-NEXT: s_mov_b64 s[4:5], 0
; GFX942-SDAG-NEXT: .LBB13_2: ; %dynamic-memset-expansion-main-body
; GFX942-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX942-SDAG-NEXT: v_lshl_add_u64 v[12:13], v[0:1], 0, s[4:5]
; GFX942-SDAG-NEXT: s_add_u32 s4, s4, 16
; GFX942-SDAG-NEXT: s_addc_u32 s5, s5, 0
; GFX942-SDAG-NEXT: v_cmp_ge_u64_e32 vcc, s[4:5], v[6:7]
; GFX942-SDAG-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
; GFX942-SDAG-NEXT: global_store_dwordx4 v[12:13], v[8:11], off
; GFX942-SDAG-NEXT: s_andn2_b64 exec, exec, s[0:1]
; GFX942-SDAG-NEXT: s_cbranch_execnz .LBB13_2
; GFX942-SDAG-NEXT: .LBB13_3: ; %Flow4
; GFX942-SDAG-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX942-SDAG-NEXT: s_mov_b64 s[0:1], 0
; GFX942-SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5]
; GFX942-SDAG-NEXT: s_and_saveexec_b64 s[2:3], vcc
; GFX942-SDAG-NEXT: s_cbranch_execz .LBB13_6
; GFX942-SDAG-NEXT: ; %bb.4: ; %dynamic-memset-expansion-residual-body.preheader
; GFX942-SDAG-NEXT: v_and_b32_e32 v2, -16, v2
; GFX942-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
; GFX942-SDAG-NEXT: v_mov_b32_e32 v2, 0
; GFX942-SDAG-NEXT: s_mov_b64 s[4:5], 0
; GFX942-SDAG-NEXT: .LBB13_5: ; %dynamic-memset-expansion-residual-body
; GFX942-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX942-SDAG-NEXT: v_lshl_add_u64 v[6:7], v[0:1], 0, s[4:5]
; GFX942-SDAG-NEXT: s_add_u32 s4, s4, 1
; GFX942-SDAG-NEXT: s_addc_u32 s5, s5, 0
; GFX942-SDAG-NEXT: v_cmp_ge_u64_e32 vcc, s[4:5], v[4:5]
; GFX942-SDAG-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
; GFX942-SDAG-NEXT: global_store_byte v[6:7], v2, off
; GFX942-SDAG-NEXT: s_andn2_b64 exec, exec, s[0:1]
; GFX942-SDAG-NEXT: s_cbranch_execnz .LBB13_5
; GFX942-SDAG-NEXT: .LBB13_6: ; %Flow2
; GFX942-SDAG-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX942-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-GISEL-LABEL: memset_p1_varsz_align_4_set0:
; GFX942-GISEL: ; %bb.0: ; %entry
; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-GISEL-NEXT: v_and_b32_e32 v8, 15, v2
; GFX942-GISEL-NEXT: v_sub_co_u32_e32 v10, vcc, v2, v8
; GFX942-GISEL-NEXT: s_mov_b32 s0, 0
; GFX942-GISEL-NEXT: s_nop 0
; GFX942-GISEL-NEXT: v_subbrev_co_u32_e32 v11, vcc, 0, v3, vcc
; GFX942-GISEL-NEXT: v_mov_b32_e32 v9, 0
; GFX942-GISEL-NEXT: s_mov_b64 s[4:5], 0
; GFX942-GISEL-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[10:11]
; GFX942-GISEL-NEXT: s_and_saveexec_b64 s[6:7], vcc
; GFX942-GISEL-NEXT: s_cbranch_execz .LBB13_3
; GFX942-GISEL-NEXT: ; %bb.1: ; %dynamic-memset-expansion-main-body.preheader
; GFX942-GISEL-NEXT: s_mov_b32 s2, s0
; GFX942-GISEL-NEXT: s_mov_b32 s3, s0
; GFX942-GISEL-NEXT: s_mov_b32 s1, s0
; GFX942-GISEL-NEXT: v_mov_b64_e32 v[6:7], s[2:3]
; GFX942-GISEL-NEXT: v_mov_b64_e32 v[4:5], s[0:1]
; GFX942-GISEL-NEXT: v_mov_b64_e32 v[12:13], s[4:5]
; GFX942-GISEL-NEXT: .LBB13_2: ; %dynamic-memset-expansion-main-body
; GFX942-GISEL-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX942-GISEL-NEXT: v_add_co_u32_e32 v14, vcc, v0, v12
; GFX942-GISEL-NEXT: s_nop 1
; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v15, vcc, v1, v13, vcc
; GFX942-GISEL-NEXT: v_add_co_u32_e32 v12, vcc, 16, v12
; GFX942-GISEL-NEXT: global_store_dwordx4 v[14:15], v[4:7], off
; GFX942-GISEL-NEXT: s_nop 0
; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v13, vcc, 0, v13, vcc
; GFX942-GISEL-NEXT: v_cmp_ge_u64_e32 vcc, v[12:13], v[10:11]
; GFX942-GISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
; GFX942-GISEL-NEXT: s_andn2_b64 exec, exec, s[4:5]
; GFX942-GISEL-NEXT: s_cbranch_execnz .LBB13_2
; GFX942-GISEL-NEXT: .LBB13_3: ; %Flow4
; GFX942-GISEL-NEXT: s_or_b64 exec, exec, s[6:7]
; GFX942-GISEL-NEXT: s_mov_b64 s[0:1], 0
; GFX942-GISEL-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[8:9]
; GFX942-GISEL-NEXT: s_and_saveexec_b64 s[2:3], vcc
; GFX942-GISEL-NEXT: s_cbranch_execz .LBB13_6
; GFX942-GISEL-NEXT: ; %bb.4: ; %dynamic-memset-expansion-residual-body.preheader
; GFX942-GISEL-NEXT: v_lshrrev_b64 v[2:3], 4, v[2:3]
; GFX942-GISEL-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 4, v[0:1]
; GFX942-GISEL-NEXT: v_mov_b32_e32 v4, 0
; GFX942-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1]
; GFX942-GISEL-NEXT: .LBB13_5: ; %dynamic-memset-expansion-residual-body
; GFX942-GISEL-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX942-GISEL-NEXT: v_add_co_u32_e32 v6, vcc, v0, v2
; GFX942-GISEL-NEXT: s_nop 1
; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v7, vcc, v1, v3, vcc
; GFX942-GISEL-NEXT: v_add_co_u32_e32 v2, vcc, 1, v2
; GFX942-GISEL-NEXT: global_store_byte v[6:7], v4, off
; GFX942-GISEL-NEXT: s_nop 0
; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
; GFX942-GISEL-NEXT: v_cmp_ge_u64_e32 vcc, v[2:3], v[8:9]
; GFX942-GISEL-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
; GFX942-GISEL-NEXT: s_andn2_b64 exec, exec, s[0:1]
; GFX942-GISEL-NEXT: s_cbranch_execnz .LBB13_5
; GFX942-GISEL-NEXT: .LBB13_6: ; %Flow2
; GFX942-GISEL-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-GISEL-NEXT: s_setpc_b64 s[30:31]
entry:
; Fill value is the immediate 0 and the length is the %size argument; the
; trailing i1 false is the intrinsic's is-volatile flag.
tail call void @llvm.memset.p1.i64(ptr addrspace(1) noundef nonnull align 4 %dst, i8 0, i64 %size, i1 false)
ret void
}
; Declarations of the llvm.memset intrinsic overloads, one per destination
; address space (0, 1, 3 and 5), each taking an i64 byte count.
; Operands are (dest pointer, i8 fill value, i64 length, i1 immarg); per the
; LLVM LangRef the trailing immediate is the is-volatile flag.
declare void @llvm.memset.p0.i64(ptr addrspace(0) noalias nocapture writeonly, i8, i64, i1 immarg)
declare void @llvm.memset.p1.i64(ptr addrspace(1) noalias nocapture writeonly, i8, i64, i1 immarg)
declare void @llvm.memset.p3.i64(ptr addrspace(3) noalias nocapture writeonly, i8, i64, i1 immarg)
declare void @llvm.memset.p5.i64(ptr addrspace(5) noalias nocapture writeonly, i8, i64, i1 immarg)
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GFX942: {{.*}}