| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck -check-prefix=VI %s |
| ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s |
| |
| ; Make sure the stack is never realigned for entry functions. |
| |
| ; Kernel with a single i32 alloca in addrspace(5) that requests 128-byte alignment. |
| ; For both llc invocations the checked instruction sequence consists only of the |
| ; flat_scratch setup, the move of the constant 9, and one buffer store at |
| ; offset 128 from s7; the kernel descriptor reports a private segment size of 256. |
| define amdgpu_kernel void @max_alignment_128() #0 { |
| ; VI-LABEL: max_alignment_128: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_add_u32 s4, s4, s7 |
| ; VI-NEXT: v_mov_b32_e32 v0, 9 |
| ; VI-NEXT: s_mov_b32 flat_scratch_lo, s5 |
| ; VI-NEXT: s_lshr_b32 flat_scratch_hi, s4, 8 |
| ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s7 offset:128 |
| ; VI-NEXT: s_endpgm |
| ; VI-NEXT: .section .rodata,#alloc |
| ; VI-NEXT: .p2align 6 |
| ; VI-NEXT: .amdhsa_kernel max_alignment_128 |
| ; VI-NEXT: .amdhsa_group_segment_fixed_size 0 |
| ; VI-NEXT: .amdhsa_private_segment_fixed_size 256 |
| ; VI-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1 |
| ; VI-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0 |
| ; VI-NEXT: .amdhsa_user_sgpr_queue_ptr 0 |
| ; VI-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0 |
| ; VI-NEXT: .amdhsa_user_sgpr_dispatch_id 0 |
| ; VI-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1 |
| ; VI-NEXT: .amdhsa_user_sgpr_private_segment_size 0 |
| ; VI-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1 |
| ; VI-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1 |
| ; VI-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0 |
| ; VI-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0 |
| ; VI-NEXT: .amdhsa_system_sgpr_workgroup_info 0 |
| ; VI-NEXT: .amdhsa_system_vgpr_workitem_id 0 |
| ; VI-NEXT: .amdhsa_next_free_vgpr 1 |
| ; VI-NEXT: .amdhsa_next_free_sgpr 8 |
| ; VI-NEXT: .amdhsa_reserve_vcc 0 |
| ; VI-NEXT: .amdhsa_float_round_mode_32 0 |
| ; VI-NEXT: .amdhsa_float_round_mode_16_64 0 |
| ; VI-NEXT: .amdhsa_float_denorm_mode_32 0 |
| ; VI-NEXT: .amdhsa_float_denorm_mode_16_64 3 |
| ; VI-NEXT: .amdhsa_dx10_clamp 1 |
| ; VI-NEXT: .amdhsa_ieee_mode 1 |
| ; VI-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0 |
| ; VI-NEXT: .amdhsa_exception_fp_denorm_src 0 |
| ; VI-NEXT: .amdhsa_exception_fp_ieee_div_zero 0 |
| ; VI-NEXT: .amdhsa_exception_fp_ieee_overflow 0 |
| ; VI-NEXT: .amdhsa_exception_fp_ieee_underflow 0 |
| ; VI-NEXT: .amdhsa_exception_fp_ieee_inexact 0 |
| ; VI-NEXT: .amdhsa_exception_int_div_zero 0 |
| ; VI-NEXT: .end_amdhsa_kernel |
| ; VI-NEXT: .text |
| ; |
| ; GFX9-LABEL: max_alignment_128: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_add_u32 flat_scratch_lo, s4, s7 |
| ; GFX9-NEXT: v_mov_b32_e32 v0, 9 |
| ; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s5, 0 |
| ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s7 offset:128 |
| ; GFX9-NEXT: s_endpgm |
| ; GFX9-NEXT: .section .rodata,#alloc |
| ; GFX9-NEXT: .p2align 6 |
| ; GFX9-NEXT: .amdhsa_kernel max_alignment_128 |
| ; GFX9-NEXT: .amdhsa_group_segment_fixed_size 0 |
| ; GFX9-NEXT: .amdhsa_private_segment_fixed_size 256 |
| ; GFX9-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1 |
| ; GFX9-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0 |
| ; GFX9-NEXT: .amdhsa_user_sgpr_queue_ptr 0 |
| ; GFX9-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0 |
| ; GFX9-NEXT: .amdhsa_user_sgpr_dispatch_id 0 |
| ; GFX9-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1 |
| ; GFX9-NEXT: .amdhsa_user_sgpr_private_segment_size 0 |
| ; GFX9-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1 |
| ; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1 |
| ; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0 |
| ; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0 |
| ; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_info 0 |
| ; GFX9-NEXT: .amdhsa_system_vgpr_workitem_id 0 |
| ; GFX9-NEXT: .amdhsa_next_free_vgpr 1 |
| ; GFX9-NEXT: .amdhsa_next_free_sgpr 8 |
| ; GFX9-NEXT: .amdhsa_reserve_vcc 0 |
| ; GFX9-NEXT: .amdhsa_float_round_mode_32 0 |
| ; GFX9-NEXT: .amdhsa_float_round_mode_16_64 0 |
| ; GFX9-NEXT: .amdhsa_float_denorm_mode_32 0 |
| ; GFX9-NEXT: .amdhsa_float_denorm_mode_16_64 3 |
| ; GFX9-NEXT: .amdhsa_dx10_clamp 1 |
| ; GFX9-NEXT: .amdhsa_ieee_mode 1 |
| ; GFX9-NEXT: .amdhsa_fp16_overflow 0 |
| ; GFX9-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0 |
| ; GFX9-NEXT: .amdhsa_exception_fp_denorm_src 0 |
| ; GFX9-NEXT: .amdhsa_exception_fp_ieee_div_zero 0 |
| ; GFX9-NEXT: .amdhsa_exception_fp_ieee_overflow 0 |
| ; GFX9-NEXT: .amdhsa_exception_fp_ieee_underflow 0 |
| ; GFX9-NEXT: .amdhsa_exception_fp_ieee_inexact 0 |
| ; GFX9-NEXT: .amdhsa_exception_int_div_zero 0 |
| ; GFX9-NEXT: .end_amdhsa_kernel |
| ; GFX9-NEXT: .text |
| ; IR under test: a 128-byte-aligned i32 slot in addrspace(5), written once with a |
| ; volatile store (volatile presumably so the access is not optimized out). |
| %alloca.align = alloca i32, align 128, addrspace(5) |
| store volatile i32 9, i32 addrspace(5)* %alloca.align, align 128 |
| ret void |
| } |
| |
| ; Kernel carrying the "stackrealign" string attribute (attribute group #1) with an |
| ; ordinary 4-byte-aligned i32 alloca in addrspace(5). For both llc invocations the |
| ; checked code stores directly at offset 4 from s7 and the kernel descriptor |
| ; reports a private segment size of 8. |
| define amdgpu_kernel void @stackrealign_attr() #1 { |
| ; VI-LABEL: stackrealign_attr: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_add_u32 s4, s4, s7 |
| ; VI-NEXT: v_mov_b32_e32 v0, 9 |
| ; VI-NEXT: s_mov_b32 flat_scratch_lo, s5 |
| ; VI-NEXT: s_lshr_b32 flat_scratch_hi, s4, 8 |
| ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s7 offset:4 |
| ; VI-NEXT: s_endpgm |
| ; VI-NEXT: .section .rodata,#alloc |
| ; VI-NEXT: .p2align 6 |
| ; VI-NEXT: .amdhsa_kernel stackrealign_attr |
| ; VI-NEXT: .amdhsa_group_segment_fixed_size 0 |
| ; VI-NEXT: .amdhsa_private_segment_fixed_size 8 |
| ; VI-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1 |
| ; VI-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0 |
| ; VI-NEXT: .amdhsa_user_sgpr_queue_ptr 0 |
| ; VI-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0 |
| ; VI-NEXT: .amdhsa_user_sgpr_dispatch_id 0 |
| ; VI-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1 |
| ; VI-NEXT: .amdhsa_user_sgpr_private_segment_size 0 |
| ; VI-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1 |
| ; VI-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1 |
| ; VI-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0 |
| ; VI-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0 |
| ; VI-NEXT: .amdhsa_system_sgpr_workgroup_info 0 |
| ; VI-NEXT: .amdhsa_system_vgpr_workitem_id 0 |
| ; VI-NEXT: .amdhsa_next_free_vgpr 1 |
| ; VI-NEXT: .amdhsa_next_free_sgpr 8 |
| ; VI-NEXT: .amdhsa_reserve_vcc 0 |
| ; VI-NEXT: .amdhsa_float_round_mode_32 0 |
| ; VI-NEXT: .amdhsa_float_round_mode_16_64 0 |
| ; VI-NEXT: .amdhsa_float_denorm_mode_32 0 |
| ; VI-NEXT: .amdhsa_float_denorm_mode_16_64 3 |
| ; VI-NEXT: .amdhsa_dx10_clamp 1 |
| ; VI-NEXT: .amdhsa_ieee_mode 1 |
| ; VI-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0 |
| ; VI-NEXT: .amdhsa_exception_fp_denorm_src 0 |
| ; VI-NEXT: .amdhsa_exception_fp_ieee_div_zero 0 |
| ; VI-NEXT: .amdhsa_exception_fp_ieee_overflow 0 |
| ; VI-NEXT: .amdhsa_exception_fp_ieee_underflow 0 |
| ; VI-NEXT: .amdhsa_exception_fp_ieee_inexact 0 |
| ; VI-NEXT: .amdhsa_exception_int_div_zero 0 |
| ; VI-NEXT: .end_amdhsa_kernel |
| ; VI-NEXT: .text |
| ; |
| ; GFX9-LABEL: stackrealign_attr: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_add_u32 flat_scratch_lo, s4, s7 |
| ; GFX9-NEXT: v_mov_b32_e32 v0, 9 |
| ; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s5, 0 |
| ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s7 offset:4 |
| ; GFX9-NEXT: s_endpgm |
| ; GFX9-NEXT: .section .rodata,#alloc |
| ; GFX9-NEXT: .p2align 6 |
| ; GFX9-NEXT: .amdhsa_kernel stackrealign_attr |
| ; GFX9-NEXT: .amdhsa_group_segment_fixed_size 0 |
| ; GFX9-NEXT: .amdhsa_private_segment_fixed_size 8 |
| ; GFX9-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1 |
| ; GFX9-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0 |
| ; GFX9-NEXT: .amdhsa_user_sgpr_queue_ptr 0 |
| ; GFX9-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0 |
| ; GFX9-NEXT: .amdhsa_user_sgpr_dispatch_id 0 |
| ; GFX9-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1 |
| ; GFX9-NEXT: .amdhsa_user_sgpr_private_segment_size 0 |
| ; GFX9-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1 |
| ; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1 |
| ; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0 |
| ; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0 |
| ; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_info 0 |
| ; GFX9-NEXT: .amdhsa_system_vgpr_workitem_id 0 |
| ; GFX9-NEXT: .amdhsa_next_free_vgpr 1 |
| ; GFX9-NEXT: .amdhsa_next_free_sgpr 8 |
| ; GFX9-NEXT: .amdhsa_reserve_vcc 0 |
| ; GFX9-NEXT: .amdhsa_float_round_mode_32 0 |
| ; GFX9-NEXT: .amdhsa_float_round_mode_16_64 0 |
| ; GFX9-NEXT: .amdhsa_float_denorm_mode_32 0 |
| ; GFX9-NEXT: .amdhsa_float_denorm_mode_16_64 3 |
| ; GFX9-NEXT: .amdhsa_dx10_clamp 1 |
| ; GFX9-NEXT: .amdhsa_ieee_mode 1 |
| ; GFX9-NEXT: .amdhsa_fp16_overflow 0 |
| ; GFX9-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0 |
| ; GFX9-NEXT: .amdhsa_exception_fp_denorm_src 0 |
| ; GFX9-NEXT: .amdhsa_exception_fp_ieee_div_zero 0 |
| ; GFX9-NEXT: .amdhsa_exception_fp_ieee_overflow 0 |
| ; GFX9-NEXT: .amdhsa_exception_fp_ieee_underflow 0 |
| ; GFX9-NEXT: .amdhsa_exception_fp_ieee_inexact 0 |
| ; GFX9-NEXT: .amdhsa_exception_int_div_zero 0 |
| ; GFX9-NEXT: .end_amdhsa_kernel |
| ; GFX9-NEXT: .text |
| ; IR under test: a 4-byte-aligned i32 slot in addrspace(5), written once with a |
| ; volatile store (volatile presumably so the access is not optimized out). |
| %alloca.align = alloca i32, align 4, addrspace(5) |
| store volatile i32 9, i32 addrspace(5)* %alloca.align, align 4 |
| ret void |
| } |
| |
| ; Kernel carrying alignstack=128 (attribute group #2) with a 4-byte-aligned i32 |
| ; alloca in addrspace(5). For both llc invocations the checked code stores |
| ; directly at offset 4 from s7, and the kernel descriptor reports a private |
| ; segment size of 128. |
| define amdgpu_kernel void @alignstack_attr() #2 { |
| ; VI-LABEL: alignstack_attr: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_add_u32 s4, s4, s7 |
| ; VI-NEXT: v_mov_b32_e32 v0, 9 |
| ; VI-NEXT: s_mov_b32 flat_scratch_lo, s5 |
| ; VI-NEXT: s_lshr_b32 flat_scratch_hi, s4, 8 |
| ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s7 offset:4 |
| ; VI-NEXT: s_endpgm |
| ; VI-NEXT: .section .rodata,#alloc |
| ; VI-NEXT: .p2align 6 |
| ; VI-NEXT: .amdhsa_kernel alignstack_attr |
| ; VI-NEXT: .amdhsa_group_segment_fixed_size 0 |
| ; VI-NEXT: .amdhsa_private_segment_fixed_size 128 |
| ; VI-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1 |
| ; VI-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0 |
| ; VI-NEXT: .amdhsa_user_sgpr_queue_ptr 0 |
| ; VI-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0 |
| ; VI-NEXT: .amdhsa_user_sgpr_dispatch_id 0 |
| ; VI-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1 |
| ; VI-NEXT: .amdhsa_user_sgpr_private_segment_size 0 |
| ; VI-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1 |
| ; VI-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1 |
| ; VI-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0 |
| ; VI-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0 |
| ; VI-NEXT: .amdhsa_system_sgpr_workgroup_info 0 |
| ; VI-NEXT: .amdhsa_system_vgpr_workitem_id 0 |
| ; VI-NEXT: .amdhsa_next_free_vgpr 1 |
| ; VI-NEXT: .amdhsa_next_free_sgpr 8 |
| ; VI-NEXT: .amdhsa_reserve_vcc 0 |
| ; VI-NEXT: .amdhsa_float_round_mode_32 0 |
| ; VI-NEXT: .amdhsa_float_round_mode_16_64 0 |
| ; VI-NEXT: .amdhsa_float_denorm_mode_32 0 |
| ; VI-NEXT: .amdhsa_float_denorm_mode_16_64 3 |
| ; VI-NEXT: .amdhsa_dx10_clamp 1 |
| ; VI-NEXT: .amdhsa_ieee_mode 1 |
| ; VI-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0 |
| ; VI-NEXT: .amdhsa_exception_fp_denorm_src 0 |
| ; VI-NEXT: .amdhsa_exception_fp_ieee_div_zero 0 |
| ; VI-NEXT: .amdhsa_exception_fp_ieee_overflow 0 |
| ; VI-NEXT: .amdhsa_exception_fp_ieee_underflow 0 |
| ; VI-NEXT: .amdhsa_exception_fp_ieee_inexact 0 |
| ; VI-NEXT: .amdhsa_exception_int_div_zero 0 |
| ; VI-NEXT: .end_amdhsa_kernel |
| ; VI-NEXT: .text |
| ; |
| ; GFX9-LABEL: alignstack_attr: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_add_u32 flat_scratch_lo, s4, s7 |
| ; GFX9-NEXT: v_mov_b32_e32 v0, 9 |
| ; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s5, 0 |
| ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s7 offset:4 |
| ; GFX9-NEXT: s_endpgm |
| ; GFX9-NEXT: .section .rodata,#alloc |
| ; GFX9-NEXT: .p2align 6 |
| ; GFX9-NEXT: .amdhsa_kernel alignstack_attr |
| ; GFX9-NEXT: .amdhsa_group_segment_fixed_size 0 |
| ; GFX9-NEXT: .amdhsa_private_segment_fixed_size 128 |
| ; GFX9-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1 |
| ; GFX9-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0 |
| ; GFX9-NEXT: .amdhsa_user_sgpr_queue_ptr 0 |
| ; GFX9-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0 |
| ; GFX9-NEXT: .amdhsa_user_sgpr_dispatch_id 0 |
| ; GFX9-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1 |
| ; GFX9-NEXT: .amdhsa_user_sgpr_private_segment_size 0 |
| ; GFX9-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1 |
| ; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1 |
| ; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0 |
| ; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0 |
| ; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_info 0 |
| ; GFX9-NEXT: .amdhsa_system_vgpr_workitem_id 0 |
| ; GFX9-NEXT: .amdhsa_next_free_vgpr 1 |
| ; GFX9-NEXT: .amdhsa_next_free_sgpr 8 |
| ; GFX9-NEXT: .amdhsa_reserve_vcc 0 |
| ; GFX9-NEXT: .amdhsa_float_round_mode_32 0 |
| ; GFX9-NEXT: .amdhsa_float_round_mode_16_64 0 |
| ; GFX9-NEXT: .amdhsa_float_denorm_mode_32 0 |
| ; GFX9-NEXT: .amdhsa_float_denorm_mode_16_64 3 |
| ; GFX9-NEXT: .amdhsa_dx10_clamp 1 |
| ; GFX9-NEXT: .amdhsa_ieee_mode 1 |
| ; GFX9-NEXT: .amdhsa_fp16_overflow 0 |
| ; GFX9-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0 |
| ; GFX9-NEXT: .amdhsa_exception_fp_denorm_src 0 |
| ; GFX9-NEXT: .amdhsa_exception_fp_ieee_div_zero 0 |
| ; GFX9-NEXT: .amdhsa_exception_fp_ieee_overflow 0 |
| ; GFX9-NEXT: .amdhsa_exception_fp_ieee_underflow 0 |
| ; GFX9-NEXT: .amdhsa_exception_fp_ieee_inexact 0 |
| ; GFX9-NEXT: .amdhsa_exception_int_div_zero 0 |
| ; GFX9-NEXT: .end_amdhsa_kernel |
| ; GFX9-NEXT: .text |
| ; IR under test: a 4-byte-aligned i32 slot in addrspace(5), written once with a |
| ; volatile store (volatile presumably so the access is not optimized out). |
| %alloca.align = alloca i32, align 4, addrspace(5) |
| store volatile i32 9, i32 addrspace(5)* %alloca.align, align 4 |
| ret void |
| } |
| |
| ; #0: nounwind only; used by @max_alignment_128. |
| attributes #0 = { nounwind } |
| ; #1: nounwind plus the "stackrealign" string attribute; used by @stackrealign_attr. |
| attributes #1 = { nounwind "stackrealign" } |
| ; #2: nounwind plus alignstack=128; used by @alignstack_attr. |
| attributes #2 = { nounwind alignstack=128 } |