| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - %s | FileCheck -check-prefix=GFX9 %s |
| ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -o - %s | FileCheck -check-prefix=GFX10 %s |
| |
| ; a16 image atomic swap on a 1D image: i32 data, one i16 coordinate (%s). |
| ; Both targets are expected to move s2..s9 down into s[0:7] (the descriptor operand |
| ; of the image instruction) and emit a single image_atomic_swap carrying glc and a16. |
| ; The i32 result of the intrinsic is bitcast to float for the shader return value. |
| define amdgpu_ps float @atomic_swap_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) { |
| ; GFX9-LABEL: atomic_swap_i32_1d: |
| ; GFX9: ; %bb.0: ; %main_body |
| ; GFX9-NEXT: s_mov_b32 s0, s2 |
| ; GFX9-NEXT: s_mov_b32 s1, s3 |
| ; GFX9-NEXT: s_mov_b32 s2, s4 |
| ; GFX9-NEXT: s_mov_b32 s3, s5 |
| ; GFX9-NEXT: s_mov_b32 s4, s6 |
| ; GFX9-NEXT: s_mov_b32 s5, s7 |
| ; GFX9-NEXT: s_mov_b32 s6, s8 |
| ; GFX9-NEXT: s_mov_b32 s7, s9 |
| ; GFX9-NEXT: image_atomic_swap v0, v1, s[0:7] dmask:0x1 unorm glc a16 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: atomic_swap_i32_1d: |
| ; GFX10: ; %bb.0: ; %main_body |
| ; GFX10-NEXT: s_mov_b32 s0, s2 |
| ; GFX10-NEXT: s_mov_b32 s1, s3 |
| ; GFX10-NEXT: s_mov_b32 s2, s4 |
| ; GFX10-NEXT: s_mov_b32 s3, s5 |
| ; GFX10-NEXT: s_mov_b32 s4, s6 |
| ; GFX10-NEXT: s_mov_b32 s5, s7 |
| ; GFX10-NEXT: s_mov_b32 s6, s8 |
| ; GFX10-NEXT: s_mov_b32 s7, s9 |
| ; GFX10-NEXT: image_atomic_swap v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc a16 |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: ; return to shader part epilog |
| main_body: |
| %v = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) |
| %out = bitcast i32 %v to float |
| ret float %out |
| } |
| |
| ; a16 image atomic add on a 1D image: i32 data, one i16 coordinate (%s). |
| ; Both targets are expected to move s2..s9 down into s[0:7] and emit a single |
| ; image_atomic_add carrying glc and a16; gfx1010 additionally prints dim:SQ_RSRC_IMG_1D. |
| ; The i32 result of the intrinsic is bitcast to float for the shader return value. |
| define amdgpu_ps float @atomic_add_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) { |
| ; GFX9-LABEL: atomic_add_i32_1d: |
| ; GFX9: ; %bb.0: ; %main_body |
| ; GFX9-NEXT: s_mov_b32 s0, s2 |
| ; GFX9-NEXT: s_mov_b32 s1, s3 |
| ; GFX9-NEXT: s_mov_b32 s2, s4 |
| ; GFX9-NEXT: s_mov_b32 s3, s5 |
| ; GFX9-NEXT: s_mov_b32 s4, s6 |
| ; GFX9-NEXT: s_mov_b32 s5, s7 |
| ; GFX9-NEXT: s_mov_b32 s6, s8 |
| ; GFX9-NEXT: s_mov_b32 s7, s9 |
| ; GFX9-NEXT: image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc a16 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: atomic_add_i32_1d: |
| ; GFX10: ; %bb.0: ; %main_body |
| ; GFX10-NEXT: s_mov_b32 s0, s2 |
| ; GFX10-NEXT: s_mov_b32 s1, s3 |
| ; GFX10-NEXT: s_mov_b32 s2, s4 |
| ; GFX10-NEXT: s_mov_b32 s3, s5 |
| ; GFX10-NEXT: s_mov_b32 s4, s6 |
| ; GFX10-NEXT: s_mov_b32 s5, s7 |
| ; GFX10-NEXT: s_mov_b32 s6, s8 |
| ; GFX10-NEXT: s_mov_b32 s7, s9 |
| ; GFX10-NEXT: image_atomic_add v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc a16 |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: ; return to shader part epilog |
| main_body: |
| %v = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) |
| %out = bitcast i32 %v to float |
| ret float %out |
| } |
| |
| ; a16 image atomic sub on a 1D image: i32 data, one i16 coordinate (%s). |
| ; Both targets are expected to move s2..s9 down into s[0:7] and emit a single |
| ; image_atomic_sub carrying glc and a16; gfx1010 additionally prints dim:SQ_RSRC_IMG_1D. |
| ; The i32 result of the intrinsic is bitcast to float for the shader return value. |
| define amdgpu_ps float @atomic_sub_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) { |
| ; GFX9-LABEL: atomic_sub_i32_1d: |
| ; GFX9: ; %bb.0: ; %main_body |
| ; GFX9-NEXT: s_mov_b32 s0, s2 |
| ; GFX9-NEXT: s_mov_b32 s1, s3 |
| ; GFX9-NEXT: s_mov_b32 s2, s4 |
| ; GFX9-NEXT: s_mov_b32 s3, s5 |
| ; GFX9-NEXT: s_mov_b32 s4, s6 |
| ; GFX9-NEXT: s_mov_b32 s5, s7 |
| ; GFX9-NEXT: s_mov_b32 s6, s8 |
| ; GFX9-NEXT: s_mov_b32 s7, s9 |
| ; GFX9-NEXT: image_atomic_sub v0, v1, s[0:7] dmask:0x1 unorm glc a16 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: atomic_sub_i32_1d: |
| ; GFX10: ; %bb.0: ; %main_body |
| ; GFX10-NEXT: s_mov_b32 s0, s2 |
| ; GFX10-NEXT: s_mov_b32 s1, s3 |
| ; GFX10-NEXT: s_mov_b32 s2, s4 |
| ; GFX10-NEXT: s_mov_b32 s3, s5 |
| ; GFX10-NEXT: s_mov_b32 s4, s6 |
| ; GFX10-NEXT: s_mov_b32 s5, s7 |
| ; GFX10-NEXT: s_mov_b32 s6, s8 |
| ; GFX10-NEXT: s_mov_b32 s7, s9 |
| ; GFX10-NEXT: image_atomic_sub v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc a16 |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: ; return to shader part epilog |
| main_body: |
| %v = call i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) |
| %out = bitcast i32 %v to float |
| ret float %out |
| } |
| |
| ; a16 image atomic smin on a 1D image: i32 data, one i16 coordinate (%s). |
| ; Both targets are expected to move s2..s9 down into s[0:7] and emit a single |
| ; image_atomic_smin carrying glc and a16; gfx1010 additionally prints dim:SQ_RSRC_IMG_1D. |
| ; The i32 result of the intrinsic is bitcast to float for the shader return value. |
| define amdgpu_ps float @atomic_smin_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) { |
| ; GFX9-LABEL: atomic_smin_i32_1d: |
| ; GFX9: ; %bb.0: ; %main_body |
| ; GFX9-NEXT: s_mov_b32 s0, s2 |
| ; GFX9-NEXT: s_mov_b32 s1, s3 |
| ; GFX9-NEXT: s_mov_b32 s2, s4 |
| ; GFX9-NEXT: s_mov_b32 s3, s5 |
| ; GFX9-NEXT: s_mov_b32 s4, s6 |
| ; GFX9-NEXT: s_mov_b32 s5, s7 |
| ; GFX9-NEXT: s_mov_b32 s6, s8 |
| ; GFX9-NEXT: s_mov_b32 s7, s9 |
| ; GFX9-NEXT: image_atomic_smin v0, v1, s[0:7] dmask:0x1 unorm glc a16 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: atomic_smin_i32_1d: |
| ; GFX10: ; %bb.0: ; %main_body |
| ; GFX10-NEXT: s_mov_b32 s0, s2 |
| ; GFX10-NEXT: s_mov_b32 s1, s3 |
| ; GFX10-NEXT: s_mov_b32 s2, s4 |
| ; GFX10-NEXT: s_mov_b32 s3, s5 |
| ; GFX10-NEXT: s_mov_b32 s4, s6 |
| ; GFX10-NEXT: s_mov_b32 s5, s7 |
| ; GFX10-NEXT: s_mov_b32 s6, s8 |
| ; GFX10-NEXT: s_mov_b32 s7, s9 |
| ; GFX10-NEXT: image_atomic_smin v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc a16 |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: ; return to shader part epilog |
| main_body: |
| %v = call i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) |
| %out = bitcast i32 %v to float |
| ret float %out |
| } |
| |
| ; a16 image atomic umin on a 1D image: i32 data, one i16 coordinate (%s). |
| ; Both targets are expected to move s2..s9 down into s[0:7] and emit a single |
| ; image_atomic_umin carrying glc and a16; gfx1010 additionally prints dim:SQ_RSRC_IMG_1D. |
| ; The i32 result of the intrinsic is bitcast to float for the shader return value. |
| define amdgpu_ps float @atomic_umin_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) { |
| ; GFX9-LABEL: atomic_umin_i32_1d: |
| ; GFX9: ; %bb.0: ; %main_body |
| ; GFX9-NEXT: s_mov_b32 s0, s2 |
| ; GFX9-NEXT: s_mov_b32 s1, s3 |
| ; GFX9-NEXT: s_mov_b32 s2, s4 |
| ; GFX9-NEXT: s_mov_b32 s3, s5 |
| ; GFX9-NEXT: s_mov_b32 s4, s6 |
| ; GFX9-NEXT: s_mov_b32 s5, s7 |
| ; GFX9-NEXT: s_mov_b32 s6, s8 |
| ; GFX9-NEXT: s_mov_b32 s7, s9 |
| ; GFX9-NEXT: image_atomic_umin v0, v1, s[0:7] dmask:0x1 unorm glc a16 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: atomic_umin_i32_1d: |
| ; GFX10: ; %bb.0: ; %main_body |
| ; GFX10-NEXT: s_mov_b32 s0, s2 |
| ; GFX10-NEXT: s_mov_b32 s1, s3 |
| ; GFX10-NEXT: s_mov_b32 s2, s4 |
| ; GFX10-NEXT: s_mov_b32 s3, s5 |
| ; GFX10-NEXT: s_mov_b32 s4, s6 |
| ; GFX10-NEXT: s_mov_b32 s5, s7 |
| ; GFX10-NEXT: s_mov_b32 s6, s8 |
| ; GFX10-NEXT: s_mov_b32 s7, s9 |
| ; GFX10-NEXT: image_atomic_umin v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc a16 |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: ; return to shader part epilog |
| main_body: |
| %v = call i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) |
| %out = bitcast i32 %v to float |
| ret float %out |
| } |
| |
| ; a16 image atomic smax on a 1D image: i32 data, one i16 coordinate (%s). |
| ; Both targets are expected to move s2..s9 down into s[0:7] and emit a single |
| ; image_atomic_smax carrying glc and a16; gfx1010 additionally prints dim:SQ_RSRC_IMG_1D. |
| ; The i32 result of the intrinsic is bitcast to float for the shader return value. |
| define amdgpu_ps float @atomic_smax_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) { |
| ; GFX9-LABEL: atomic_smax_i32_1d: |
| ; GFX9: ; %bb.0: ; %main_body |
| ; GFX9-NEXT: s_mov_b32 s0, s2 |
| ; GFX9-NEXT: s_mov_b32 s1, s3 |
| ; GFX9-NEXT: s_mov_b32 s2, s4 |
| ; GFX9-NEXT: s_mov_b32 s3, s5 |
| ; GFX9-NEXT: s_mov_b32 s4, s6 |
| ; GFX9-NEXT: s_mov_b32 s5, s7 |
| ; GFX9-NEXT: s_mov_b32 s6, s8 |
| ; GFX9-NEXT: s_mov_b32 s7, s9 |
| ; GFX9-NEXT: image_atomic_smax v0, v1, s[0:7] dmask:0x1 unorm glc a16 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: atomic_smax_i32_1d: |
| ; GFX10: ; %bb.0: ; %main_body |
| ; GFX10-NEXT: s_mov_b32 s0, s2 |
| ; GFX10-NEXT: s_mov_b32 s1, s3 |
| ; GFX10-NEXT: s_mov_b32 s2, s4 |
| ; GFX10-NEXT: s_mov_b32 s3, s5 |
| ; GFX10-NEXT: s_mov_b32 s4, s6 |
| ; GFX10-NEXT: s_mov_b32 s5, s7 |
| ; GFX10-NEXT: s_mov_b32 s6, s8 |
| ; GFX10-NEXT: s_mov_b32 s7, s9 |
| ; GFX10-NEXT: image_atomic_smax v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc a16 |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: ; return to shader part epilog |
| main_body: |
| %v = call i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) |
| %out = bitcast i32 %v to float |
| ret float %out |
| } |
| |
| ; a16 image atomic umax on a 1D image: i32 data, one i16 coordinate (%s). |
| ; Both targets are expected to move s2..s9 down into s[0:7] and emit a single |
| ; image_atomic_umax carrying glc and a16; gfx1010 additionally prints dim:SQ_RSRC_IMG_1D. |
| ; The i32 result of the intrinsic is bitcast to float for the shader return value. |
| define amdgpu_ps float @atomic_umax_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) { |
| ; GFX9-LABEL: atomic_umax_i32_1d: |
| ; GFX9: ; %bb.0: ; %main_body |
| ; GFX9-NEXT: s_mov_b32 s0, s2 |
| ; GFX9-NEXT: s_mov_b32 s1, s3 |
| ; GFX9-NEXT: s_mov_b32 s2, s4 |
| ; GFX9-NEXT: s_mov_b32 s3, s5 |
| ; GFX9-NEXT: s_mov_b32 s4, s6 |
| ; GFX9-NEXT: s_mov_b32 s5, s7 |
| ; GFX9-NEXT: s_mov_b32 s6, s8 |
| ; GFX9-NEXT: s_mov_b32 s7, s9 |
| ; GFX9-NEXT: image_atomic_umax v0, v1, s[0:7] dmask:0x1 unorm glc a16 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: atomic_umax_i32_1d: |
| ; GFX10: ; %bb.0: ; %main_body |
| ; GFX10-NEXT: s_mov_b32 s0, s2 |
| ; GFX10-NEXT: s_mov_b32 s1, s3 |
| ; GFX10-NEXT: s_mov_b32 s2, s4 |
| ; GFX10-NEXT: s_mov_b32 s3, s5 |
| ; GFX10-NEXT: s_mov_b32 s4, s6 |
| ; GFX10-NEXT: s_mov_b32 s5, s7 |
| ; GFX10-NEXT: s_mov_b32 s6, s8 |
| ; GFX10-NEXT: s_mov_b32 s7, s9 |
| ; GFX10-NEXT: image_atomic_umax v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc a16 |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: ; return to shader part epilog |
| main_body: |
| %v = call i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) |
| %out = bitcast i32 %v to float |
| ret float %out |
| } |
| |
| ; a16 image atomic and on a 1D image: i32 data, one i16 coordinate (%s). |
| ; Both targets are expected to move s2..s9 down into s[0:7] and emit a single |
| ; image_atomic_and carrying glc and a16; gfx1010 additionally prints dim:SQ_RSRC_IMG_1D. |
| ; The i32 result of the intrinsic is bitcast to float for the shader return value. |
| ; NOTE(review): the function name is spelled "i321d" while every sibling test here uses |
| ; "i32_1d"; a rename would also have to touch both LABEL checks below. |
| define amdgpu_ps float @atomic_and_i321d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) { |
| ; GFX9-LABEL: atomic_and_i321d: |
| ; GFX9: ; %bb.0: ; %main_body |
| ; GFX9-NEXT: s_mov_b32 s0, s2 |
| ; GFX9-NEXT: s_mov_b32 s1, s3 |
| ; GFX9-NEXT: s_mov_b32 s2, s4 |
| ; GFX9-NEXT: s_mov_b32 s3, s5 |
| ; GFX9-NEXT: s_mov_b32 s4, s6 |
| ; GFX9-NEXT: s_mov_b32 s5, s7 |
| ; GFX9-NEXT: s_mov_b32 s6, s8 |
| ; GFX9-NEXT: s_mov_b32 s7, s9 |
| ; GFX9-NEXT: image_atomic_and v0, v1, s[0:7] dmask:0x1 unorm glc a16 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: atomic_and_i321d: |
| ; GFX10: ; %bb.0: ; %main_body |
| ; GFX10-NEXT: s_mov_b32 s0, s2 |
| ; GFX10-NEXT: s_mov_b32 s1, s3 |
| ; GFX10-NEXT: s_mov_b32 s2, s4 |
| ; GFX10-NEXT: s_mov_b32 s3, s5 |
| ; GFX10-NEXT: s_mov_b32 s4, s6 |
| ; GFX10-NEXT: s_mov_b32 s5, s7 |
| ; GFX10-NEXT: s_mov_b32 s6, s8 |
| ; GFX10-NEXT: s_mov_b32 s7, s9 |
| ; GFX10-NEXT: image_atomic_and v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc a16 |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: ; return to shader part epilog |
| main_body: |
| %v = call i32 @llvm.amdgcn.image.atomic.and.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) |
| %out = bitcast i32 %v to float |
| ret float %out |
| } |
| |
| ; a16 image atomic or on a 1D image: i32 data, one i16 coordinate (%s). |
| ; Both targets are expected to move s2..s9 down into s[0:7] and emit a single |
| ; image_atomic_or carrying glc and a16; gfx1010 additionally prints dim:SQ_RSRC_IMG_1D. |
| ; The i32 result of the intrinsic is bitcast to float for the shader return value. |
| define amdgpu_ps float @atomic_or_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) { |
| ; GFX9-LABEL: atomic_or_i32_1d: |
| ; GFX9: ; %bb.0: ; %main_body |
| ; GFX9-NEXT: s_mov_b32 s0, s2 |
| ; GFX9-NEXT: s_mov_b32 s1, s3 |
| ; GFX9-NEXT: s_mov_b32 s2, s4 |
| ; GFX9-NEXT: s_mov_b32 s3, s5 |
| ; GFX9-NEXT: s_mov_b32 s4, s6 |
| ; GFX9-NEXT: s_mov_b32 s5, s7 |
| ; GFX9-NEXT: s_mov_b32 s6, s8 |
| ; GFX9-NEXT: s_mov_b32 s7, s9 |
| ; GFX9-NEXT: image_atomic_or v0, v1, s[0:7] dmask:0x1 unorm glc a16 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: atomic_or_i32_1d: |
| ; GFX10: ; %bb.0: ; %main_body |
| ; GFX10-NEXT: s_mov_b32 s0, s2 |
| ; GFX10-NEXT: s_mov_b32 s1, s3 |
| ; GFX10-NEXT: s_mov_b32 s2, s4 |
| ; GFX10-NEXT: s_mov_b32 s3, s5 |
| ; GFX10-NEXT: s_mov_b32 s4, s6 |
| ; GFX10-NEXT: s_mov_b32 s5, s7 |
| ; GFX10-NEXT: s_mov_b32 s6, s8 |
| ; GFX10-NEXT: s_mov_b32 s7, s9 |
| ; GFX10-NEXT: image_atomic_or v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc a16 |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: ; return to shader part epilog |
| main_body: |
| %v = call i32 @llvm.amdgcn.image.atomic.or.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) |
| %out = bitcast i32 %v to float |
| ret float %out |
| } |
| |
| ; a16 image atomic xor on a 1D image: i32 data, one i16 coordinate (%s). |
| ; Both targets are expected to move s2..s9 down into s[0:7] and emit a single |
| ; image_atomic_xor carrying glc and a16; gfx1010 additionally prints dim:SQ_RSRC_IMG_1D. |
| ; The i32 result of the intrinsic is bitcast to float for the shader return value. |
| define amdgpu_ps float @atomic_xor_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) { |
| ; GFX9-LABEL: atomic_xor_i32_1d: |
| ; GFX9: ; %bb.0: ; %main_body |
| ; GFX9-NEXT: s_mov_b32 s0, s2 |
| ; GFX9-NEXT: s_mov_b32 s1, s3 |
| ; GFX9-NEXT: s_mov_b32 s2, s4 |
| ; GFX9-NEXT: s_mov_b32 s3, s5 |
| ; GFX9-NEXT: s_mov_b32 s4, s6 |
| ; GFX9-NEXT: s_mov_b32 s5, s7 |
| ; GFX9-NEXT: s_mov_b32 s6, s8 |
| ; GFX9-NEXT: s_mov_b32 s7, s9 |
| ; GFX9-NEXT: image_atomic_xor v0, v1, s[0:7] dmask:0x1 unorm glc a16 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: atomic_xor_i32_1d: |
| ; GFX10: ; %bb.0: ; %main_body |
| ; GFX10-NEXT: s_mov_b32 s0, s2 |
| ; GFX10-NEXT: s_mov_b32 s1, s3 |
| ; GFX10-NEXT: s_mov_b32 s2, s4 |
| ; GFX10-NEXT: s_mov_b32 s3, s5 |
| ; GFX10-NEXT: s_mov_b32 s4, s6 |
| ; GFX10-NEXT: s_mov_b32 s5, s7 |
| ; GFX10-NEXT: s_mov_b32 s6, s8 |
| ; GFX10-NEXT: s_mov_b32 s7, s9 |
| ; GFX10-NEXT: image_atomic_xor v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc a16 |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: ; return to shader part epilog |
| main_body: |
| %v = call i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) |
| %out = bitcast i32 %v to float |
| ret float %out |
| } |
| |
| ; a16 image atomic inc on a 1D image: i32 data, one i16 coordinate (%s). |
| ; Both targets are expected to move s2..s9 down into s[0:7] and emit a single |
| ; image_atomic_inc carrying glc and a16; gfx1010 additionally prints dim:SQ_RSRC_IMG_1D. |
| ; The i32 result of the intrinsic is bitcast to float for the shader return value. |
| define amdgpu_ps float @atomic_inc_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) { |
| ; GFX9-LABEL: atomic_inc_i32_1d: |
| ; GFX9: ; %bb.0: ; %main_body |
| ; GFX9-NEXT: s_mov_b32 s0, s2 |
| ; GFX9-NEXT: s_mov_b32 s1, s3 |
| ; GFX9-NEXT: s_mov_b32 s2, s4 |
| ; GFX9-NEXT: s_mov_b32 s3, s5 |
| ; GFX9-NEXT: s_mov_b32 s4, s6 |
| ; GFX9-NEXT: s_mov_b32 s5, s7 |
| ; GFX9-NEXT: s_mov_b32 s6, s8 |
| ; GFX9-NEXT: s_mov_b32 s7, s9 |
| ; GFX9-NEXT: image_atomic_inc v0, v1, s[0:7] dmask:0x1 unorm glc a16 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: atomic_inc_i32_1d: |
| ; GFX10: ; %bb.0: ; %main_body |
| ; GFX10-NEXT: s_mov_b32 s0, s2 |
| ; GFX10-NEXT: s_mov_b32 s1, s3 |
| ; GFX10-NEXT: s_mov_b32 s2, s4 |
| ; GFX10-NEXT: s_mov_b32 s3, s5 |
| ; GFX10-NEXT: s_mov_b32 s4, s6 |
| ; GFX10-NEXT: s_mov_b32 s5, s7 |
| ; GFX10-NEXT: s_mov_b32 s6, s8 |
| ; GFX10-NEXT: s_mov_b32 s7, s9 |
| ; GFX10-NEXT: image_atomic_inc v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc a16 |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: ; return to shader part epilog |
| main_body: |
| %v = call i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) |
| %out = bitcast i32 %v to float |
| ret float %out |
| } |
| |
| ; a16 image atomic dec on a 1D image: i32 data, one i16 coordinate (%s). |
| ; Both targets are expected to move s2..s9 down into s[0:7] and emit a single |
| ; image_atomic_dec carrying glc and a16; gfx1010 additionally prints dim:SQ_RSRC_IMG_1D. |
| ; The i32 result of the intrinsic is bitcast to float for the shader return value. |
| define amdgpu_ps float @atomic_dec_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) { |
| ; GFX9-LABEL: atomic_dec_i32_1d: |
| ; GFX9: ; %bb.0: ; %main_body |
| ; GFX9-NEXT: s_mov_b32 s0, s2 |
| ; GFX9-NEXT: s_mov_b32 s1, s3 |
| ; GFX9-NEXT: s_mov_b32 s2, s4 |
| ; GFX9-NEXT: s_mov_b32 s3, s5 |
| ; GFX9-NEXT: s_mov_b32 s4, s6 |
| ; GFX9-NEXT: s_mov_b32 s5, s7 |
| ; GFX9-NEXT: s_mov_b32 s6, s8 |
| ; GFX9-NEXT: s_mov_b32 s7, s9 |
| ; GFX9-NEXT: image_atomic_dec v0, v1, s[0:7] dmask:0x1 unorm glc a16 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: atomic_dec_i32_1d: |
| ; GFX10: ; %bb.0: ; %main_body |
| ; GFX10-NEXT: s_mov_b32 s0, s2 |
| ; GFX10-NEXT: s_mov_b32 s1, s3 |
| ; GFX10-NEXT: s_mov_b32 s2, s4 |
| ; GFX10-NEXT: s_mov_b32 s3, s5 |
| ; GFX10-NEXT: s_mov_b32 s4, s6 |
| ; GFX10-NEXT: s_mov_b32 s5, s7 |
| ; GFX10-NEXT: s_mov_b32 s6, s8 |
| ; GFX10-NEXT: s_mov_b32 s7, s9 |
| ; GFX10-NEXT: image_atomic_dec v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc a16 |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: ; return to shader part epilog |
| main_body: |
| %v = call i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) |
| %out = bitcast i32 %v to float |
| ret float %out |
| } |
| |
| ; a16 image atomic compare-and-swap on a 1D image with one i16 coordinate (%s). |
| ; The intrinsic takes two i32 data operands (%cmp, %swap), so the expected instruction |
| ; uses a two-register data operand v[0:1] with dmask:0x3 and the coordinate in v2. |
| ; The i32 result of the intrinsic is bitcast to float for the shader return value. |
| define amdgpu_ps float @atomic_cmpswap_i32_1d(<8 x i32> inreg %rsrc, i32 %cmp, i32 %swap, i16 %s) { |
| ; GFX9-LABEL: atomic_cmpswap_i32_1d: |
| ; GFX9: ; %bb.0: ; %main_body |
| ; GFX9-NEXT: s_mov_b32 s0, s2 |
| ; GFX9-NEXT: s_mov_b32 s1, s3 |
| ; GFX9-NEXT: s_mov_b32 s2, s4 |
| ; GFX9-NEXT: s_mov_b32 s3, s5 |
| ; GFX9-NEXT: s_mov_b32 s4, s6 |
| ; GFX9-NEXT: s_mov_b32 s5, s7 |
| ; GFX9-NEXT: s_mov_b32 s6, s8 |
| ; GFX9-NEXT: s_mov_b32 s7, s9 |
| ; GFX9-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: atomic_cmpswap_i32_1d: |
| ; GFX10: ; %bb.0: ; %main_body |
| ; GFX10-NEXT: s_mov_b32 s0, s2 |
| ; GFX10-NEXT: s_mov_b32 s1, s3 |
| ; GFX10-NEXT: s_mov_b32 s2, s4 |
| ; GFX10-NEXT: s_mov_b32 s3, s5 |
| ; GFX10-NEXT: s_mov_b32 s4, s6 |
| ; GFX10-NEXT: s_mov_b32 s5, s7 |
| ; GFX10-NEXT: s_mov_b32 s6, s8 |
| ; GFX10-NEXT: s_mov_b32 s7, s9 |
| ; GFX10-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16 |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: ; return to shader part epilog |
| main_body: |
| %v = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i16(i32 %cmp, i32 %swap, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) |
| %out = bitcast i32 %v to float |
| ret float %out |
| } |
| |
| ; a16 image atomic add on a 2D image: i32 data, two i16 coordinates (%s, %t). |
| ; The expected code packs both coordinates into one VGPR before the image instruction: |
| ; v1 = (v1 & 0xffff) | (v2 << 16). gfx900 materializes the 0xffff mask in v3, while |
| ; gfx1010 passes it as a literal operand of v_and_or_b32. |
| ; The i32 result of the intrinsic is bitcast to float for the shader return value. |
| define amdgpu_ps float @atomic_add_i32_2d(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %t) { |
| ; GFX9-LABEL: atomic_add_i32_2d: |
| ; GFX9: ; %bb.0: ; %main_body |
| ; GFX9-NEXT: v_mov_b32_e32 v3, 0xffff |
| ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v2 |
| ; GFX9-NEXT: s_mov_b32 s0, s2 |
| ; GFX9-NEXT: s_mov_b32 s1, s3 |
| ; GFX9-NEXT: s_mov_b32 s2, s4 |
| ; GFX9-NEXT: s_mov_b32 s3, s5 |
| ; GFX9-NEXT: s_mov_b32 s4, s6 |
| ; GFX9-NEXT: s_mov_b32 s5, s7 |
| ; GFX9-NEXT: s_mov_b32 s6, s8 |
| ; GFX9-NEXT: s_mov_b32 s7, s9 |
| ; GFX9-NEXT: v_and_or_b32 v1, v1, v3, v2 |
| ; GFX9-NEXT: image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc a16 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: atomic_add_i32_2d: |
| ; GFX10: ; %bb.0: ; %main_body |
| ; GFX10-NEXT: v_lshlrev_b32_e32 v2, 16, v2 |
| ; GFX10-NEXT: s_mov_b32 s0, s2 |
| ; GFX10-NEXT: s_mov_b32 s1, s3 |
| ; GFX10-NEXT: s_mov_b32 s2, s4 |
| ; GFX10-NEXT: s_mov_b32 s3, s5 |
| ; GFX10-NEXT: v_and_or_b32 v1, 0xffff, v1, v2 |
| ; GFX10-NEXT: s_mov_b32 s4, s6 |
| ; GFX10-NEXT: s_mov_b32 s5, s7 |
| ; GFX10-NEXT: s_mov_b32 s6, s8 |
| ; GFX10-NEXT: s_mov_b32 s7, s9 |
| ; GFX10-NEXT: image_atomic_add v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D unorm glc a16 |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: ; return to shader part epilog |
| main_body: |
| %v = call i32 @llvm.amdgcn.image.atomic.add.2d.i32.i16(i32 %data, i16 %s, i16 %t, <8 x i32> %rsrc, i32 0, i32 0) |
| %out = bitcast i32 %v to float |
| ret float %out |
| } |
| |
| ; a16 image atomic add on a 3D image: i32 data, three i16 coordinates (%s, %t, %r). |
| ; The expected code packs the coordinates into the register pair v[1:2]: |
| ; v1 = (v1 & 0xffff) | (v2 << 16) and v2 = (v3 & 0xffff) | s8, with s8 = s0 << 16. |
| ; NOTE(review): the upper half of v2 has no matching coordinate and is filled from an |
| ; SGPR; presumably an undefined padding half, confirm before relying on it. |
| ; The i32 result of the intrinsic is bitcast to float for the shader return value. |
| define amdgpu_ps float @atomic_add_i32_3d(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %t, i16 %r) { |
| ; GFX9-LABEL: atomic_add_i32_3d: |
| ; GFX9: ; %bb.0: ; %main_body |
| ; GFX9-NEXT: s_mov_b32 s0, s2 |
| ; GFX9-NEXT: s_mov_b32 s2, s4 |
| ; GFX9-NEXT: s_mov_b32 s4, s6 |
| ; GFX9-NEXT: s_mov_b32 s6, s8 |
| ; GFX9-NEXT: v_mov_b32_e32 v4, 0xffff |
| ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v2 |
| ; GFX9-NEXT: s_lshl_b32 s8, s0, 16 |
| ; GFX9-NEXT: s_mov_b32 s1, s3 |
| ; GFX9-NEXT: s_mov_b32 s3, s5 |
| ; GFX9-NEXT: s_mov_b32 s5, s7 |
| ; GFX9-NEXT: s_mov_b32 s7, s9 |
| ; GFX9-NEXT: v_and_or_b32 v1, v1, v4, v2 |
| ; GFX9-NEXT: v_and_or_b32 v2, v3, v4, s8 |
| ; GFX9-NEXT: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc a16 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: atomic_add_i32_3d: |
| ; GFX10: ; %bb.0: ; %main_body |
| ; GFX10-NEXT: v_mov_b32_e32 v4, 0xffff |
| ; GFX10-NEXT: v_lshlrev_b32_e32 v2, 16, v2 |
| ; GFX10-NEXT: s_mov_b32 s0, s2 |
| ; GFX10-NEXT: s_mov_b32 s2, s4 |
| ; GFX10-NEXT: s_mov_b32 s4, s6 |
| ; GFX10-NEXT: s_mov_b32 s6, s8 |
| ; GFX10-NEXT: s_lshl_b32 s8, s0, 16 |
| ; GFX10-NEXT: v_and_or_b32 v1, v1, v4, v2 |
| ; GFX10-NEXT: v_and_or_b32 v2, v3, v4, s8 |
| ; GFX10-NEXT: s_mov_b32 s1, s3 |
| ; GFX10-NEXT: s_mov_b32 s3, s5 |
| ; GFX10-NEXT: s_mov_b32 s5, s7 |
| ; GFX10-NEXT: s_mov_b32 s7, s9 |
| ; GFX10-NEXT: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_3D unorm glc a16 |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: ; return to shader part epilog |
| main_body: |
| %v = call i32 @llvm.amdgcn.image.atomic.add.3d.i32.i16(i32 %data, i16 %s, i16 %t, i16 %r, <8 x i32> %rsrc, i32 0, i32 0) |
| %out = bitcast i32 %v to float |
| ret float %out |
| } |
| |
| ; a16 image atomic add on a cube image: i32 data, three i16 operands (%s, %t, %face). |
| ; The expected code packs them into the register pair v[1:2]: |
| ; v1 = (v1 & 0xffff) | (v2 << 16) and v2 = (v3 & 0xffff) | s8, with s8 = s0 << 16. |
| ; On gfx900 the instruction also carries the da modifier; gfx1010 prints |
| ; dim:SQ_RSRC_IMG_CUBE instead. |
| ; NOTE(review): the upper half of v2 has no matching coordinate and is filled from an |
| ; SGPR; presumably an undefined padding half, confirm before relying on it. |
| define amdgpu_ps float @atomic_add_i32_cube(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %t, i16 %face) { |
| ; GFX9-LABEL: atomic_add_i32_cube: |
| ; GFX9: ; %bb.0: ; %main_body |
| ; GFX9-NEXT: s_mov_b32 s0, s2 |
| ; GFX9-NEXT: s_mov_b32 s2, s4 |
| ; GFX9-NEXT: s_mov_b32 s4, s6 |
| ; GFX9-NEXT: s_mov_b32 s6, s8 |
| ; GFX9-NEXT: v_mov_b32_e32 v4, 0xffff |
| ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v2 |
| ; GFX9-NEXT: s_lshl_b32 s8, s0, 16 |
| ; GFX9-NEXT: s_mov_b32 s1, s3 |
| ; GFX9-NEXT: s_mov_b32 s3, s5 |
| ; GFX9-NEXT: s_mov_b32 s5, s7 |
| ; GFX9-NEXT: s_mov_b32 s7, s9 |
| ; GFX9-NEXT: v_and_or_b32 v1, v1, v4, v2 |
| ; GFX9-NEXT: v_and_or_b32 v2, v3, v4, s8 |
| ; GFX9-NEXT: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc a16 da |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: atomic_add_i32_cube: |
| ; GFX10: ; %bb.0: ; %main_body |
| ; GFX10-NEXT: v_mov_b32_e32 v4, 0xffff |
| ; GFX10-NEXT: v_lshlrev_b32_e32 v2, 16, v2 |
| ; GFX10-NEXT: s_mov_b32 s0, s2 |
| ; GFX10-NEXT: s_mov_b32 s2, s4 |
| ; GFX10-NEXT: s_mov_b32 s4, s6 |
| ; GFX10-NEXT: s_mov_b32 s6, s8 |
| ; GFX10-NEXT: s_lshl_b32 s8, s0, 16 |
| ; GFX10-NEXT: v_and_or_b32 v1, v1, v4, v2 |
| ; GFX10-NEXT: v_and_or_b32 v2, v3, v4, s8 |
| ; GFX10-NEXT: s_mov_b32 s1, s3 |
| ; GFX10-NEXT: s_mov_b32 s3, s5 |
| ; GFX10-NEXT: s_mov_b32 s5, s7 |
| ; GFX10-NEXT: s_mov_b32 s7, s9 |
| ; GFX10-NEXT: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_CUBE unorm glc a16 |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: ; return to shader part epilog |
| main_body: |
| %v = call i32 @llvm.amdgcn.image.atomic.add.cube.i32.i16(i32 %data, i16 %s, i16 %t, i16 %face, <8 x i32> %rsrc, i32 0, i32 0) |
| %out = bitcast i32 %v to float |
| ret float %out |
| } |
| |
| ; a16 image atomic add on a 1D array image: i32 data, two i16 operands (%s, %slice). |
| ; The expected code packs both into one VGPR: v1 = (v1 & 0xffff) | (v2 << 16). |
| ; gfx900 materializes the 0xffff mask in v3 and the instruction carries the da modifier; |
| ; gfx1010 passes the mask as a literal and prints dim:SQ_RSRC_IMG_1D_ARRAY. |
| ; The i32 result of the intrinsic is bitcast to float for the shader return value. |
| define amdgpu_ps float @atomic_add_i32_1darray(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %slice) { |
| ; GFX9-LABEL: atomic_add_i32_1darray: |
| ; GFX9: ; %bb.0: ; %main_body |
| ; GFX9-NEXT: v_mov_b32_e32 v3, 0xffff |
| ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v2 |
| ; GFX9-NEXT: s_mov_b32 s0, s2 |
| ; GFX9-NEXT: s_mov_b32 s1, s3 |
| ; GFX9-NEXT: s_mov_b32 s2, s4 |
| ; GFX9-NEXT: s_mov_b32 s3, s5 |
| ; GFX9-NEXT: s_mov_b32 s4, s6 |
| ; GFX9-NEXT: s_mov_b32 s5, s7 |
| ; GFX9-NEXT: s_mov_b32 s6, s8 |
| ; GFX9-NEXT: s_mov_b32 s7, s9 |
| ; GFX9-NEXT: v_and_or_b32 v1, v1, v3, v2 |
| ; GFX9-NEXT: image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc a16 da |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: atomic_add_i32_1darray: |
| ; GFX10: ; %bb.0: ; %main_body |
| ; GFX10-NEXT: v_lshlrev_b32_e32 v2, 16, v2 |
| ; GFX10-NEXT: s_mov_b32 s0, s2 |
| ; GFX10-NEXT: s_mov_b32 s1, s3 |
| ; GFX10-NEXT: s_mov_b32 s2, s4 |
| ; GFX10-NEXT: s_mov_b32 s3, s5 |
| ; GFX10-NEXT: v_and_or_b32 v1, 0xffff, v1, v2 |
| ; GFX10-NEXT: s_mov_b32 s4, s6 |
| ; GFX10-NEXT: s_mov_b32 s5, s7 |
| ; GFX10-NEXT: s_mov_b32 s6, s8 |
| ; GFX10-NEXT: s_mov_b32 s7, s9 |
| ; GFX10-NEXT: image_atomic_add v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D_ARRAY unorm glc a16 |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: ; return to shader part epilog |
| main_body: |
| %v = call i32 @llvm.amdgcn.image.atomic.add.1darray.i32.i16(i32 %data, i16 %s, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0) |
| %out = bitcast i32 %v to float |
| ret float %out |
| } |
| |
| ; a16 image atomic add on a 2D array image: i32 data, three i16 operands (%s, %t, %slice). |
| ; The expected code packs them into the register pair v[1:2]: |
| ; v1 = (v1 & 0xffff) | (v2 << 16) and v2 = (v3 & 0xffff) | s8, with s8 = s0 << 16. |
| ; On gfx900 the instruction also carries the da modifier; gfx1010 prints |
| ; dim:SQ_RSRC_IMG_2D_ARRAY instead. |
| ; NOTE(review): the upper half of v2 has no matching coordinate and is filled from an |
| ; SGPR; presumably an undefined padding half, confirm before relying on it. |
| define amdgpu_ps float @atomic_add_i32_2darray(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %t, i16 %slice) { |
| ; GFX9-LABEL: atomic_add_i32_2darray: |
| ; GFX9: ; %bb.0: ; %main_body |
| ; GFX9-NEXT: s_mov_b32 s0, s2 |
| ; GFX9-NEXT: s_mov_b32 s2, s4 |
| ; GFX9-NEXT: s_mov_b32 s4, s6 |
| ; GFX9-NEXT: s_mov_b32 s6, s8 |
| ; GFX9-NEXT: v_mov_b32_e32 v4, 0xffff |
| ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v2 |
| ; GFX9-NEXT: s_lshl_b32 s8, s0, 16 |
| ; GFX9-NEXT: s_mov_b32 s1, s3 |
| ; GFX9-NEXT: s_mov_b32 s3, s5 |
| ; GFX9-NEXT: s_mov_b32 s5, s7 |
| ; GFX9-NEXT: s_mov_b32 s7, s9 |
| ; GFX9-NEXT: v_and_or_b32 v1, v1, v4, v2 |
| ; GFX9-NEXT: v_and_or_b32 v2, v3, v4, s8 |
| ; GFX9-NEXT: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc a16 da |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: atomic_add_i32_2darray: |
| ; GFX10: ; %bb.0: ; %main_body |
| ; GFX10-NEXT: v_mov_b32_e32 v4, 0xffff |
| ; GFX10-NEXT: v_lshlrev_b32_e32 v2, 16, v2 |
| ; GFX10-NEXT: s_mov_b32 s0, s2 |
| ; GFX10-NEXT: s_mov_b32 s2, s4 |
| ; GFX10-NEXT: s_mov_b32 s4, s6 |
| ; GFX10-NEXT: s_mov_b32 s6, s8 |
| ; GFX10-NEXT: s_lshl_b32 s8, s0, 16 |
| ; GFX10-NEXT: v_and_or_b32 v1, v1, v4, v2 |
| ; GFX10-NEXT: v_and_or_b32 v2, v3, v4, s8 |
| ; GFX10-NEXT: s_mov_b32 s1, s3 |
| ; GFX10-NEXT: s_mov_b32 s3, s5 |
| ; GFX10-NEXT: s_mov_b32 s5, s7 |
| ; GFX10-NEXT: s_mov_b32 s7, s9 |
| ; GFX10-NEXT: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY unorm glc a16 |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: ; return to shader part epilog |
| main_body: |
| %v = call i32 @llvm.amdgcn.image.atomic.add.2darray.i32.i16(i32 %data, i16 %s, i16 %t, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0) |
| %out = bitcast i32 %v to float |
| ret float %out |
| } |
| |
| ; a16 image atomic add on a 2D MSAA image: i32 data, three i16 operands (%s, %t, %fragid). |
| ; The expected code packs them into the register pair v[1:2]: |
| ; v1 = (v1 & 0xffff) | (v2 << 16) and v2 = (v3 & 0xffff) | s8, with s8 = s0 << 16. |
| ; Unlike the cube and array variants, the gfx900 instruction here has no da modifier; |
| ; gfx1010 prints dim:SQ_RSRC_IMG_2D_MSAA. |
| ; NOTE(review): the upper half of v2 has no matching coordinate and is filled from an |
| ; SGPR; presumably an undefined padding half, confirm before relying on it. |
| define amdgpu_ps float @atomic_add_i32_2dmsaa(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %t, i16 %fragid) { |
| ; GFX9-LABEL: atomic_add_i32_2dmsaa: |
| ; GFX9: ; %bb.0: ; %main_body |
| ; GFX9-NEXT: s_mov_b32 s0, s2 |
| ; GFX9-NEXT: s_mov_b32 s2, s4 |
| ; GFX9-NEXT: s_mov_b32 s4, s6 |
| ; GFX9-NEXT: s_mov_b32 s6, s8 |
| ; GFX9-NEXT: v_mov_b32_e32 v4, 0xffff |
| ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v2 |
| ; GFX9-NEXT: s_lshl_b32 s8, s0, 16 |
| ; GFX9-NEXT: s_mov_b32 s1, s3 |
| ; GFX9-NEXT: s_mov_b32 s3, s5 |
| ; GFX9-NEXT: s_mov_b32 s5, s7 |
| ; GFX9-NEXT: s_mov_b32 s7, s9 |
| ; GFX9-NEXT: v_and_or_b32 v1, v1, v4, v2 |
| ; GFX9-NEXT: v_and_or_b32 v2, v3, v4, s8 |
| ; GFX9-NEXT: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc a16 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: atomic_add_i32_2dmsaa: |
| ; GFX10: ; %bb.0: ; %main_body |
| ; GFX10-NEXT: v_mov_b32_e32 v4, 0xffff |
| ; GFX10-NEXT: v_lshlrev_b32_e32 v2, 16, v2 |
| ; GFX10-NEXT: s_mov_b32 s0, s2 |
| ; GFX10-NEXT: s_mov_b32 s2, s4 |
| ; GFX10-NEXT: s_mov_b32 s4, s6 |
| ; GFX10-NEXT: s_mov_b32 s6, s8 |
| ; GFX10-NEXT: s_lshl_b32 s8, s0, 16 |
| ; GFX10-NEXT: v_and_or_b32 v1, v1, v4, v2 |
| ; GFX10-NEXT: v_and_or_b32 v2, v3, v4, s8 |
| ; GFX10-NEXT: s_mov_b32 s1, s3 |
| ; GFX10-NEXT: s_mov_b32 s3, s5 |
| ; GFX10-NEXT: s_mov_b32 s5, s7 |
| ; GFX10-NEXT: s_mov_b32 s7, s9 |
| ; GFX10-NEXT: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm glc a16 |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: ; return to shader part epilog |
| main_body: |
| %v = call i32 @llvm.amdgcn.image.atomic.add.2dmsaa.i32.i16(i32 %data, i16 %s, i16 %t, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0) |
| %out = bitcast i32 %v to float |
| ret float %out |
| } |
| |
| ; a16 image atomic add on a 2D MSAA array image: i32 data and four i16 operands |
| ; (%s, %t, %slice, %fragid). |
| ; The expected code packs all four into the register pair v[1:2]: |
| ; v1 = (v1 & 0xffff) | (v2 << 16) and v2 = (v3 & 0xffff) | (v4 << 16). |
| ; On gfx900 the instruction also carries the da modifier; gfx1010 prints |
| ; dim:SQ_RSRC_IMG_2D_MSAA_ARRAY instead. |
| define amdgpu_ps float @atomic_add_i32_2darraymsaa(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %t, i16 %slice, i16 %fragid) { |
| ; GFX9-LABEL: atomic_add_i32_2darraymsaa: |
| ; GFX9: ; %bb.0: ; %main_body |
| ; GFX9-NEXT: v_mov_b32_e32 v5, 0xffff |
| ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v2 |
| ; GFX9-NEXT: v_and_or_b32 v1, v1, v5, v2 |
| ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v4 |
| ; GFX9-NEXT: s_mov_b32 s0, s2 |
| ; GFX9-NEXT: s_mov_b32 s1, s3 |
| ; GFX9-NEXT: s_mov_b32 s2, s4 |
| ; GFX9-NEXT: s_mov_b32 s3, s5 |
| ; GFX9-NEXT: s_mov_b32 s4, s6 |
| ; GFX9-NEXT: s_mov_b32 s5, s7 |
| ; GFX9-NEXT: s_mov_b32 s6, s8 |
| ; GFX9-NEXT: s_mov_b32 s7, s9 |
| ; GFX9-NEXT: v_and_or_b32 v2, v3, v5, v2 |
| ; GFX9-NEXT: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc a16 da |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: atomic_add_i32_2darraymsaa: |
| ; GFX10: ; %bb.0: ; %main_body |
| ; GFX10-NEXT: v_mov_b32_e32 v5, 0xffff |
| ; GFX10-NEXT: v_lshlrev_b32_e32 v2, 16, v2 |
| ; GFX10-NEXT: v_lshlrev_b32_e32 v4, 16, v4 |
| ; GFX10-NEXT: s_mov_b32 s0, s2 |
| ; GFX10-NEXT: s_mov_b32 s1, s3 |
| ; GFX10-NEXT: s_mov_b32 s2, s4 |
| ; GFX10-NEXT: v_and_or_b32 v1, v1, v5, v2 |
| ; GFX10-NEXT: v_and_or_b32 v2, v3, v5, v4 |
| ; GFX10-NEXT: s_mov_b32 s3, s5 |
| ; GFX10-NEXT: s_mov_b32 s4, s6 |
| ; GFX10-NEXT: s_mov_b32 s5, s7 |
| ; GFX10-NEXT: s_mov_b32 s6, s8 |
| ; GFX10-NEXT: s_mov_b32 s7, s9 |
| ; GFX10-NEXT: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm glc a16 |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: ; return to shader part epilog |
| main_body: |
| %v = call i32 @llvm.amdgcn.image.atomic.add.2darraymsaa.i32.i16(i32 %data, i16 %s, i16 %t, i16 %slice, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0) |
| %out = bitcast i32 %v to float |
| ret float %out |
| } |
| |
| ; Same 1D a16 image atomic add as atomic_add_i32_1d, except that the final intrinsic |
| ; operand is 2 instead of 0. The expected instruction on both targets then carries |
| ; slc in addition to glc and a16. |
| ; The i32 result of the intrinsic is bitcast to float for the shader return value. |
| define amdgpu_ps float @atomic_add_i32_1d_slc(<8 x i32> inreg %rsrc, i32 %data, i16 %s) { |
| ; GFX9-LABEL: atomic_add_i32_1d_slc: |
| ; GFX9: ; %bb.0: ; %main_body |
| ; GFX9-NEXT: s_mov_b32 s0, s2 |
| ; GFX9-NEXT: s_mov_b32 s1, s3 |
| ; GFX9-NEXT: s_mov_b32 s2, s4 |
| ; GFX9-NEXT: s_mov_b32 s3, s5 |
| ; GFX9-NEXT: s_mov_b32 s4, s6 |
| ; GFX9-NEXT: s_mov_b32 s5, s7 |
| ; GFX9-NEXT: s_mov_b32 s6, s8 |
| ; GFX9-NEXT: s_mov_b32 s7, s9 |
| ; GFX9-NEXT: image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc slc a16 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: atomic_add_i32_1d_slc: |
| ; GFX10: ; %bb.0: ; %main_body |
| ; GFX10-NEXT: s_mov_b32 s0, s2 |
| ; GFX10-NEXT: s_mov_b32 s1, s3 |
| ; GFX10-NEXT: s_mov_b32 s2, s4 |
| ; GFX10-NEXT: s_mov_b32 s3, s5 |
| ; GFX10-NEXT: s_mov_b32 s4, s6 |
| ; GFX10-NEXT: s_mov_b32 s5, s7 |
| ; GFX10-NEXT: s_mov_b32 s6, s8 |
| ; GFX10-NEXT: s_mov_b32 s7, s9 |
| ; GFX10-NEXT: image_atomic_add v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc slc a16 |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: ; return to shader part epilog |
| main_body: |
| %v = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 2) |
| %out = bitcast i32 %v to float |
| ret float %out |
| } |
| |
| ; a16 image atomic swap on a 1D image with 64-bit data and one i16 coordinate (%s). |
| ; The i64 data widens the expected data operand to the pair v[0:1] with dmask:0x3, |
| ; which moves the coordinate to v2. |
| ; The i64 result of the intrinsic is bitcast to <2 x float> for the shader return value. |
| define amdgpu_ps <2 x float> @atomic_swap_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i16 %s) { |
| ; GFX9-LABEL: atomic_swap_i64_1d: |
| ; GFX9: ; %bb.0: ; %main_body |
| ; GFX9-NEXT: s_mov_b32 s0, s2 |
| ; GFX9-NEXT: s_mov_b32 s1, s3 |
| ; GFX9-NEXT: s_mov_b32 s2, s4 |
| ; GFX9-NEXT: s_mov_b32 s3, s5 |
| ; GFX9-NEXT: s_mov_b32 s4, s6 |
| ; GFX9-NEXT: s_mov_b32 s5, s7 |
| ; GFX9-NEXT: s_mov_b32 s6, s8 |
| ; GFX9-NEXT: s_mov_b32 s7, s9 |
| ; GFX9-NEXT: image_atomic_swap v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: atomic_swap_i64_1d: |
| ; GFX10: ; %bb.0: ; %main_body |
| ; GFX10-NEXT: s_mov_b32 s0, s2 |
| ; GFX10-NEXT: s_mov_b32 s1, s3 |
| ; GFX10-NEXT: s_mov_b32 s2, s4 |
| ; GFX10-NEXT: s_mov_b32 s3, s5 |
| ; GFX10-NEXT: s_mov_b32 s4, s6 |
| ; GFX10-NEXT: s_mov_b32 s5, s7 |
| ; GFX10-NEXT: s_mov_b32 s6, s8 |
| ; GFX10-NEXT: s_mov_b32 s7, s9 |
| ; GFX10-NEXT: image_atomic_swap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16 |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: ; return to shader part epilog |
| main_body: |
| %v = call i64 @llvm.amdgcn.image.atomic.swap.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) |
| %out = bitcast i64 %v to <2 x float> |
| ret <2 x float> %out |
| } |
| |
| ; 64-bit image atomic add on a 1D image addressed by one i16 coordinate. |
| ; Expected instruction: image_atomic_add with data v[0:1], address v2, |
| ; dmask:0x3, glc and a16 on both targets (plus the dim operand on gfx1010). |
| define amdgpu_ps <2 x float> @atomic_add_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i16 %s) { |
| ; GFX9-LABEL: atomic_add_i64_1d: |
| ; GFX9: ; %bb.0: ; %main_body |
| ; GFX9-NEXT: s_mov_b32 s0, s2 |
| ; GFX9-NEXT: s_mov_b32 s1, s3 |
| ; GFX9-NEXT: s_mov_b32 s2, s4 |
| ; GFX9-NEXT: s_mov_b32 s3, s5 |
| ; GFX9-NEXT: s_mov_b32 s4, s6 |
| ; GFX9-NEXT: s_mov_b32 s5, s7 |
| ; GFX9-NEXT: s_mov_b32 s6, s8 |
| ; GFX9-NEXT: s_mov_b32 s7, s9 |
| ; GFX9-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: atomic_add_i64_1d: |
| ; GFX10: ; %bb.0: ; %main_body |
| ; GFX10-NEXT: s_mov_b32 s0, s2 |
| ; GFX10-NEXT: s_mov_b32 s1, s3 |
| ; GFX10-NEXT: s_mov_b32 s2, s4 |
| ; GFX10-NEXT: s_mov_b32 s3, s5 |
| ; GFX10-NEXT: s_mov_b32 s4, s6 |
| ; GFX10-NEXT: s_mov_b32 s5, s7 |
| ; GFX10-NEXT: s_mov_b32 s6, s8 |
| ; GFX10-NEXT: s_mov_b32 s7, s9 |
| ; GFX10-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16 |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: ; return to shader part epilog |
| main_body: |
| %v = call i64 @llvm.amdgcn.image.atomic.add.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) |
| %out = bitcast i64 %v to <2 x float> |
| ret <2 x float> %out |
| } |
| |
| ; 64-bit image atomic sub on a 1D image addressed by one i16 coordinate. |
| ; Expected instruction: image_atomic_sub with data v[0:1], address v2, |
| ; dmask:0x3, glc and a16 on both targets (plus the dim operand on gfx1010). |
| define amdgpu_ps <2 x float> @atomic_sub_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i16 %s) { |
| ; GFX9-LABEL: atomic_sub_i64_1d: |
| ; GFX9: ; %bb.0: ; %main_body |
| ; GFX9-NEXT: s_mov_b32 s0, s2 |
| ; GFX9-NEXT: s_mov_b32 s1, s3 |
| ; GFX9-NEXT: s_mov_b32 s2, s4 |
| ; GFX9-NEXT: s_mov_b32 s3, s5 |
| ; GFX9-NEXT: s_mov_b32 s4, s6 |
| ; GFX9-NEXT: s_mov_b32 s5, s7 |
| ; GFX9-NEXT: s_mov_b32 s6, s8 |
| ; GFX9-NEXT: s_mov_b32 s7, s9 |
| ; GFX9-NEXT: image_atomic_sub v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: atomic_sub_i64_1d: |
| ; GFX10: ; %bb.0: ; %main_body |
| ; GFX10-NEXT: s_mov_b32 s0, s2 |
| ; GFX10-NEXT: s_mov_b32 s1, s3 |
| ; GFX10-NEXT: s_mov_b32 s2, s4 |
| ; GFX10-NEXT: s_mov_b32 s3, s5 |
| ; GFX10-NEXT: s_mov_b32 s4, s6 |
| ; GFX10-NEXT: s_mov_b32 s5, s7 |
| ; GFX10-NEXT: s_mov_b32 s6, s8 |
| ; GFX10-NEXT: s_mov_b32 s7, s9 |
| ; GFX10-NEXT: image_atomic_sub v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16 |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: ; return to shader part epilog |
| main_body: |
| %v = call i64 @llvm.amdgcn.image.atomic.sub.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) |
| %out = bitcast i64 %v to <2 x float> |
| ret <2 x float> %out |
| } |
| |
| ; 64-bit image atomic smin on a 1D image addressed by one i16 coordinate. |
| ; Expected instruction: image_atomic_smin with data v[0:1], address v2, |
| ; dmask:0x3, glc and a16 on both targets (plus the dim operand on gfx1010). |
| define amdgpu_ps <2 x float> @atomic_smin_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i16 %s) { |
| ; GFX9-LABEL: atomic_smin_i64_1d: |
| ; GFX9: ; %bb.0: ; %main_body |
| ; GFX9-NEXT: s_mov_b32 s0, s2 |
| ; GFX9-NEXT: s_mov_b32 s1, s3 |
| ; GFX9-NEXT: s_mov_b32 s2, s4 |
| ; GFX9-NEXT: s_mov_b32 s3, s5 |
| ; GFX9-NEXT: s_mov_b32 s4, s6 |
| ; GFX9-NEXT: s_mov_b32 s5, s7 |
| ; GFX9-NEXT: s_mov_b32 s6, s8 |
| ; GFX9-NEXT: s_mov_b32 s7, s9 |
| ; GFX9-NEXT: image_atomic_smin v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: atomic_smin_i64_1d: |
| ; GFX10: ; %bb.0: ; %main_body |
| ; GFX10-NEXT: s_mov_b32 s0, s2 |
| ; GFX10-NEXT: s_mov_b32 s1, s3 |
| ; GFX10-NEXT: s_mov_b32 s2, s4 |
| ; GFX10-NEXT: s_mov_b32 s3, s5 |
| ; GFX10-NEXT: s_mov_b32 s4, s6 |
| ; GFX10-NEXT: s_mov_b32 s5, s7 |
| ; GFX10-NEXT: s_mov_b32 s6, s8 |
| ; GFX10-NEXT: s_mov_b32 s7, s9 |
| ; GFX10-NEXT: image_atomic_smin v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16 |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: ; return to shader part epilog |
| main_body: |
| %v = call i64 @llvm.amdgcn.image.atomic.smin.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) |
| %out = bitcast i64 %v to <2 x float> |
| ret <2 x float> %out |
| } |
| |
| ; 64-bit image atomic umin on a 1D image addressed by one i16 coordinate. |
| ; Expected instruction: image_atomic_umin with data v[0:1], address v2, |
| ; dmask:0x3, glc and a16 on both targets (plus the dim operand on gfx1010). |
| define amdgpu_ps <2 x float> @atomic_umin_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i16 %s) { |
| ; GFX9-LABEL: atomic_umin_i64_1d: |
| ; GFX9: ; %bb.0: ; %main_body |
| ; GFX9-NEXT: s_mov_b32 s0, s2 |
| ; GFX9-NEXT: s_mov_b32 s1, s3 |
| ; GFX9-NEXT: s_mov_b32 s2, s4 |
| ; GFX9-NEXT: s_mov_b32 s3, s5 |
| ; GFX9-NEXT: s_mov_b32 s4, s6 |
| ; GFX9-NEXT: s_mov_b32 s5, s7 |
| ; GFX9-NEXT: s_mov_b32 s6, s8 |
| ; GFX9-NEXT: s_mov_b32 s7, s9 |
| ; GFX9-NEXT: image_atomic_umin v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: atomic_umin_i64_1d: |
| ; GFX10: ; %bb.0: ; %main_body |
| ; GFX10-NEXT: s_mov_b32 s0, s2 |
| ; GFX10-NEXT: s_mov_b32 s1, s3 |
| ; GFX10-NEXT: s_mov_b32 s2, s4 |
| ; GFX10-NEXT: s_mov_b32 s3, s5 |
| ; GFX10-NEXT: s_mov_b32 s4, s6 |
| ; GFX10-NEXT: s_mov_b32 s5, s7 |
| ; GFX10-NEXT: s_mov_b32 s6, s8 |
| ; GFX10-NEXT: s_mov_b32 s7, s9 |
| ; GFX10-NEXT: image_atomic_umin v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16 |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: ; return to shader part epilog |
| main_body: |
| %v = call i64 @llvm.amdgcn.image.atomic.umin.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) |
| %out = bitcast i64 %v to <2 x float> |
| ret <2 x float> %out |
| } |
| |
| ; 64-bit image atomic smax on a 1D image addressed by one i16 coordinate. |
| ; Expected instruction: image_atomic_smax with data v[0:1], address v2, |
| ; dmask:0x3, glc and a16 on both targets (plus the dim operand on gfx1010). |
| define amdgpu_ps <2 x float> @atomic_smax_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i16 %s) { |
| ; GFX9-LABEL: atomic_smax_i64_1d: |
| ; GFX9: ; %bb.0: ; %main_body |
| ; GFX9-NEXT: s_mov_b32 s0, s2 |
| ; GFX9-NEXT: s_mov_b32 s1, s3 |
| ; GFX9-NEXT: s_mov_b32 s2, s4 |
| ; GFX9-NEXT: s_mov_b32 s3, s5 |
| ; GFX9-NEXT: s_mov_b32 s4, s6 |
| ; GFX9-NEXT: s_mov_b32 s5, s7 |
| ; GFX9-NEXT: s_mov_b32 s6, s8 |
| ; GFX9-NEXT: s_mov_b32 s7, s9 |
| ; GFX9-NEXT: image_atomic_smax v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: atomic_smax_i64_1d: |
| ; GFX10: ; %bb.0: ; %main_body |
| ; GFX10-NEXT: s_mov_b32 s0, s2 |
| ; GFX10-NEXT: s_mov_b32 s1, s3 |
| ; GFX10-NEXT: s_mov_b32 s2, s4 |
| ; GFX10-NEXT: s_mov_b32 s3, s5 |
| ; GFX10-NEXT: s_mov_b32 s4, s6 |
| ; GFX10-NEXT: s_mov_b32 s5, s7 |
| ; GFX10-NEXT: s_mov_b32 s6, s8 |
| ; GFX10-NEXT: s_mov_b32 s7, s9 |
| ; GFX10-NEXT: image_atomic_smax v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16 |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: ; return to shader part epilog |
| main_body: |
| %v = call i64 @llvm.amdgcn.image.atomic.smax.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) |
| %out = bitcast i64 %v to <2 x float> |
| ret <2 x float> %out |
| } |
| |
| ; 64-bit image atomic umax on a 1D image addressed by one i16 coordinate. |
| ; Expected instruction: image_atomic_umax with data v[0:1], address v2, |
| ; dmask:0x3, glc and a16 on both targets (plus the dim operand on gfx1010). |
| define amdgpu_ps <2 x float> @atomic_umax_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i16 %s) { |
| ; GFX9-LABEL: atomic_umax_i64_1d: |
| ; GFX9: ; %bb.0: ; %main_body |
| ; GFX9-NEXT: s_mov_b32 s0, s2 |
| ; GFX9-NEXT: s_mov_b32 s1, s3 |
| ; GFX9-NEXT: s_mov_b32 s2, s4 |
| ; GFX9-NEXT: s_mov_b32 s3, s5 |
| ; GFX9-NEXT: s_mov_b32 s4, s6 |
| ; GFX9-NEXT: s_mov_b32 s5, s7 |
| ; GFX9-NEXT: s_mov_b32 s6, s8 |
| ; GFX9-NEXT: s_mov_b32 s7, s9 |
| ; GFX9-NEXT: image_atomic_umax v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: atomic_umax_i64_1d: |
| ; GFX10: ; %bb.0: ; %main_body |
| ; GFX10-NEXT: s_mov_b32 s0, s2 |
| ; GFX10-NEXT: s_mov_b32 s1, s3 |
| ; GFX10-NEXT: s_mov_b32 s2, s4 |
| ; GFX10-NEXT: s_mov_b32 s3, s5 |
| ; GFX10-NEXT: s_mov_b32 s4, s6 |
| ; GFX10-NEXT: s_mov_b32 s5, s7 |
| ; GFX10-NEXT: s_mov_b32 s6, s8 |
| ; GFX10-NEXT: s_mov_b32 s7, s9 |
| ; GFX10-NEXT: image_atomic_umax v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16 |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: ; return to shader part epilog |
| main_body: |
| %v = call i64 @llvm.amdgcn.image.atomic.umax.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) |
| %out = bitcast i64 %v to <2 x float> |
| ret <2 x float> %out |
| } |
| |
| ; 64-bit image atomic and on a 1D image addressed by one i16 coordinate. |
| ; Expected instruction: image_atomic_and with data v[0:1], address v2, |
| ; dmask:0x3, glc and a16 on both targets (plus the dim operand on gfx1010). |
| define amdgpu_ps <2 x float> @atomic_and_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i16 %s) { |
| ; GFX9-LABEL: atomic_and_i64_1d: |
| ; GFX9: ; %bb.0: ; %main_body |
| ; GFX9-NEXT: s_mov_b32 s0, s2 |
| ; GFX9-NEXT: s_mov_b32 s1, s3 |
| ; GFX9-NEXT: s_mov_b32 s2, s4 |
| ; GFX9-NEXT: s_mov_b32 s3, s5 |
| ; GFX9-NEXT: s_mov_b32 s4, s6 |
| ; GFX9-NEXT: s_mov_b32 s5, s7 |
| ; GFX9-NEXT: s_mov_b32 s6, s8 |
| ; GFX9-NEXT: s_mov_b32 s7, s9 |
| ; GFX9-NEXT: image_atomic_and v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: atomic_and_i64_1d: |
| ; GFX10: ; %bb.0: ; %main_body |
| ; GFX10-NEXT: s_mov_b32 s0, s2 |
| ; GFX10-NEXT: s_mov_b32 s1, s3 |
| ; GFX10-NEXT: s_mov_b32 s2, s4 |
| ; GFX10-NEXT: s_mov_b32 s3, s5 |
| ; GFX10-NEXT: s_mov_b32 s4, s6 |
| ; GFX10-NEXT: s_mov_b32 s5, s7 |
| ; GFX10-NEXT: s_mov_b32 s6, s8 |
| ; GFX10-NEXT: s_mov_b32 s7, s9 |
| ; GFX10-NEXT: image_atomic_and v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16 |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: ; return to shader part epilog |
| main_body: |
| %v = call i64 @llvm.amdgcn.image.atomic.and.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) |
| %out = bitcast i64 %v to <2 x float> |
| ret <2 x float> %out |
| } |
| |
| ; 64-bit image atomic or on a 1D image addressed by one i16 coordinate. |
| ; Expected instruction: image_atomic_or with data v[0:1], address v2, |
| ; dmask:0x3, glc and a16 on both targets (plus the dim operand on gfx1010). |
| define amdgpu_ps <2 x float> @atomic_or_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i16 %s) { |
| ; GFX9-LABEL: atomic_or_i64_1d: |
| ; GFX9: ; %bb.0: ; %main_body |
| ; GFX9-NEXT: s_mov_b32 s0, s2 |
| ; GFX9-NEXT: s_mov_b32 s1, s3 |
| ; GFX9-NEXT: s_mov_b32 s2, s4 |
| ; GFX9-NEXT: s_mov_b32 s3, s5 |
| ; GFX9-NEXT: s_mov_b32 s4, s6 |
| ; GFX9-NEXT: s_mov_b32 s5, s7 |
| ; GFX9-NEXT: s_mov_b32 s6, s8 |
| ; GFX9-NEXT: s_mov_b32 s7, s9 |
| ; GFX9-NEXT: image_atomic_or v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: atomic_or_i64_1d: |
| ; GFX10: ; %bb.0: ; %main_body |
| ; GFX10-NEXT: s_mov_b32 s0, s2 |
| ; GFX10-NEXT: s_mov_b32 s1, s3 |
| ; GFX10-NEXT: s_mov_b32 s2, s4 |
| ; GFX10-NEXT: s_mov_b32 s3, s5 |
| ; GFX10-NEXT: s_mov_b32 s4, s6 |
| ; GFX10-NEXT: s_mov_b32 s5, s7 |
| ; GFX10-NEXT: s_mov_b32 s6, s8 |
| ; GFX10-NEXT: s_mov_b32 s7, s9 |
| ; GFX10-NEXT: image_atomic_or v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16 |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: ; return to shader part epilog |
| main_body: |
| %v = call i64 @llvm.amdgcn.image.atomic.or.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) |
| %out = bitcast i64 %v to <2 x float> |
| ret <2 x float> %out |
| } |
| |
| ; 64-bit image atomic xor on a 1D image addressed by one i16 coordinate. |
| ; Expected instruction: image_atomic_xor with data v[0:1], address v2, |
| ; dmask:0x3, glc and a16 on both targets (plus the dim operand on gfx1010). |
| define amdgpu_ps <2 x float> @atomic_xor_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i16 %s) { |
| ; GFX9-LABEL: atomic_xor_i64_1d: |
| ; GFX9: ; %bb.0: ; %main_body |
| ; GFX9-NEXT: s_mov_b32 s0, s2 |
| ; GFX9-NEXT: s_mov_b32 s1, s3 |
| ; GFX9-NEXT: s_mov_b32 s2, s4 |
| ; GFX9-NEXT: s_mov_b32 s3, s5 |
| ; GFX9-NEXT: s_mov_b32 s4, s6 |
| ; GFX9-NEXT: s_mov_b32 s5, s7 |
| ; GFX9-NEXT: s_mov_b32 s6, s8 |
| ; GFX9-NEXT: s_mov_b32 s7, s9 |
| ; GFX9-NEXT: image_atomic_xor v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: atomic_xor_i64_1d: |
| ; GFX10: ; %bb.0: ; %main_body |
| ; GFX10-NEXT: s_mov_b32 s0, s2 |
| ; GFX10-NEXT: s_mov_b32 s1, s3 |
| ; GFX10-NEXT: s_mov_b32 s2, s4 |
| ; GFX10-NEXT: s_mov_b32 s3, s5 |
| ; GFX10-NEXT: s_mov_b32 s4, s6 |
| ; GFX10-NEXT: s_mov_b32 s5, s7 |
| ; GFX10-NEXT: s_mov_b32 s6, s8 |
| ; GFX10-NEXT: s_mov_b32 s7, s9 |
| ; GFX10-NEXT: image_atomic_xor v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16 |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: ; return to shader part epilog |
| main_body: |
| %v = call i64 @llvm.amdgcn.image.atomic.xor.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) |
| %out = bitcast i64 %v to <2 x float> |
| ret <2 x float> %out |
| } |
| |
| ; 64-bit image atomic inc on a 1D image addressed by one i16 coordinate. |
| ; Expected instruction: image_atomic_inc with data v[0:1], address v2, |
| ; dmask:0x3, glc and a16 on both targets (plus the dim operand on gfx1010). |
| define amdgpu_ps <2 x float> @atomic_inc_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i16 %s) { |
| ; GFX9-LABEL: atomic_inc_i64_1d: |
| ; GFX9: ; %bb.0: ; %main_body |
| ; GFX9-NEXT: s_mov_b32 s0, s2 |
| ; GFX9-NEXT: s_mov_b32 s1, s3 |
| ; GFX9-NEXT: s_mov_b32 s2, s4 |
| ; GFX9-NEXT: s_mov_b32 s3, s5 |
| ; GFX9-NEXT: s_mov_b32 s4, s6 |
| ; GFX9-NEXT: s_mov_b32 s5, s7 |
| ; GFX9-NEXT: s_mov_b32 s6, s8 |
| ; GFX9-NEXT: s_mov_b32 s7, s9 |
| ; GFX9-NEXT: image_atomic_inc v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: atomic_inc_i64_1d: |
| ; GFX10: ; %bb.0: ; %main_body |
| ; GFX10-NEXT: s_mov_b32 s0, s2 |
| ; GFX10-NEXT: s_mov_b32 s1, s3 |
| ; GFX10-NEXT: s_mov_b32 s2, s4 |
| ; GFX10-NEXT: s_mov_b32 s3, s5 |
| ; GFX10-NEXT: s_mov_b32 s4, s6 |
| ; GFX10-NEXT: s_mov_b32 s5, s7 |
| ; GFX10-NEXT: s_mov_b32 s6, s8 |
| ; GFX10-NEXT: s_mov_b32 s7, s9 |
| ; GFX10-NEXT: image_atomic_inc v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16 |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: ; return to shader part epilog |
| main_body: |
| %v = call i64 @llvm.amdgcn.image.atomic.inc.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) |
| %out = bitcast i64 %v to <2 x float> |
| ret <2 x float> %out |
| } |
| |
| ; 64-bit image atomic dec on a 1D image addressed by one i16 coordinate. |
| ; Expected instruction: image_atomic_dec with data v[0:1], address v2, |
| ; dmask:0x3, glc and a16 on both targets (plus the dim operand on gfx1010). |
| define amdgpu_ps <2 x float> @atomic_dec_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i16 %s) { |
| ; GFX9-LABEL: atomic_dec_i64_1d: |
| ; GFX9: ; %bb.0: ; %main_body |
| ; GFX9-NEXT: s_mov_b32 s0, s2 |
| ; GFX9-NEXT: s_mov_b32 s1, s3 |
| ; GFX9-NEXT: s_mov_b32 s2, s4 |
| ; GFX9-NEXT: s_mov_b32 s3, s5 |
| ; GFX9-NEXT: s_mov_b32 s4, s6 |
| ; GFX9-NEXT: s_mov_b32 s5, s7 |
| ; GFX9-NEXT: s_mov_b32 s6, s8 |
| ; GFX9-NEXT: s_mov_b32 s7, s9 |
| ; GFX9-NEXT: image_atomic_dec v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: atomic_dec_i64_1d: |
| ; GFX10: ; %bb.0: ; %main_body |
| ; GFX10-NEXT: s_mov_b32 s0, s2 |
| ; GFX10-NEXT: s_mov_b32 s1, s3 |
| ; GFX10-NEXT: s_mov_b32 s2, s4 |
| ; GFX10-NEXT: s_mov_b32 s3, s5 |
| ; GFX10-NEXT: s_mov_b32 s4, s6 |
| ; GFX10-NEXT: s_mov_b32 s5, s7 |
| ; GFX10-NEXT: s_mov_b32 s6, s8 |
| ; GFX10-NEXT: s_mov_b32 s7, s9 |
| ; GFX10-NEXT: image_atomic_dec v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16 |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: ; return to shader part epilog |
| main_body: |
| %v = call i64 @llvm.amdgcn.image.atomic.dec.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) |
| %out = bitcast i64 %v to <2 x float> |
| ret <2 x float> %out |
| } |
| |
| ; 64-bit image atomic compare-and-swap on a 1D image with one i16 coordinate. |
| ; The two i64 operands (%cmp, %swap) appear in the checks as a four-register |
| ; data operand v[0:3] with dmask:0xf; the coordinate is expected in v4. |
| ; Only an i64 result is returned by the intrinsic and bitcast to <2 x float>. |
| define amdgpu_ps <2 x float> @atomic_cmpswap_i64_1d(<8 x i32> inreg %rsrc, i64 %cmp, i64 %swap, i16 %s) { |
| ; GFX9-LABEL: atomic_cmpswap_i64_1d: |
| ; GFX9: ; %bb.0: ; %main_body |
| ; GFX9-NEXT: s_mov_b32 s0, s2 |
| ; GFX9-NEXT: s_mov_b32 s1, s3 |
| ; GFX9-NEXT: s_mov_b32 s2, s4 |
| ; GFX9-NEXT: s_mov_b32 s3, s5 |
| ; GFX9-NEXT: s_mov_b32 s4, s6 |
| ; GFX9-NEXT: s_mov_b32 s5, s7 |
| ; GFX9-NEXT: s_mov_b32 s6, s8 |
| ; GFX9-NEXT: s_mov_b32 s7, s9 |
| ; GFX9-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc a16 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: atomic_cmpswap_i64_1d: |
| ; GFX10: ; %bb.0: ; %main_body |
| ; GFX10-NEXT: s_mov_b32 s0, s2 |
| ; GFX10-NEXT: s_mov_b32 s1, s3 |
| ; GFX10-NEXT: s_mov_b32 s2, s4 |
| ; GFX10-NEXT: s_mov_b32 s3, s5 |
| ; GFX10-NEXT: s_mov_b32 s4, s6 |
| ; GFX10-NEXT: s_mov_b32 s5, s7 |
| ; GFX10-NEXT: s_mov_b32 s6, s8 |
| ; GFX10-NEXT: s_mov_b32 s7, s9 |
| ; GFX10-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc a16 |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: ; return to shader part epilog |
| main_body: |
| %v = call i64 @llvm.amdgcn.image.atomic.cmpswap.1d.i64.i16(i64 %cmp, i64 %swap, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) |
| %out = bitcast i64 %v to <2 x float> |
| ret <2 x float> %out |
| } |
| |
| ; 64-bit image atomic add on a 2D image with two i16 coordinates. |
| ; The checks expect the two coordinate registers to be packed into v2 before |
| ; the atomic: v3 is shifted left by 16 and OR-ed with v2 masked by 0xffff. |
| ; gfx900 materializes the mask in v4; gfx1010 uses it as an inline literal. |
| define amdgpu_ps <2 x float> @atomic_add_i64_2d(<8 x i32> inreg %rsrc, i64 %data, i16 %s, i16 %t) { |
| ; GFX9-LABEL: atomic_add_i64_2d: |
| ; GFX9: ; %bb.0: ; %main_body |
| ; GFX9-NEXT: v_mov_b32_e32 v4, 0xffff |
| ; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v3 |
| ; GFX9-NEXT: s_mov_b32 s0, s2 |
| ; GFX9-NEXT: s_mov_b32 s1, s3 |
| ; GFX9-NEXT: s_mov_b32 s2, s4 |
| ; GFX9-NEXT: s_mov_b32 s3, s5 |
| ; GFX9-NEXT: s_mov_b32 s4, s6 |
| ; GFX9-NEXT: s_mov_b32 s5, s7 |
| ; GFX9-NEXT: s_mov_b32 s6, s8 |
| ; GFX9-NEXT: s_mov_b32 s7, s9 |
| ; GFX9-NEXT: v_and_or_b32 v2, v2, v4, v3 |
| ; GFX9-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: atomic_add_i64_2d: |
| ; GFX10: ; %bb.0: ; %main_body |
| ; GFX10-NEXT: v_lshlrev_b32_e32 v3, 16, v3 |
| ; GFX10-NEXT: s_mov_b32 s0, s2 |
| ; GFX10-NEXT: s_mov_b32 s1, s3 |
| ; GFX10-NEXT: s_mov_b32 s2, s4 |
| ; GFX10-NEXT: s_mov_b32 s3, s5 |
| ; GFX10-NEXT: v_and_or_b32 v2, 0xffff, v2, v3 |
| ; GFX10-NEXT: s_mov_b32 s4, s6 |
| ; GFX10-NEXT: s_mov_b32 s5, s7 |
| ; GFX10-NEXT: s_mov_b32 s6, s8 |
| ; GFX10-NEXT: s_mov_b32 s7, s9 |
| ; GFX10-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D unorm glc a16 |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: ; return to shader part epilog |
| main_body: |
| %v = call i64 @llvm.amdgcn.image.atomic.add.2d.i64.i16(i64 %data, i16 %s, i16 %t, <8 x i32> %rsrc, i32 0, i32 0) |
| %out = bitcast i64 %v to <2 x float> |
| ret <2 x float> %out |
| } |
| |
| ; 64-bit image atomic add on a 3D image with three i16 coordinates. |
| ; The checks expect a two-register address v[2:3]: v2 and v3 are packed into |
| ; v2, and v4 (masked by 0xffff) is combined in v3 with s8, which holds s0 |
| ; shifted left by 16. |
| ; NOTE(review): the s8 term looks like filler for the unused upper half of |
| ; the second address dword -- confirm against the legalizer. |
| define amdgpu_ps <2 x float> @atomic_add_i64_3d(<8 x i32> inreg %rsrc, i64 %data, i16 %s, i16 %t, i16 %r) { |
| ; GFX9-LABEL: atomic_add_i64_3d: |
| ; GFX9: ; %bb.0: ; %main_body |
| ; GFX9-NEXT: s_mov_b32 s0, s2 |
| ; GFX9-NEXT: s_mov_b32 s2, s4 |
| ; GFX9-NEXT: s_mov_b32 s4, s6 |
| ; GFX9-NEXT: s_mov_b32 s6, s8 |
| ; GFX9-NEXT: v_mov_b32_e32 v5, 0xffff |
| ; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v3 |
| ; GFX9-NEXT: s_lshl_b32 s8, s0, 16 |
| ; GFX9-NEXT: s_mov_b32 s1, s3 |
| ; GFX9-NEXT: s_mov_b32 s3, s5 |
| ; GFX9-NEXT: s_mov_b32 s5, s7 |
| ; GFX9-NEXT: s_mov_b32 s7, s9 |
| ; GFX9-NEXT: v_and_or_b32 v2, v2, v5, v3 |
| ; GFX9-NEXT: v_and_or_b32 v3, v4, v5, s8 |
| ; GFX9-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc a16 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: atomic_add_i64_3d: |
| ; GFX10: ; %bb.0: ; %main_body |
| ; GFX10-NEXT: v_mov_b32_e32 v5, 0xffff |
| ; GFX10-NEXT: v_lshlrev_b32_e32 v3, 16, v3 |
| ; GFX10-NEXT: s_mov_b32 s0, s2 |
| ; GFX10-NEXT: s_mov_b32 s2, s4 |
| ; GFX10-NEXT: s_mov_b32 s4, s6 |
| ; GFX10-NEXT: s_mov_b32 s6, s8 |
| ; GFX10-NEXT: s_lshl_b32 s8, s0, 16 |
| ; GFX10-NEXT: v_and_or_b32 v2, v2, v5, v3 |
| ; GFX10-NEXT: v_and_or_b32 v3, v4, v5, s8 |
| ; GFX10-NEXT: s_mov_b32 s1, s3 |
| ; GFX10-NEXT: s_mov_b32 s3, s5 |
| ; GFX10-NEXT: s_mov_b32 s5, s7 |
| ; GFX10-NEXT: s_mov_b32 s7, s9 |
| ; GFX10-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_3D unorm glc a16 |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: ; return to shader part epilog |
| main_body: |
| %v = call i64 @llvm.amdgcn.image.atomic.add.3d.i64.i16(i64 %data, i16 %s, i16 %t, i16 %r, <8 x i32> %rsrc, i32 0, i32 0) |
| %out = bitcast i64 %v to <2 x float> |
| ret <2 x float> %out |
| } |
| |
| ; 64-bit image atomic add on a cube image with three i16 coordinates |
| ; (%s, %t, %face). The checks expect a two-register address v[2:3]: v2 and v3 |
| ; are packed into v2, and v4 (masked by 0xffff) is combined in v3 with s8, |
| ; which holds s0 shifted left by 16. The gfx900 run expects the da modifier; |
| ; the gfx1010 run expects dim:SQ_RSRC_IMG_CUBE instead. |
| define amdgpu_ps <2 x float> @atomic_add_i64_cube(<8 x i32> inreg %rsrc, i64 %data, i16 %s, i16 %t, i16 %face) { |
| ; GFX9-LABEL: atomic_add_i64_cube: |
| ; GFX9: ; %bb.0: ; %main_body |
| ; GFX9-NEXT: s_mov_b32 s0, s2 |
| ; GFX9-NEXT: s_mov_b32 s2, s4 |
| ; GFX9-NEXT: s_mov_b32 s4, s6 |
| ; GFX9-NEXT: s_mov_b32 s6, s8 |
| ; GFX9-NEXT: v_mov_b32_e32 v5, 0xffff |
| ; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v3 |
| ; GFX9-NEXT: s_lshl_b32 s8, s0, 16 |
| ; GFX9-NEXT: s_mov_b32 s1, s3 |
| ; GFX9-NEXT: s_mov_b32 s3, s5 |
| ; GFX9-NEXT: s_mov_b32 s5, s7 |
| ; GFX9-NEXT: s_mov_b32 s7, s9 |
| ; GFX9-NEXT: v_and_or_b32 v2, v2, v5, v3 |
| ; GFX9-NEXT: v_and_or_b32 v3, v4, v5, s8 |
| ; GFX9-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc a16 da |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: atomic_add_i64_cube: |
| ; GFX10: ; %bb.0: ; %main_body |
| ; GFX10-NEXT: v_mov_b32_e32 v5, 0xffff |
| ; GFX10-NEXT: v_lshlrev_b32_e32 v3, 16, v3 |
| ; GFX10-NEXT: s_mov_b32 s0, s2 |
| ; GFX10-NEXT: s_mov_b32 s2, s4 |
| ; GFX10-NEXT: s_mov_b32 s4, s6 |
| ; GFX10-NEXT: s_mov_b32 s6, s8 |
| ; GFX10-NEXT: s_lshl_b32 s8, s0, 16 |
| ; GFX10-NEXT: v_and_or_b32 v2, v2, v5, v3 |
| ; GFX10-NEXT: v_and_or_b32 v3, v4, v5, s8 |
| ; GFX10-NEXT: s_mov_b32 s1, s3 |
| ; GFX10-NEXT: s_mov_b32 s3, s5 |
| ; GFX10-NEXT: s_mov_b32 s5, s7 |
| ; GFX10-NEXT: s_mov_b32 s7, s9 |
| ; GFX10-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_CUBE unorm glc a16 |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: ; return to shader part epilog |
| main_body: |
| %v = call i64 @llvm.amdgcn.image.atomic.add.cube.i64.i16(i64 %data, i16 %s, i16 %t, i16 %face, <8 x i32> %rsrc, i32 0, i32 0) |
| %out = bitcast i64 %v to <2 x float> |
| ret <2 x float> %out |
| } |
| |
| ; 64-bit image atomic add on a 1D array image with two i16 address operands |
| ; (%s, %slice). The checks expect both to be packed into v2 (v3 shifted left |
| ; by 16, OR-ed with v2 masked by 0xffff). The gfx900 run expects the da |
| ; modifier; the gfx1010 run expects dim:SQ_RSRC_IMG_1D_ARRAY instead. |
| define amdgpu_ps <2 x float> @atomic_add_i64_1darray(<8 x i32> inreg %rsrc, i64 %data, i16 %s, i16 %slice) { |
| ; GFX9-LABEL: atomic_add_i64_1darray: |
| ; GFX9: ; %bb.0: ; %main_body |
| ; GFX9-NEXT: v_mov_b32_e32 v4, 0xffff |
| ; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v3 |
| ; GFX9-NEXT: s_mov_b32 s0, s2 |
| ; GFX9-NEXT: s_mov_b32 s1, s3 |
| ; GFX9-NEXT: s_mov_b32 s2, s4 |
| ; GFX9-NEXT: s_mov_b32 s3, s5 |
| ; GFX9-NEXT: s_mov_b32 s4, s6 |
| ; GFX9-NEXT: s_mov_b32 s5, s7 |
| ; GFX9-NEXT: s_mov_b32 s6, s8 |
| ; GFX9-NEXT: s_mov_b32 s7, s9 |
| ; GFX9-NEXT: v_and_or_b32 v2, v2, v4, v3 |
| ; GFX9-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16 da |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: atomic_add_i64_1darray: |
| ; GFX10: ; %bb.0: ; %main_body |
| ; GFX10-NEXT: v_lshlrev_b32_e32 v3, 16, v3 |
| ; GFX10-NEXT: s_mov_b32 s0, s2 |
| ; GFX10-NEXT: s_mov_b32 s1, s3 |
| ; GFX10-NEXT: s_mov_b32 s2, s4 |
| ; GFX10-NEXT: s_mov_b32 s3, s5 |
| ; GFX10-NEXT: v_and_or_b32 v2, 0xffff, v2, v3 |
| ; GFX10-NEXT: s_mov_b32 s4, s6 |
| ; GFX10-NEXT: s_mov_b32 s5, s7 |
| ; GFX10-NEXT: s_mov_b32 s6, s8 |
| ; GFX10-NEXT: s_mov_b32 s7, s9 |
| ; GFX10-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D_ARRAY unorm glc a16 |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: ; return to shader part epilog |
| main_body: |
| %v = call i64 @llvm.amdgcn.image.atomic.add.1darray.i64.i16(i64 %data, i16 %s, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0) |
| %out = bitcast i64 %v to <2 x float> |
| ret <2 x float> %out |
| } |
| |
| ; 64-bit image atomic add on a 2D array image with three i16 address operands |
| ; (%s, %t, %slice). The checks expect a two-register address v[2:3]: v2 and |
| ; v3 are packed into v2, and v4 (masked by 0xffff) is combined in v3 with s8, |
| ; which holds s0 shifted left by 16. The gfx900 run expects the da modifier; |
| ; the gfx1010 run expects dim:SQ_RSRC_IMG_2D_ARRAY instead. |
| define amdgpu_ps <2 x float> @atomic_add_i64_2darray(<8 x i32> inreg %rsrc, i64 %data, i16 %s, i16 %t, i16 %slice) { |
| ; GFX9-LABEL: atomic_add_i64_2darray: |
| ; GFX9: ; %bb.0: ; %main_body |
| ; GFX9-NEXT: s_mov_b32 s0, s2 |
| ; GFX9-NEXT: s_mov_b32 s2, s4 |
| ; GFX9-NEXT: s_mov_b32 s4, s6 |
| ; GFX9-NEXT: s_mov_b32 s6, s8 |
| ; GFX9-NEXT: v_mov_b32_e32 v5, 0xffff |
| ; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v3 |
| ; GFX9-NEXT: s_lshl_b32 s8, s0, 16 |
| ; GFX9-NEXT: s_mov_b32 s1, s3 |
| ; GFX9-NEXT: s_mov_b32 s3, s5 |
| ; GFX9-NEXT: s_mov_b32 s5, s7 |
| ; GFX9-NEXT: s_mov_b32 s7, s9 |
| ; GFX9-NEXT: v_and_or_b32 v2, v2, v5, v3 |
| ; GFX9-NEXT: v_and_or_b32 v3, v4, v5, s8 |
| ; GFX9-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc a16 da |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: atomic_add_i64_2darray: |
| ; GFX10: ; %bb.0: ; %main_body |
| ; GFX10-NEXT: v_mov_b32_e32 v5, 0xffff |
| ; GFX10-NEXT: v_lshlrev_b32_e32 v3, 16, v3 |
| ; GFX10-NEXT: s_mov_b32 s0, s2 |
| ; GFX10-NEXT: s_mov_b32 s2, s4 |
| ; GFX10-NEXT: s_mov_b32 s4, s6 |
| ; GFX10-NEXT: s_mov_b32 s6, s8 |
| ; GFX10-NEXT: s_lshl_b32 s8, s0, 16 |
| ; GFX10-NEXT: v_and_or_b32 v2, v2, v5, v3 |
| ; GFX10-NEXT: v_and_or_b32 v3, v4, v5, s8 |
| ; GFX10-NEXT: s_mov_b32 s1, s3 |
| ; GFX10-NEXT: s_mov_b32 s3, s5 |
| ; GFX10-NEXT: s_mov_b32 s5, s7 |
| ; GFX10-NEXT: s_mov_b32 s7, s9 |
| ; GFX10-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D_ARRAY unorm glc a16 |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: ; return to shader part epilog |
| main_body: |
| %v = call i64 @llvm.amdgcn.image.atomic.add.2darray.i64.i16(i64 %data, i16 %s, i16 %t, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0) |
| %out = bitcast i64 %v to <2 x float> |
| ret <2 x float> %out |
| } |
| |
| ; 64-bit image atomic add on a 2D MSAA image with three i16 address operands |
| ; (%s, %t, %fragid). The checks expect a two-register address v[2:3]: v2 and |
| ; v3 are packed into v2, and v4 (masked by 0xffff) is combined in v3 with s8, |
| ; which holds s0 shifted left by 16. No da modifier is expected on gfx900; |
| ; the gfx1010 run expects dim:SQ_RSRC_IMG_2D_MSAA. |
| define amdgpu_ps <2 x float> @atomic_add_i64_2dmsaa(<8 x i32> inreg %rsrc, i64 %data, i16 %s, i16 %t, i16 %fragid) { |
| ; GFX9-LABEL: atomic_add_i64_2dmsaa: |
| ; GFX9: ; %bb.0: ; %main_body |
| ; GFX9-NEXT: s_mov_b32 s0, s2 |
| ; GFX9-NEXT: s_mov_b32 s2, s4 |
| ; GFX9-NEXT: s_mov_b32 s4, s6 |
| ; GFX9-NEXT: s_mov_b32 s6, s8 |
| ; GFX9-NEXT: v_mov_b32_e32 v5, 0xffff |
| ; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v3 |
| ; GFX9-NEXT: s_lshl_b32 s8, s0, 16 |
| ; GFX9-NEXT: s_mov_b32 s1, s3 |
| ; GFX9-NEXT: s_mov_b32 s3, s5 |
| ; GFX9-NEXT: s_mov_b32 s5, s7 |
| ; GFX9-NEXT: s_mov_b32 s7, s9 |
| ; GFX9-NEXT: v_and_or_b32 v2, v2, v5, v3 |
| ; GFX9-NEXT: v_and_or_b32 v3, v4, v5, s8 |
| ; GFX9-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc a16 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: atomic_add_i64_2dmsaa: |
| ; GFX10: ; %bb.0: ; %main_body |
| ; GFX10-NEXT: v_mov_b32_e32 v5, 0xffff |
| ; GFX10-NEXT: v_lshlrev_b32_e32 v3, 16, v3 |
| ; GFX10-NEXT: s_mov_b32 s0, s2 |
| ; GFX10-NEXT: s_mov_b32 s2, s4 |
| ; GFX10-NEXT: s_mov_b32 s4, s6 |
| ; GFX10-NEXT: s_mov_b32 s6, s8 |
| ; GFX10-NEXT: s_lshl_b32 s8, s0, 16 |
| ; GFX10-NEXT: v_and_or_b32 v2, v2, v5, v3 |
| ; GFX10-NEXT: v_and_or_b32 v3, v4, v5, s8 |
| ; GFX10-NEXT: s_mov_b32 s1, s3 |
| ; GFX10-NEXT: s_mov_b32 s3, s5 |
| ; GFX10-NEXT: s_mov_b32 s5, s7 |
| ; GFX10-NEXT: s_mov_b32 s7, s9 |
| ; GFX10-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D_MSAA unorm glc a16 |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: ; return to shader part epilog |
| main_body: |
| %v = call i64 @llvm.amdgcn.image.atomic.add.2dmsaa.i64.i16(i64 %data, i16 %s, i16 %t, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0) |
| %out = bitcast i64 %v to <2 x float> |
| ret <2 x float> %out |
| } |
| |
| ; 64-bit image atomic add on a 2D MSAA array image with four i16 address |
| ; operands (%s, %t, %slice, %fragid). The checks expect them packed pairwise |
| ; into the address v[2:3]: v2/v3 into v2 and v4/v5 into v3, each via a shift |
| ; left by 16 and a v_and_or_b32 with the 0xffff mask held in v6. |
| ; The gfx900 run expects the da modifier; the gfx1010 run expects |
| ; dim:SQ_RSRC_IMG_2D_MSAA_ARRAY instead. |
| define amdgpu_ps <2 x float> @atomic_add_i64_2darraymsaa(<8 x i32> inreg %rsrc, i64 %data, i16 %s, i16 %t, i16 %slice, i16 %fragid) { |
| ; GFX9-LABEL: atomic_add_i64_2darraymsaa: |
| ; GFX9: ; %bb.0: ; %main_body |
| ; GFX9-NEXT: v_mov_b32_e32 v6, 0xffff |
| ; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v3 |
| ; GFX9-NEXT: v_and_or_b32 v2, v2, v6, v3 |
| ; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v5 |
| ; GFX9-NEXT: s_mov_b32 s0, s2 |
| ; GFX9-NEXT: s_mov_b32 s1, s3 |
| ; GFX9-NEXT: s_mov_b32 s2, s4 |
| ; GFX9-NEXT: s_mov_b32 s3, s5 |
| ; GFX9-NEXT: s_mov_b32 s4, s6 |
| ; GFX9-NEXT: s_mov_b32 s5, s7 |
| ; GFX9-NEXT: s_mov_b32 s6, s8 |
| ; GFX9-NEXT: s_mov_b32 s7, s9 |
| ; GFX9-NEXT: v_and_or_b32 v3, v4, v6, v3 |
| ; GFX9-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc a16 da |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: atomic_add_i64_2darraymsaa: |
| ; GFX10: ; %bb.0: ; %main_body |
| ; GFX10-NEXT: v_mov_b32_e32 v6, 0xffff |
| ; GFX10-NEXT: v_lshlrev_b32_e32 v3, 16, v3 |
| ; GFX10-NEXT: v_lshlrev_b32_e32 v5, 16, v5 |
| ; GFX10-NEXT: s_mov_b32 s0, s2 |
| ; GFX10-NEXT: s_mov_b32 s1, s3 |
| ; GFX10-NEXT: s_mov_b32 s2, s4 |
| ; GFX10-NEXT: v_and_or_b32 v2, v2, v6, v3 |
| ; GFX10-NEXT: v_and_or_b32 v3, v4, v6, v5 |
| ; GFX10-NEXT: s_mov_b32 s3, s5 |
| ; GFX10-NEXT: s_mov_b32 s4, s6 |
| ; GFX10-NEXT: s_mov_b32 s5, s7 |
| ; GFX10-NEXT: s_mov_b32 s6, s8 |
| ; GFX10-NEXT: s_mov_b32 s7, s9 |
| ; GFX10-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm glc a16 |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: ; return to shader part epilog |
| main_body: |
| %v = call i64 @llvm.amdgcn.image.atomic.add.2darraymsaa.i64.i16(i64 %data, i16 %s, i16 %t, i16 %slice, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0) |
| %out = bitcast i64 %v to <2 x float> |
| ret <2 x float> %out |
| } |
| |
| ; 64-bit image atomic add on a 1D image with one i16 coordinate, called with |
| ; the last immediate argument set to 2 instead of 0 (compare |
| ; atomic_add_i64_1d). The checks expect the same instruction with an |
| ; additional slc modifier after glc on both targets. |
| define amdgpu_ps <2 x float> @atomic_add_i64_1d_slc(<8 x i32> inreg %rsrc, i64 %data, i16 %s) { |
| ; GFX9-LABEL: atomic_add_i64_1d_slc: |
| ; GFX9: ; %bb.0: ; %main_body |
| ; GFX9-NEXT: s_mov_b32 s0, s2 |
| ; GFX9-NEXT: s_mov_b32 s1, s3 |
| ; GFX9-NEXT: s_mov_b32 s2, s4 |
| ; GFX9-NEXT: s_mov_b32 s3, s5 |
| ; GFX9-NEXT: s_mov_b32 s4, s6 |
| ; GFX9-NEXT: s_mov_b32 s5, s7 |
| ; GFX9-NEXT: s_mov_b32 s6, s8 |
| ; GFX9-NEXT: s_mov_b32 s7, s9 |
| ; GFX9-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc slc a16 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: atomic_add_i64_1d_slc: |
| ; GFX10: ; %bb.0: ; %main_body |
| ; GFX10-NEXT: s_mov_b32 s0, s2 |
| ; GFX10-NEXT: s_mov_b32 s1, s3 |
| ; GFX10-NEXT: s_mov_b32 s2, s4 |
| ; GFX10-NEXT: s_mov_b32 s3, s5 |
| ; GFX10-NEXT: s_mov_b32 s4, s6 |
| ; GFX10-NEXT: s_mov_b32 s5, s7 |
| ; GFX10-NEXT: s_mov_b32 s6, s8 |
| ; GFX10-NEXT: s_mov_b32 s7, s9 |
| ; GFX10-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc slc a16 |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: ; return to shader part epilog |
| main_body: |
| %v = call i64 @llvm.amdgcn.image.atomic.add.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 2) |
| %out = bitcast i64 %v to <2 x float> |
| ret <2 x float> %out |
| } |
| |
| ; Declarations of the i32 image atomic intrinsics with i16 address operands. |
| ; Operand layout as declared: the i32 data value (two for cmpswap), one i16 |
| ; per address component, the <8 x i32> resource, then two immediate i32s. |
| ; In the *_slc tests the last immediate is 2 and the checks expect slc; |
| ; every call visible in this file passes 0 for the first immediate. |
| declare i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0 |
| declare i32 @llvm.amdgcn.image.atomic.add.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0 |
| declare i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0 |
| declare i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0 |
| declare i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0 |
| declare i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0 |
| declare i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0 |
| declare i32 @llvm.amdgcn.image.atomic.and.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0 |
| declare i32 @llvm.amdgcn.image.atomic.or.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0 |
| declare i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0 |
| declare i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0 |
| declare i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0 |
| declare i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i16(i32, i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0 |
| declare i32 @llvm.amdgcn.image.atomic.add.2d.i32.i16(i32, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0 |
| declare i32 @llvm.amdgcn.image.atomic.add.3d.i32.i16(i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0 |
| declare i32 @llvm.amdgcn.image.atomic.add.cube.i32.i16(i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0 |
| declare i32 @llvm.amdgcn.image.atomic.add.1darray.i32.i16(i32, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0 |
| declare i32 @llvm.amdgcn.image.atomic.add.2darray.i32.i16(i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0 |
| declare i32 @llvm.amdgcn.image.atomic.add.2dmsaa.i32.i16(i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0 |
| declare i32 @llvm.amdgcn.image.atomic.add.2darraymsaa.i32.i16(i32, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0 |
| |
| ; Declarations of the i64-returning image atomic intrinsics with i16 address operands. |
| ; Operand order: the i64 data value (cmpswap takes two), the i16 address operand(s), |
| ; an <8 x i32> operand, and finally two i32 operands that must be immediates (immarg). |
| ; Every operation is declared for 1d; only add is declared for the other dimensions, |
| ; taking 2 (2d, 1darray), 3 (3d, cube, 2darray, 2dmsaa) or 4 (2darraymsaa) i16 operands. |
| ; NOTE(review): the trailing immediates are presumably texfailctrl and cachepolicy - |
| ; confirm against the AMDGPU intrinsic definitions. |
| declare i64 @llvm.amdgcn.image.atomic.swap.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0 |
| declare i64 @llvm.amdgcn.image.atomic.add.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0 |
| declare i64 @llvm.amdgcn.image.atomic.sub.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0 |
| declare i64 @llvm.amdgcn.image.atomic.smin.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0 |
| declare i64 @llvm.amdgcn.image.atomic.umin.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0 |
| declare i64 @llvm.amdgcn.image.atomic.smax.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0 |
| declare i64 @llvm.amdgcn.image.atomic.umax.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0 |
| declare i64 @llvm.amdgcn.image.atomic.and.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0 |
| declare i64 @llvm.amdgcn.image.atomic.or.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0 |
| declare i64 @llvm.amdgcn.image.atomic.xor.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0 |
| declare i64 @llvm.amdgcn.image.atomic.inc.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0 |
| declare i64 @llvm.amdgcn.image.atomic.dec.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0 |
| declare i64 @llvm.amdgcn.image.atomic.cmpswap.1d.i64.i16(i64, i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0 |
| declare i64 @llvm.amdgcn.image.atomic.add.2d.i64.i16(i64, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0 |
| declare i64 @llvm.amdgcn.image.atomic.add.3d.i64.i16(i64, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0 |
| declare i64 @llvm.amdgcn.image.atomic.add.cube.i64.i16(i64, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0 |
| declare i64 @llvm.amdgcn.image.atomic.add.1darray.i64.i16(i64, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0 |
| declare i64 @llvm.amdgcn.image.atomic.add.2darray.i64.i16(i64, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0 |
| declare i64 @llvm.amdgcn.image.atomic.add.2dmsaa.i64.i16(i64, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0 |
| declare i64 @llvm.amdgcn.image.atomic.add.2darraymsaa.i64.i16(i64, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0 |
| |
| ; Attribute group #0 is referenced by every image atomic intrinsic declaration above; |
| ; it contains only nounwind. |
| attributes #0 = { nounwind } |