| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc -global-isel=0 -mtriple=amdgcn-- -mcpu=gfx950 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX950,GFX950-SDAG %s |
| ; RUN: llc -global-isel -mtriple=amdgcn-- -mcpu=gfx950 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX950,GFX950-GISEL %s |
| |
| ; ========= Single bit functions ========= |
| |
| ; Computes ~a & ~b & ~c. Both RUN lines share one set of checks and expect the |
| ; whole expression to become a single v_bitop3_b32 with truth table 1. |
| define amdgpu_ps float @not_and_not_and_not_and(i32 %a, i32 %b, i32 %c) { |
| ; GCN-LABEL: not_and_not_and_not_and: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: v_bitop3_b32 v0, v0, v1, v2 bitop3:1 |
| ; GCN-NEXT: ; return to shader part epilog |
| %nota = xor i32 %a, -1 |
| %notb = xor i32 %b, -1 |
| %notc = xor i32 %c, -1 |
| %and1 = and i32 %nota, %notc |
| %and2 = and i32 %and1, %notb |
| %ret_cast = bitcast i32 %and2 to float |
| ret float %ret_cast |
| } |
| |
| ; Computes ~a & ~b & c. Both RUN lines share one set of checks and expect a |
| ; single v_bitop3_b32 with truth table 2. |
| define amdgpu_ps float @not_and_not_and_and(i32 %a, i32 %b, i32 %c) { |
| ; GCN-LABEL: not_and_not_and_and: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: v_bitop3_b32 v0, v0, v1, v2 bitop3:2 |
| ; GCN-NEXT: ; return to shader part epilog |
| %nota = xor i32 %a, -1 |
| %notb = xor i32 %b, -1 |
| %and1 = and i32 %nota, %c |
| %and2 = and i32 %and1, %notb |
| %ret_cast = bitcast i32 %and2 to float |
| ret float %ret_cast |
| } |
| |
| ; Computes ~a & b & ~c. Both RUN lines share one set of checks and expect a |
| ; single v_bitop3_b32 with truth table 4. |
| define amdgpu_ps float @not_and_and_not_and(i32 %a, i32 %b, i32 %c) { |
| ; GCN-LABEL: not_and_and_not_and: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: v_bitop3_b32 v0, v0, v1, v2 bitop3:4 |
| ; GCN-NEXT: ; return to shader part epilog |
| %nota = xor i32 %a, -1 |
| %notc = xor i32 %c, -1 |
| %and1 = and i32 %nota, %notc |
| %and2 = and i32 %and1, %b |
| %ret_cast = bitcast i32 %and2 to float |
| ret float %ret_cast |
| } |
| |
| ; Computes ~a & b & c. The SelectionDAG run expects one v_bitop3_b32 with truth |
| ; table 8. The GlobalISel run expects only the inner ~a & c to become a |
| ; v_bitop3_b32 (table 0xc, v0 repeated as the third source), followed by a |
| ; separate v_and_b32 with v1. |
| define amdgpu_ps float @not_and_and_and(i32 %a, i32 %b, i32 %c) { |
| ; GFX950-SDAG-LABEL: not_and_and_and: |
| ; GFX950-SDAG: ; %bb.0: |
| ; GFX950-SDAG-NEXT: v_bitop3_b32 v0, v0, v1, v2 bitop3:8 |
| ; GFX950-SDAG-NEXT: ; return to shader part epilog |
| ; |
| ; GFX950-GISEL-LABEL: not_and_and_and: |
| ; GFX950-GISEL: ; %bb.0: |
| ; GFX950-GISEL-NEXT: v_bitop3_b32 v0, v0, v2, v0 bitop3:0xc |
| ; GFX950-GISEL-NEXT: v_and_b32_e32 v0, v0, v1 |
| ; GFX950-GISEL-NEXT: ; return to shader part epilog |
| %nota = xor i32 %a, -1 |
| %and1 = and i32 %nota, %c |
| %and2 = and i32 %and1, %b |
| %ret_cast = bitcast i32 %and2 to float |
| ret float %ret_cast |
| } |
| |
| ; Computes a & ~b & ~c. Both RUN lines share one set of checks and expect a |
| ; single v_bitop3_b32 with truth table 0x10. |
| define amdgpu_ps float @and_not_and_not_and(i32 %a, i32 %b, i32 %c) { |
| ; GCN-LABEL: and_not_and_not_and: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: v_bitop3_b32 v0, v0, v1, v2 bitop3:0x10 |
| ; GCN-NEXT: ; return to shader part epilog |
| %notb = xor i32 %b, -1 |
| %notc = xor i32 %c, -1 |
| %and1 = and i32 %a, %notc |
| %and2 = and i32 %and1, %notb |
| %ret_cast = bitcast i32 %and2 to float |
| ret float %ret_cast |
| } |
| |
| ; Computes a & ~b & c. The SelectionDAG run expects one v_bitop3_b32 with truth |
| ; table 0x20. The GlobalISel run expects no v_bitop3 at all, only a v_not_b32 |
| ; followed by two v_and_b32. |
| define amdgpu_ps float @and_not_and_and(i32 %a, i32 %b, i32 %c) { |
| ; GFX950-SDAG-LABEL: and_not_and_and: |
| ; GFX950-SDAG: ; %bb.0: |
| ; GFX950-SDAG-NEXT: v_bitop3_b32 v0, v0, v1, v2 bitop3:0x20 |
| ; GFX950-SDAG-NEXT: ; return to shader part epilog |
| ; |
| ; GFX950-GISEL-LABEL: and_not_and_and: |
| ; GFX950-GISEL: ; %bb.0: |
| ; GFX950-GISEL-NEXT: v_not_b32_e32 v1, v1 |
| ; GFX950-GISEL-NEXT: v_and_b32_e32 v0, v0, v2 |
| ; GFX950-GISEL-NEXT: v_and_b32_e32 v0, v0, v1 |
| ; GFX950-GISEL-NEXT: ; return to shader part epilog |
| %notb = xor i32 %b, -1 |
| %and1 = and i32 %a, %c |
| %and2 = and i32 %and1, %notb |
| %ret_cast = bitcast i32 %and2 to float |
| ret float %ret_cast |
| } |
| |
| ; Computes a & b & ~c. The SelectionDAG run expects one v_bitop3_b32 with truth |
| ; table 0x40. The GlobalISel run expects only the inner a & ~c to become a |
| ; v_bitop3_b32 (table 0x30, v0 repeated as the third source), followed by a |
| ; separate v_and_b32 with v1. |
| define amdgpu_ps float @and_and_not_and(i32 %a, i32 %b, i32 %c) { |
| ; GFX950-SDAG-LABEL: and_and_not_and: |
| ; GFX950-SDAG: ; %bb.0: |
| ; GFX950-SDAG-NEXT: v_bitop3_b32 v0, v0, v1, v2 bitop3:0x40 |
| ; GFX950-SDAG-NEXT: ; return to shader part epilog |
| ; |
| ; GFX950-GISEL-LABEL: and_and_not_and: |
| ; GFX950-GISEL: ; %bb.0: |
| ; GFX950-GISEL-NEXT: v_bitop3_b32 v0, v0, v2, v0 bitop3:0x30 |
| ; GFX950-GISEL-NEXT: v_and_b32_e32 v0, v0, v1 |
| ; GFX950-GISEL-NEXT: ; return to shader part epilog |
| %notc = xor i32 %c, -1 |
| %and1 = and i32 %a, %notc |
| %and2 = and i32 %and1, %b |
| %ret_cast = bitcast i32 %and2 to float |
| ret float %ret_cast |
| } |
| |
| ; Computes a & b & c. Both selectors produce the same single v_bitop3_b32 with |
| ; truth table 0x80, so one set of checks under the shared prefix covers both |
| ; RUN lines. |
| define amdgpu_ps float @and_and_and(i32 %a, i32 %b, i32 %c) { |
| ; GCN-LABEL: and_and_and: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: v_bitop3_b32 v0, v0, v1, v2 bitop3:0x80 |
| ; GCN-NEXT: ; return to shader part epilog |
| %and1 = and i32 %a, %c |
| %and2 = and i32 %and1, %b |
| %ret_cast = bitcast i32 %and2 to float |
| ret float %ret_cast |
| } |
| |
| ; ========= Multi bit functions ========= |
| |
| ; Two-input function ~a & b. Both selectors produce the same single |
| ; v_bitop3_b32 with truth table 0xc (12) and v0 repeated as the third source, |
| ; so one set of checks under the shared prefix covers both RUN lines. |
| define amdgpu_ps float @test_12(i32 %a, i32 %b) { |
| ; GCN-LABEL: test_12: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: v_bitop3_b32 v0, v0, v1, v0 bitop3:0xc |
| ; GCN-NEXT: ; return to shader part epilog |
| %nota = xor i32 %a, -1 |
| %and1 = and i32 %nota, %b |
| %ret_cast = bitcast i32 %and1 to float |
| ret float %ret_cast |
| } |
| |
| ; Two-input function ~a | ~b. The SelectionDAG run expects one v_bitop3_b32 |
| ; with truth table 0x3f (63) and v0 repeated as the third source. The |
| ; GlobalISel run expects two v_not_b32 and a v_or_b32 instead. |
| define amdgpu_ps float @test_63(i32 %a, i32 %b) { |
| ; GFX950-SDAG-LABEL: test_63: |
| ; GFX950-SDAG: ; %bb.0: |
| ; GFX950-SDAG-NEXT: v_bitop3_b32 v0, v0, v1, v0 bitop3:0x3f |
| ; GFX950-SDAG-NEXT: ; return to shader part epilog |
| ; |
| ; GFX950-GISEL-LABEL: test_63: |
| ; GFX950-GISEL: ; %bb.0: |
| ; GFX950-GISEL-NEXT: v_not_b32_e32 v0, v0 |
| ; GFX950-GISEL-NEXT: v_not_b32_e32 v1, v1 |
| ; GFX950-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; GFX950-GISEL-NEXT: ; return to shader part epilog |
| %nota = xor i32 %a, -1 |
| %notb = xor i32 %b, -1 |
| %or = or i32 %nota, %notb |
| %ret_cast = bitcast i32 %or to float |
| ret float %ret_cast |
| } |
| |
| ; Computes (~a & c) | ~b. Both RUN lines share one set of checks and expect a |
| ; single v_bitop3_b32 with truth table 0x3b (59). |
| define amdgpu_ps float @test_59(i32 %a, i32 %b, i32 %c) { |
| ; GCN-LABEL: test_59: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: v_bitop3_b32 v0, v0, v1, v2 bitop3:0x3b |
| ; GCN-NEXT: ; return to shader part epilog |
| %nota = xor i32 %a, -1 |
| %notb = xor i32 %b, -1 |
| %and1 = and i32 %nota, %c |
| %or = or i32 %and1, %notb |
| %ret_cast = bitcast i32 %or to float |
| ret float %ret_cast |
| } |
| |
| ; Computes (a ^ b) | (a ^ c). The SelectionDAG run expects one v_bitop3_b32 |
| ; with truth table 0x7e (126); note the sources are ordered v0, v2, v1. The |
| ; GlobalISel run expects two v_xor_b32 and a v_or_b32 instead. |
| define amdgpu_ps float @test_126(i32 %a, i32 %b, i32 %c) { |
| ; GFX950-SDAG-LABEL: test_126: |
| ; GFX950-SDAG: ; %bb.0: |
| ; GFX950-SDAG-NEXT: v_bitop3_b32 v0, v0, v2, v1 bitop3:0x7e |
| ; GFX950-SDAG-NEXT: ; return to shader part epilog |
| ; |
| ; GFX950-GISEL-LABEL: test_126: |
| ; GFX950-GISEL: ; %bb.0: |
| ; GFX950-GISEL-NEXT: v_xor_b32_e32 v1, v0, v1 |
| ; GFX950-GISEL-NEXT: v_xor_b32_e32 v0, v0, v2 |
| ; GFX950-GISEL-NEXT: v_or_b32_e32 v0, v1, v0 |
| ; GFX950-GISEL-NEXT: ; return to shader part epilog |
| %xor1 = xor i32 %a, %b |
| %xor2 = xor i32 %a, %c |
| %or = or i32 %xor1, %xor2 |
| %ret_cast = bitcast i32 %or to float |
| ret float %ret_cast |
| } |
| |
| ; The Src vector is exhausted during the search, but the match is recovered |
| ; using the 'not' lookahead. GlobalISel has slightly different input, so this |
| ; does not happen there. |
| |
| ; FIXME: Improve global isel code. |
| |
| ; Computes (~a & c & b) | (~a & ~c & b), which is equivalent to ~a & b. The |
| ; SelectionDAG run expects the same single v_bitop3_b32 with truth table 0xc |
| ; that plain ~a & b would give. The GlobalISel run expects five instructions: |
| ; two v_not_b32, a v_and_b32 and two v_bitop3_b32. |
| define amdgpu_ps float @test_12_src_overflow(i32 %a, i32 %b, i32 %c) { |
| ; GFX950-SDAG-LABEL: test_12_src_overflow: |
| ; GFX950-SDAG: ; %bb.0: |
| ; GFX950-SDAG-NEXT: v_bitop3_b32 v0, v0, v1, v0 bitop3:0xc |
| ; GFX950-SDAG-NEXT: ; return to shader part epilog |
| ; |
| ; GFX950-GISEL-LABEL: test_12_src_overflow: |
| ; GFX950-GISEL: ; %bb.0: |
| ; GFX950-GISEL-NEXT: v_not_b32_e32 v3, v0 |
| ; GFX950-GISEL-NEXT: v_not_b32_e32 v4, v2 |
| ; GFX950-GISEL-NEXT: v_bitop3_b32 v0, v0, v2, v0 bitop3:0xc |
| ; GFX950-GISEL-NEXT: v_and_b32_e32 v2, v3, v4 |
| ; GFX950-GISEL-NEXT: v_bitop3_b32 v0, v0, v1, v2 bitop3:0xc8 |
| ; GFX950-GISEL-NEXT: ; return to shader part epilog |
| %nota = xor i32 %a, -1 |
| %notc = xor i32 %c, -1 |
| %and1 = and i32 %nota, %c |
| %and2 = and i32 %and1, %b |
| %and3 = and i32 %nota, %notc |
| %and4 = and i32 %and3, %b |
| %or = or i32 %and2, %and4 |
| %ret_cast = bitcast i32 %or to float |
| ret float %ret_cast |
| } |
| |
| ; This could be a single bitop3 operation with tbl = 100 (0x64), but the Src |
| ; vector is exhausted during the search. |
| |
| ; Computes (b & ~(c | a)) | (a & ~b & c) | (b & a & ~c). Neither run expects a |
| ; single instruction. The SelectionDAG run expects one v_bitop3_b32 per OR |
| ; term (tables 0x10, 0x40, 0x20) combined with v_or3_b32. The GlobalISel run |
| ; expects eight instructions, also ending in v_or3_b32. |
| define amdgpu_ps float @test_100_src_overflow(i32 %a, i32 %b, i32 %c) { |
| ; GFX950-SDAG-LABEL: test_100_src_overflow: |
| ; GFX950-SDAG: ; %bb.0: |
| ; GFX950-SDAG-NEXT: v_bitop3_b32 v3, v1, v2, v0 bitop3:0x10 |
| ; GFX950-SDAG-NEXT: v_bitop3_b32 v4, v0, v2, v1 bitop3:0x40 |
| ; GFX950-SDAG-NEXT: v_bitop3_b32 v0, v1, v2, v0 bitop3:0x20 |
| ; GFX950-SDAG-NEXT: v_or3_b32 v0, v3, v4, v0 |
| ; GFX950-SDAG-NEXT: ; return to shader part epilog |
| ; |
| ; GFX950-GISEL-LABEL: test_100_src_overflow: |
| ; GFX950-GISEL: ; %bb.0: |
| ; GFX950-GISEL-NEXT: v_bitop3_b32 v3, v2, v0, v2 bitop3:3 |
| ; GFX950-GISEL-NEXT: v_and_b32_e32 v3, v1, v3 |
| ; GFX950-GISEL-NEXT: v_bitop3_b32 v4, v0, v1, v0 bitop3:0x30 |
| ; GFX950-GISEL-NEXT: v_and_b32_e32 v0, v1, v0 |
| ; GFX950-GISEL-NEXT: v_not_b32_e32 v1, v2 |
| ; GFX950-GISEL-NEXT: v_and_b32_e32 v4, v4, v2 |
| ; GFX950-GISEL-NEXT: v_and_b32_e32 v0, v0, v1 |
| ; GFX950-GISEL-NEXT: v_or3_b32 v0, v3, v4, v0 |
| ; GFX950-GISEL-NEXT: ; return to shader part epilog |
| %or1 = or i32 %c, %a |
| %not1 = xor i32 %or1, -1 |
| %and1 = and i32 %b, %not1 |
| %not2 = xor i32 %b, -1 |
| %and2 = and i32 %a, %not2 |
| %and3 = and i32 %and2, %c |
| %and4 = and i32 %b, %a |
| %not3 = xor i32 %c, -1 |
| %and5 = and i32 %and4, %not3 |
| %or2 = or i32 %and1, %and3 |
| %or3 = or i32 %or2, %and5 |
| %ret_cast = bitcast i32 %or3 to float |
| ret float %ret_cast |
| } |
| |
| ; ========= Ternary logical operations take precedence ========= |
| |
| ; Computes a ^ b ^ c on i32. Both RUN lines share one set of checks and expect |
| ; two chained v_xor_b32 rather than a v_bitop3_b32. |
| define amdgpu_ps float @test_xor3(i32 %a, i32 %b, i32 %c) { |
| ; GCN-LABEL: test_xor3: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: v_xor_b32_e32 v0, v0, v1 |
| ; GCN-NEXT: v_xor_b32_e32 v0, v0, v2 |
| ; GCN-NEXT: ; return to shader part epilog |
| %xor1 = xor i32 %a, %b |
| %xor2 = xor i32 %xor1, %c |
| %ret_cast = bitcast i32 %xor2 to float |
| ret float %ret_cast |
| } |
| |
| ; Computes a | b | c on i32. Both RUN lines share one set of checks and expect |
| ; the dedicated v_or3_b32 rather than a v_bitop3_b32. |
| define amdgpu_ps float @test_or3(i32 %a, i32 %b, i32 %c) { |
| ; GCN-LABEL: test_or3: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: v_or3_b32 v0, v0, v1, v2 |
| ; GCN-NEXT: ; return to shader part epilog |
| %or1 = or i32 %a, %b |
| %or2 = or i32 %or1, %c |
| %ret_cast = bitcast i32 %or2 to float |
| ret float %ret_cast |
| } |
| |
| ; Computes (a & b) | c on i32. Both RUN lines share one set of checks and |
| ; expect the dedicated v_and_or_b32 rather than a v_bitop3_b32. |
| define amdgpu_ps float @test_and_or(i32 %a, i32 %b, i32 %c) { |
| ; GCN-LABEL: test_and_or: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: v_and_or_b32 v0, v0, v1, v2 |
| ; GCN-NEXT: ; return to shader part epilog |
| %and1 = and i32 %a, %b |
| %or1 = or i32 %and1, %c |
| %ret_cast = bitcast i32 %or1 to float |
| ret float %ret_cast |
| } |
| |
| ; ========= Uniform cases ========= |
| |
| ; Computes ~a & c & b with all arguments marked inreg; the checks read them |
| ; from s0-s2. Both RUN lines share one set of checks and expect scalar |
| ; s_andn2_b32 + s_and_b32 followed by a v_mov_b32 of the result into v0, with |
| ; no v_bitop3_b32. |
| define amdgpu_ps float @uniform_3_op(i32 inreg %a, i32 inreg %b, i32 inreg %c) { |
| ; GCN-LABEL: uniform_3_op: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_andn2_b32 s0, s2, s0 |
| ; GCN-NEXT: s_and_b32 s0, s0, s1 |
| ; GCN-NEXT: v_mov_b32_e32 v0, s0 |
| ; GCN-NEXT: ; return to shader part epilog |
| %nota = xor i32 %a, -1 |
| %and1 = and i32 %nota, %c |
| %and2 = and i32 %and1, %b |
| %ret_cast = bitcast i32 %and2 to float |
| ret float %ret_cast |
| } |
| |
| ; Computes ~a & ~b & c with all arguments marked inreg; the checks read them |
| ; from s0-s2. The SelectionDAG run expects s1 and s2 to be copied into VGPRs |
| ; and a single v_bitop3_b32 with truth table 2. The GlobalISel run expects two |
| ; scalar s_andn2_b32 followed by a v_mov_b32 of the result into v0. |
| define amdgpu_ps float @uniform_4_op(i32 inreg %a, i32 inreg %b, i32 inreg %c) { |
| ; GFX950-SDAG-LABEL: uniform_4_op: |
| ; GFX950-SDAG: ; %bb.0: |
| ; GFX950-SDAG-NEXT: v_mov_b32_e32 v0, s1 |
| ; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, s2 |
| ; GFX950-SDAG-NEXT: v_bitop3_b32 v0, s0, v0, v1 bitop3:2 |
| ; GFX950-SDAG-NEXT: ; return to shader part epilog |
| ; |
| ; GFX950-GISEL-LABEL: uniform_4_op: |
| ; GFX950-GISEL: ; %bb.0: |
| ; GFX950-GISEL-NEXT: s_andn2_b32 s0, s2, s0 |
| ; GFX950-GISEL-NEXT: s_andn2_b32 s0, s0, s1 |
| ; GFX950-GISEL-NEXT: v_mov_b32_e32 v0, s0 |
| ; GFX950-GISEL-NEXT: ; return to shader part epilog |
| %nota = xor i32 %a, -1 |
| %notb = xor i32 %b, -1 |
| %and1 = and i32 %nota, %c |
| %and2 = and i32 %and1, %notb |
| %ret_cast = bitcast i32 %and2 to float |
| ret float %ret_cast |
| } |
| |
| ; ========= 16 bit tests ========= |
| |
| ; i16 variant of ~a & ~b & ~c. Both RUN lines share one set of checks and |
| ; expect a single v_bitop3_b16 with truth table 1. |
| define amdgpu_ps half @not_and_not_and_not_and_b16(i16 %a, i16 %b, i16 %c) { |
| ; GCN-LABEL: not_and_not_and_not_and_b16: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: v_bitop3_b16 v0, v0, v1, v2 bitop3:1 |
| ; GCN-NEXT: ; return to shader part epilog |
| %nota = xor i16 %a, -1 |
| %notb = xor i16 %b, -1 |
| %notc = xor i16 %c, -1 |
| %and1 = and i16 %nota, %notc |
| %and2 = and i16 %and1, %notb |
| %ret_cast = bitcast i16 %and2 to half |
| ret half %ret_cast |
| } |
| |
| ; i16 variant of ~a & ~b & c. Both RUN lines share one set of checks and |
| ; expect a single v_bitop3_b16 with truth table 2. |
| define amdgpu_ps half @not_and_not_and_and_b16(i16 %a, i16 %b, i16 %c) { |
| ; GCN-LABEL: not_and_not_and_and_b16: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: v_bitop3_b16 v0, v0, v1, v2 bitop3:2 |
| ; GCN-NEXT: ; return to shader part epilog |
| %nota = xor i16 %a, -1 |
| %notb = xor i16 %b, -1 |
| %and1 = and i16 %nota, %c |
| %and2 = and i16 %and1, %notb |
| %ret_cast = bitcast i16 %and2 to half |
| ret half %ret_cast |
| } |
| |
| ; i16 variant of ~a & b & ~c. Both RUN lines share one set of checks and |
| ; expect a single v_bitop3_b16 with truth table 4. |
| define amdgpu_ps half @not_and_and_not_and_b16(i16 %a, i16 %b, i16 %c) { |
| ; GCN-LABEL: not_and_and_not_and_b16: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: v_bitop3_b16 v0, v0, v1, v2 bitop3:4 |
| ; GCN-NEXT: ; return to shader part epilog |
| %nota = xor i16 %a, -1 |
| %notc = xor i16 %c, -1 |
| %and1 = and i16 %nota, %notc |
| %and2 = and i16 %and1, %b |
| %ret_cast = bitcast i16 %and2 to half |
| ret half %ret_cast |
| } |
| |
| ; i16 variant of a ^ b ^ c. Unlike the i32 test, the SelectionDAG run expects |
| ; a single v_bitop3_b16 with truth table 0x96 (sources ordered v0, v2, v1). |
| ; The GlobalISel run expects two chained v_xor_b32. |
| define amdgpu_ps half @test_xor3_b16(i16 %a, i16 %b, i16 %c) { |
| ; GFX950-SDAG-LABEL: test_xor3_b16: |
| ; GFX950-SDAG: ; %bb.0: |
| ; GFX950-SDAG-NEXT: v_bitop3_b16 v0, v0, v2, v1 bitop3:0x96 |
| ; GFX950-SDAG-NEXT: ; return to shader part epilog |
| ; |
| ; GFX950-GISEL-LABEL: test_xor3_b16: |
| ; GFX950-GISEL: ; %bb.0: |
| ; GFX950-GISEL-NEXT: v_xor_b32_e32 v0, v0, v1 |
| ; GFX950-GISEL-NEXT: v_xor_b32_e32 v0, v0, v2 |
| ; GFX950-GISEL-NEXT: ; return to shader part epilog |
| %xor1 = xor i16 %a, %b |
| %xor2 = xor i16 %xor1, %c |
| %ret_cast = bitcast i16 %xor2 to half |
| ret half %ret_cast |
| } |
| |
| ; i16 variant of a | b | c. Unlike the i32 test, the SelectionDAG run expects |
| ; a single v_bitop3_b16 with truth table 0xfe (sources ordered v0, v2, v1). |
| ; The GlobalISel run expects v_or3_b32. |
| define amdgpu_ps half @test_or3_b16(i16 %a, i16 %b, i16 %c) { |
| ; GFX950-SDAG-LABEL: test_or3_b16: |
| ; GFX950-SDAG: ; %bb.0: |
| ; GFX950-SDAG-NEXT: v_bitop3_b16 v0, v0, v2, v1 bitop3:0xfe |
| ; GFX950-SDAG-NEXT: ; return to shader part epilog |
| ; |
| ; GFX950-GISEL-LABEL: test_or3_b16: |
| ; GFX950-GISEL: ; %bb.0: |
| ; GFX950-GISEL-NEXT: v_or3_b32 v0, v0, v1, v2 |
| ; GFX950-GISEL-NEXT: ; return to shader part epilog |
| %or1 = or i16 %a, %b |
| %or2 = or i16 %or1, %c |
| %ret_cast = bitcast i16 %or2 to half |
| ret half %ret_cast |
| } |
| |
| ; i16 variant of (a & b) | c. Unlike the i32 test, the SelectionDAG run |
| ; expects a single v_bitop3_b16 with truth table 0xec (sources ordered |
| ; v0, v2, v1). The GlobalISel run expects v_and_or_b32. |
| define amdgpu_ps half @test_and_or_b16(i16 %a, i16 %b, i16 %c) { |
| ; GFX950-SDAG-LABEL: test_and_or_b16: |
| ; GFX950-SDAG: ; %bb.0: |
| ; GFX950-SDAG-NEXT: v_bitop3_b16 v0, v0, v2, v1 bitop3:0xec |
| ; GFX950-SDAG-NEXT: ; return to shader part epilog |
| ; |
| ; GFX950-GISEL-LABEL: test_and_or_b16: |
| ; GFX950-GISEL: ; %bb.0: |
| ; GFX950-GISEL-NEXT: v_and_or_b32 v0, v0, v1, v2 |
| ; GFX950-GISEL-NEXT: ; return to shader part epilog |
| %and1 = and i16 %a, %b |
| %or1 = or i16 %and1, %c |
| %ret_cast = bitcast i16 %or1 to half |
| ret half %ret_cast |
| } |
| ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: |
| ; GFX950: {{.*}} |