| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
| ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_30 | FileCheck %s |
| ; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_30 | %ptxas-verify %} |
| |
; Declarations of the `...p` shuffle intrinsics: four modes (down, up, bfly,
; idx), each in an i32 and a float flavour. Every variant takes the value to
; shuffle plus two i32 operands and returns a {value, i1} pair.
; NOTE(review): only the two `down` variants are called by the functions
; visible in this file; up/bfly/idx are declared but not exercised here.
| declare {i32, i1} @llvm.nvvm.shfl.down.i32p(i32, i32, i32) |
| declare {float, i1} @llvm.nvvm.shfl.down.f32p(float, i32, i32) |
| declare {i32, i1} @llvm.nvvm.shfl.up.i32p(i32, i32, i32) |
| declare {float, i1} @llvm.nvvm.shfl.up.f32p(float, i32, i32) |
| declare {i32, i1} @llvm.nvvm.shfl.bfly.i32p(i32, i32, i32) |
| declare {float, i1} @llvm.nvvm.shfl.bfly.f32p(float, i32, i32) |
| declare {i32, i1} @llvm.nvvm.shfl.idx.i32p(i32, i32, i32) |
| declare {float, i1} @llvm.nvvm.shfl.idx.f32p(float, i32, i32) |
| |
; shfl.down.i32p with all three operands taken from function parameters.
; The checks expect three ld.param loads feeding an all-register
; shfl.down.b32, the i32 result stored at func_retval0, and the predicate
; output turned into a byte via selp and stored at func_retval0+4.
| define {i32, i1} @shfl_i32_rrr(i32 %a, i32 %b, i32 %c) { |
| ; CHECK-LABEL: shfl_i32_rrr( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .pred %p<2>; |
| ; CHECK-NEXT: .reg .b16 %rs<2>; |
| ; CHECK-NEXT: .reg .b32 %r<5>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b32 %r1, [shfl_i32_rrr_param_0]; |
| ; CHECK-NEXT: ld.param.b32 %r2, [shfl_i32_rrr_param_1]; |
| ; CHECK-NEXT: ld.param.b32 %r3, [shfl_i32_rrr_param_2]; |
| ; CHECK-NEXT: shfl.down.b32 %r4|%p1, %r1, %r2, %r3; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r4; |
| ; CHECK-NEXT: selp.b16 %rs1, -1, 0, %p1; |
| ; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs1; |
| ; CHECK-NEXT: ret; |
| %val = call {i32, i1} @llvm.nvvm.shfl.down.i32p(i32 %a, i32 %b, i32 %c) |
| ret {i32, i1} %val |
| } |
| |
; shfl.down.i32p with all three operands taken from function parameters.
; NOTE(review): despite the `_irr` name, this body is identical to
; shfl_i32_rrr: %a, %b and %c all come from parameters and the checks expect
; the same all-register shfl.down.b32. Confirm whether a constant first
; operand was intended.
| define {i32, i1} @shfl_i32_irr(i32 %a, i32 %b, i32 %c) { |
| ; CHECK-LABEL: shfl_i32_irr( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .pred %p<2>; |
| ; CHECK-NEXT: .reg .b16 %rs<2>; |
| ; CHECK-NEXT: .reg .b32 %r<5>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b32 %r1, [shfl_i32_irr_param_0]; |
| ; CHECK-NEXT: ld.param.b32 %r2, [shfl_i32_irr_param_1]; |
| ; CHECK-NEXT: ld.param.b32 %r3, [shfl_i32_irr_param_2]; |
| ; CHECK-NEXT: shfl.down.b32 %r4|%p1, %r1, %r2, %r3; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r4; |
| ; CHECK-NEXT: selp.b16 %rs1, -1, 0, %p1; |
| ; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs1; |
| ; CHECK-NEXT: ret; |
| %val = call {i32, i1} @llvm.nvvm.shfl.down.i32p(i32 %a, i32 %b, i32 %c) |
| ret {i32, i1} %val |
| } |
| |
; shfl.down.i32p with %a and %b from parameters and the constant 1 as the
; third operand. The checks expect only two ld.param loads and the constant
; to appear directly as the last operand of shfl.down.b32.
| define {i32, i1} @shfl_i32_rri(i32 %a, i32 %b) { |
| ; CHECK-LABEL: shfl_i32_rri( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .pred %p<2>; |
| ; CHECK-NEXT: .reg .b16 %rs<2>; |
| ; CHECK-NEXT: .reg .b32 %r<4>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b32 %r1, [shfl_i32_rri_param_0]; |
| ; CHECK-NEXT: ld.param.b32 %r2, [shfl_i32_rri_param_1]; |
| ; CHECK-NEXT: shfl.down.b32 %r3|%p1, %r1, %r2, 1; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r3; |
| ; CHECK-NEXT: selp.b16 %rs1, -1, 0, %p1; |
| ; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs1; |
| ; CHECK-NEXT: ret; |
| %val = call {i32, i1} @llvm.nvvm.shfl.down.i32p(i32 %a, i32 %b, i32 1) |
| ret {i32, i1} %val |
| } |
| |
; shfl.down.i32p with %a and %b from parameters and the constant 2 as the
; third operand, which the checks expect as an immediate in shfl.down.b32.
; NOTE(review): despite the `_iri` name, %a is still a parameter; apart from
; the constant (2 instead of 1) this matches shfl_i32_rri. Confirm whether a
; constant first operand was intended.
| define {i32, i1} @shfl_i32_iri(i32 %a, i32 %b) { |
| ; CHECK-LABEL: shfl_i32_iri( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .pred %p<2>; |
| ; CHECK-NEXT: .reg .b16 %rs<2>; |
| ; CHECK-NEXT: .reg .b32 %r<4>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b32 %r1, [shfl_i32_iri_param_0]; |
| ; CHECK-NEXT: ld.param.b32 %r2, [shfl_i32_iri_param_1]; |
| ; CHECK-NEXT: shfl.down.b32 %r3|%p1, %r1, %r2, 2; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r3; |
| ; CHECK-NEXT: selp.b16 %rs1, -1, 0, %p1; |
| ; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs1; |
| ; CHECK-NEXT: ret; |
| %val = call {i32, i1} @llvm.nvvm.shfl.down.i32p(i32 %a, i32 %b, i32 2) |
| ret {i32, i1} %val |
| } |
| |
; shfl.down.i32p with %a and %c from parameters and the constant 1 as the
; second operand. The checks expect two ld.param loads and the constant to
; appear as an immediate in the middle operand slot of shfl.down.b32.
| define {i32, i1} @shfl_i32_rir(i32 %a, i32 %c) { |
| ; CHECK-LABEL: shfl_i32_rir( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .pred %p<2>; |
| ; CHECK-NEXT: .reg .b16 %rs<2>; |
| ; CHECK-NEXT: .reg .b32 %r<4>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b32 %r1, [shfl_i32_rir_param_0]; |
| ; CHECK-NEXT: ld.param.b32 %r2, [shfl_i32_rir_param_1]; |
| ; CHECK-NEXT: shfl.down.b32 %r3|%p1, %r1, 1, %r2; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r3; |
| ; CHECK-NEXT: selp.b16 %rs1, -1, 0, %p1; |
| ; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs1; |
| ; CHECK-NEXT: ret; |
| %val = call {i32, i1} @llvm.nvvm.shfl.down.i32p(i32 %a, i32 1, i32 %c) |
| ret {i32, i1} %val |
| } |
| |
; shfl.down.i32p with %a and %c from parameters and the constant 2 as the
; second operand, which the checks expect as an immediate in shfl.down.b32.
; NOTE(review): despite the `_iir` name, %a is still a parameter; apart from
; the constant (2 instead of 1) this matches shfl_i32_rir. Confirm whether a
; constant first operand was intended.
| define {i32, i1} @shfl_i32_iir(i32 %a, i32 %c) { |
| ; CHECK-LABEL: shfl_i32_iir( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .pred %p<2>; |
| ; CHECK-NEXT: .reg .b16 %rs<2>; |
| ; CHECK-NEXT: .reg .b32 %r<4>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b32 %r1, [shfl_i32_iir_param_0]; |
| ; CHECK-NEXT: ld.param.b32 %r2, [shfl_i32_iir_param_1]; |
| ; CHECK-NEXT: shfl.down.b32 %r3|%p1, %r1, 2, %r2; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r3; |
| ; CHECK-NEXT: selp.b16 %rs1, -1, 0, %p1; |
| ; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs1; |
| ; CHECK-NEXT: ret; |
| %val = call {i32, i1} @llvm.nvvm.shfl.down.i32p(i32 %a, i32 2, i32 %c) |
| ret {i32, i1} %val |
| } |
| |
; shfl.down.i32p with %a from a parameter and the constants 1 and 2 as the
; second and third operands. The checks expect a single ld.param load and
; both constants to appear as immediates in shfl.down.b32.
| define {i32, i1} @shfl_i32_rii(i32 %a) { |
| ; CHECK-LABEL: shfl_i32_rii( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .pred %p<2>; |
| ; CHECK-NEXT: .reg .b16 %rs<2>; |
| ; CHECK-NEXT: .reg .b32 %r<3>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b32 %r1, [shfl_i32_rii_param_0]; |
| ; CHECK-NEXT: shfl.down.b32 %r2|%p1, %r1, 1, 2; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r2; |
| ; CHECK-NEXT: selp.b16 %rs1, -1, 0, %p1; |
| ; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs1; |
| ; CHECK-NEXT: ret; |
| %val = call {i32, i1} @llvm.nvvm.shfl.down.i32p(i32 %a, i32 1, i32 2) |
| ret {i32, i1} %val |
| } |
| |
; shfl.down.i32p with %a from a parameter and the constants 2 and 3 as the
; second and third operands, both expected as immediates in shfl.down.b32.
; NOTE(review): despite the `_iii` name, %a is still a parameter, and the
; declared parameter %b is never used (the checks load only param_0). Confirm
; whether a constant first operand was intended.
| define {i32, i1} @shfl_i32_iii(i32 %a, i32 %b) { |
| ; CHECK-LABEL: shfl_i32_iii( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .pred %p<2>; |
| ; CHECK-NEXT: .reg .b16 %rs<2>; |
| ; CHECK-NEXT: .reg .b32 %r<3>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b32 %r1, [shfl_i32_iii_param_0]; |
| ; CHECK-NEXT: shfl.down.b32 %r2|%p1, %r1, 2, 3; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r2; |
| ; CHECK-NEXT: selp.b16 %rs1, -1, 0, %p1; |
| ; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs1; |
| ; CHECK-NEXT: ret; |
| %val = call {i32, i1} @llvm.nvvm.shfl.down.i32p(i32 %a, i32 2, i32 3) |
| ret {i32, i1} %val |
| } |
| |
| ;; Same tests as above, but using the float (f32p) variant of the intrinsic. |
| |
; shfl.down.f32p with all three operands taken from function parameters.
; The checks expect the float to be carried in a .b32 register: three
; ld.param.b32 loads feed an all-register shfl.down.b32, the result is stored
; at func_retval0, and the predicate output is turned into a byte via selp
; and stored at func_retval0+4.
| define {float, i1} @shfl_f32_rrr(float %a, i32 %b, i32 %c) { |
| ; CHECK-LABEL: shfl_f32_rrr( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .pred %p<2>; |
| ; CHECK-NEXT: .reg .b16 %rs<2>; |
| ; CHECK-NEXT: .reg .b32 %r<5>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b32 %r1, [shfl_f32_rrr_param_0]; |
| ; CHECK-NEXT: ld.param.b32 %r2, [shfl_f32_rrr_param_1]; |
| ; CHECK-NEXT: ld.param.b32 %r3, [shfl_f32_rrr_param_2]; |
| ; CHECK-NEXT: shfl.down.b32 %r4|%p1, %r1, %r2, %r3; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r4; |
| ; CHECK-NEXT: selp.b16 %rs1, -1, 0, %p1; |
| ; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs1; |
| ; CHECK-NEXT: ret; |
| %val = call {float, i1} @llvm.nvvm.shfl.down.f32p(float %a, i32 %b, i32 %c) |
| ret {float, i1} %val |
| } |
| |
; shfl.down.f32p with all three operands taken from function parameters.
; NOTE(review): despite the `_irr` name, this body is identical to
; shfl_f32_rrr: %a, %b and %c all come from parameters and the checks expect
; the same all-register shfl.down.b32. Confirm whether a constant first
; operand was intended.
| define {float, i1} @shfl_f32_irr(float %a, i32 %b, i32 %c) { |
| ; CHECK-LABEL: shfl_f32_irr( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .pred %p<2>; |
| ; CHECK-NEXT: .reg .b16 %rs<2>; |
| ; CHECK-NEXT: .reg .b32 %r<5>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b32 %r1, [shfl_f32_irr_param_0]; |
| ; CHECK-NEXT: ld.param.b32 %r2, [shfl_f32_irr_param_1]; |
| ; CHECK-NEXT: ld.param.b32 %r3, [shfl_f32_irr_param_2]; |
| ; CHECK-NEXT: shfl.down.b32 %r4|%p1, %r1, %r2, %r3; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r4; |
| ; CHECK-NEXT: selp.b16 %rs1, -1, 0, %p1; |
| ; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs1; |
| ; CHECK-NEXT: ret; |
| %val = call {float, i1} @llvm.nvvm.shfl.down.f32p(float %a, i32 %b, i32 %c) |
| ret {float, i1} %val |
| } |
| |
; shfl.down.f32p with %a and %b from parameters and the constant 1 as the
; third operand. The checks expect only two ld.param loads and the constant
; to appear directly as the last operand of shfl.down.b32.
| define {float, i1} @shfl_f32_rri(float %a, i32 %b) { |
| ; CHECK-LABEL: shfl_f32_rri( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .pred %p<2>; |
| ; CHECK-NEXT: .reg .b16 %rs<2>; |
| ; CHECK-NEXT: .reg .b32 %r<4>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b32 %r1, [shfl_f32_rri_param_0]; |
| ; CHECK-NEXT: ld.param.b32 %r2, [shfl_f32_rri_param_1]; |
| ; CHECK-NEXT: shfl.down.b32 %r3|%p1, %r1, %r2, 1; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r3; |
| ; CHECK-NEXT: selp.b16 %rs1, -1, 0, %p1; |
| ; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs1; |
| ; CHECK-NEXT: ret; |
| %val = call {float, i1} @llvm.nvvm.shfl.down.f32p(float %a, i32 %b, i32 1) |
| ret {float, i1} %val |
| } |
| |
; shfl.down.f32p with %a and %b from parameters and the constant 2 as the
; third operand, which the checks expect as an immediate in shfl.down.b32.
; NOTE(review): despite the `_iri` name, %a is still a parameter; apart from
; the constant (2 instead of 1) this matches shfl_f32_rri. Confirm whether a
; constant first operand was intended.
| define {float, i1} @shfl_f32_iri(float %a, i32 %b) { |
| ; CHECK-LABEL: shfl_f32_iri( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .pred %p<2>; |
| ; CHECK-NEXT: .reg .b16 %rs<2>; |
| ; CHECK-NEXT: .reg .b32 %r<4>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b32 %r1, [shfl_f32_iri_param_0]; |
| ; CHECK-NEXT: ld.param.b32 %r2, [shfl_f32_iri_param_1]; |
| ; CHECK-NEXT: shfl.down.b32 %r3|%p1, %r1, %r2, 2; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r3; |
| ; CHECK-NEXT: selp.b16 %rs1, -1, 0, %p1; |
| ; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs1; |
| ; CHECK-NEXT: ret; |
| %val = call {float, i1} @llvm.nvvm.shfl.down.f32p(float %a, i32 %b, i32 2) |
| ret {float, i1} %val |
| } |
| |
; shfl.down.f32p with %a and %c from parameters and the constant 1 as the
; second operand. The checks expect two ld.param loads and the constant to
; appear as an immediate in the middle operand slot of shfl.down.b32.
| define {float, i1} @shfl_f32_rir(float %a, i32 %c) { |
| ; CHECK-LABEL: shfl_f32_rir( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .pred %p<2>; |
| ; CHECK-NEXT: .reg .b16 %rs<2>; |
| ; CHECK-NEXT: .reg .b32 %r<4>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b32 %r1, [shfl_f32_rir_param_0]; |
| ; CHECK-NEXT: ld.param.b32 %r2, [shfl_f32_rir_param_1]; |
| ; CHECK-NEXT: shfl.down.b32 %r3|%p1, %r1, 1, %r2; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r3; |
| ; CHECK-NEXT: selp.b16 %rs1, -1, 0, %p1; |
| ; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs1; |
| ; CHECK-NEXT: ret; |
| %val = call {float, i1} @llvm.nvvm.shfl.down.f32p(float %a, i32 1, i32 %c) |
| ret {float, i1} %val |
| } |
| |
; shfl.down.f32p with %a and %c from parameters and the constant 2 as the
; second operand, which the checks expect as an immediate in shfl.down.b32.
; NOTE(review): despite the `_iir` name, %a is still a parameter; apart from
; the constant (2 instead of 1) this matches shfl_f32_rir. Confirm whether a
; constant first operand was intended.
| define {float, i1} @shfl_f32_iir(float %a, i32 %c) { |
| ; CHECK-LABEL: shfl_f32_iir( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .pred %p<2>; |
| ; CHECK-NEXT: .reg .b16 %rs<2>; |
| ; CHECK-NEXT: .reg .b32 %r<4>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b32 %r1, [shfl_f32_iir_param_0]; |
| ; CHECK-NEXT: ld.param.b32 %r2, [shfl_f32_iir_param_1]; |
| ; CHECK-NEXT: shfl.down.b32 %r3|%p1, %r1, 2, %r2; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r3; |
| ; CHECK-NEXT: selp.b16 %rs1, -1, 0, %p1; |
| ; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs1; |
| ; CHECK-NEXT: ret; |
| %val = call {float, i1} @llvm.nvvm.shfl.down.f32p(float %a, i32 2, i32 %c) |
| ret {float, i1} %val |
| } |
| |
; shfl.down.f32p with %a from a parameter and the constants 1 and 2 as the
; second and third operands. The checks expect a single ld.param load and
; both constants to appear as immediates in shfl.down.b32.
| define {float, i1} @shfl_f32_rii(float %a) { |
| ; CHECK-LABEL: shfl_f32_rii( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .pred %p<2>; |
| ; CHECK-NEXT: .reg .b16 %rs<2>; |
| ; CHECK-NEXT: .reg .b32 %r<3>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b32 %r1, [shfl_f32_rii_param_0]; |
| ; CHECK-NEXT: shfl.down.b32 %r2|%p1, %r1, 1, 2; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r2; |
| ; CHECK-NEXT: selp.b16 %rs1, -1, 0, %p1; |
| ; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs1; |
| ; CHECK-NEXT: ret; |
| %val = call {float, i1} @llvm.nvvm.shfl.down.f32p(float %a, i32 1, i32 2) |
| ret {float, i1} %val |
| } |
| |
; shfl.down.f32p with %a from a parameter and the constants 2 and 3 as the
; second and third operands, both expected as immediates in shfl.down.b32.
; NOTE(review): despite the `_iii` name, %a is still a parameter, and the
; declared parameter %b is never used (the checks load only param_0). Confirm
; whether a constant first operand was intended.
| define {float, i1} @shfl_f32_iii(float %a, i32 %b) { |
| ; CHECK-LABEL: shfl_f32_iii( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .pred %p<2>; |
| ; CHECK-NEXT: .reg .b16 %rs<2>; |
| ; CHECK-NEXT: .reg .b32 %r<3>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b32 %r1, [shfl_f32_iii_param_0]; |
| ; CHECK-NEXT: shfl.down.b32 %r2|%p1, %r1, 2, 3; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r2; |
| ; CHECK-NEXT: selp.b16 %rs1, -1, 0, %p1; |
| ; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs1; |
| ; CHECK-NEXT: ret; |
| %val = call {float, i1} @llvm.nvvm.shfl.down.f32p(float %a, i32 2, i32 3) |
| ret {float, i1} %val |
| } |