; blob: 678fde86586645936903975601484b7f78b1e561 [file] [log] [blame]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_30 | FileCheck %s
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_30 | %ptxas-verify %}
; Declarations of the predicate-returning shuffle intrinsics. Each takes three
; operands (an i32 or float followed by two i32s) and returns a {value, i1}
; pair. Only the shfl.down variants are called by the functions visible in
; this file; the up/bfly/idx variants are declared but not exercised here.
declare {i32, i1} @llvm.nvvm.shfl.down.i32p(i32, i32, i32)
declare {float, i1} @llvm.nvvm.shfl.down.f32p(float, i32, i32)
declare {i32, i1} @llvm.nvvm.shfl.up.i32p(i32, i32, i32)
declare {float, i1} @llvm.nvvm.shfl.up.f32p(float, i32, i32)
declare {i32, i1} @llvm.nvvm.shfl.bfly.i32p(i32, i32, i32)
declare {float, i1} @llvm.nvvm.shfl.bfly.f32p(float, i32, i32)
declare {i32, i1} @llvm.nvvm.shfl.idx.i32p(i32, i32, i32)
declare {float, i1} @llvm.nvvm.shfl.idx.f32p(float, i32, i32)
; shfl_i32_rrr: all three operands of llvm.nvvm.shfl.down.i32p are function
; parameters. The expected PTX loads the three parameters with ld.param.b32
; and feeds them to a single shfl.down.b32 as register operands. The i32
; result is stored at func_retval0; the i1 predicate is widened with selp.b16
; and stored as one byte at func_retval0+4.
define {i32, i1} @shfl_i32_rrr(i32 %a, i32 %b, i32 %c) {
; CHECK-LABEL: shfl_i32_rrr(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<2>;
; CHECK-NEXT: .reg .b16 %rs<2>;
; CHECK-NEXT: .reg .b32 %r<5>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [shfl_i32_rrr_param_0];
; CHECK-NEXT: ld.param.b32 %r2, [shfl_i32_rrr_param_1];
; CHECK-NEXT: ld.param.b32 %r3, [shfl_i32_rrr_param_2];
; CHECK-NEXT: shfl.down.b32 %r4|%p1, %r1, %r2, %r3;
; CHECK-NEXT: st.param.b32 [func_retval0], %r4;
; CHECK-NEXT: selp.b16 %rs1, -1, 0, %p1;
; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs1;
; CHECK-NEXT: ret;
%val = call {i32, i1} @llvm.nvvm.shfl.down.i32p(i32 %a, i32 %b, i32 %c)
ret {i32, i1} %val
}
; shfl_i32_irr: passes %a, %b and %c straight through to
; llvm.nvvm.shfl.down.i32p, so the expected PTX is three parameter loads and
; an all-register shfl.down.b32, followed by the result/predicate stores.
; NOTE(review): the IR body and expected output are the same as in
; shfl_i32_rrr apart from the function name; the leading "i" in the name is
; not reflected by any constant operand here. Confirm this is intended.
define {i32, i1} @shfl_i32_irr(i32 %a, i32 %b, i32 %c) {
; CHECK-LABEL: shfl_i32_irr(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<2>;
; CHECK-NEXT: .reg .b16 %rs<2>;
; CHECK-NEXT: .reg .b32 %r<5>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [shfl_i32_irr_param_0];
; CHECK-NEXT: ld.param.b32 %r2, [shfl_i32_irr_param_1];
; CHECK-NEXT: ld.param.b32 %r3, [shfl_i32_irr_param_2];
; CHECK-NEXT: shfl.down.b32 %r4|%p1, %r1, %r2, %r3;
; CHECK-NEXT: st.param.b32 [func_retval0], %r4;
; CHECK-NEXT: selp.b16 %rs1, -1, 0, %p1;
; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs1;
; CHECK-NEXT: ret;
%val = call {i32, i1} @llvm.nvvm.shfl.down.i32p(i32 %a, i32 %b, i32 %c)
ret {i32, i1} %val
}
; shfl_i32_rri: the first two operands are parameters and the third is the
; constant 1. The expected PTX has only two parameter loads and encodes the
; constant directly as the last shfl.down.b32 operand. The i32 result goes to
; func_retval0 and the predicate byte (via selp.b16) to func_retval0+4.
define {i32, i1} @shfl_i32_rri(i32 %a, i32 %b) {
; CHECK-LABEL: shfl_i32_rri(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<2>;
; CHECK-NEXT: .reg .b16 %rs<2>;
; CHECK-NEXT: .reg .b32 %r<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [shfl_i32_rri_param_0];
; CHECK-NEXT: ld.param.b32 %r2, [shfl_i32_rri_param_1];
; CHECK-NEXT: shfl.down.b32 %r3|%p1, %r1, %r2, 1;
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
; CHECK-NEXT: selp.b16 %rs1, -1, 0, %p1;
; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs1;
; CHECK-NEXT: ret;
%val = call {i32, i1} @llvm.nvvm.shfl.down.i32p(i32 %a, i32 %b, i32 1)
ret {i32, i1} %val
}
; shfl_i32_iri: same operand shape as shfl_i32_rri (two parameters, constant
; third operand) but the constant is 2. The expected PTX loads %a and %b and
; emits shfl.down.b32 with the literal 2 as its last operand, then stores the
; i32 result and the selp.b16-widened predicate into the return slot.
define {i32, i1} @shfl_i32_iri(i32 %a, i32 %b) {
; CHECK-LABEL: shfl_i32_iri(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<2>;
; CHECK-NEXT: .reg .b16 %rs<2>;
; CHECK-NEXT: .reg .b32 %r<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [shfl_i32_iri_param_0];
; CHECK-NEXT: ld.param.b32 %r2, [shfl_i32_iri_param_1];
; CHECK-NEXT: shfl.down.b32 %r3|%p1, %r1, %r2, 2;
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
; CHECK-NEXT: selp.b16 %rs1, -1, 0, %p1;
; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs1;
; CHECK-NEXT: ret;
%val = call {i32, i1} @llvm.nvvm.shfl.down.i32p(i32 %a, i32 %b, i32 2)
ret {i32, i1} %val
}
; shfl_i32_rir: the second intrinsic operand is the constant 1 while the first
; and third come from parameters %a and %c. The expected PTX keeps the
; constant in the middle operand position of shfl.down.b32 and uses the two
; loaded registers for the other operands; result and predicate are then
; written to func_retval0 and func_retval0+4.
define {i32, i1} @shfl_i32_rir(i32 %a, i32 %c) {
; CHECK-LABEL: shfl_i32_rir(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<2>;
; CHECK-NEXT: .reg .b16 %rs<2>;
; CHECK-NEXT: .reg .b32 %r<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [shfl_i32_rir_param_0];
; CHECK-NEXT: ld.param.b32 %r2, [shfl_i32_rir_param_1];
; CHECK-NEXT: shfl.down.b32 %r3|%p1, %r1, 1, %r2;
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
; CHECK-NEXT: selp.b16 %rs1, -1, 0, %p1;
; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs1;
; CHECK-NEXT: ret;
%val = call {i32, i1} @llvm.nvvm.shfl.down.i32p(i32 %a, i32 1, i32 %c)
ret {i32, i1} %val
}
; shfl_i32_iir: like shfl_i32_rir, but the constant second operand is 2.
; Parameters %a and %c are loaded into registers and the literal 2 appears in
; the middle operand slot of the expected shfl.down.b32. The return value is
; written as an i32 at func_retval0 plus a predicate byte at func_retval0+4.
define {i32, i1} @shfl_i32_iir(i32 %a, i32 %c) {
; CHECK-LABEL: shfl_i32_iir(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<2>;
; CHECK-NEXT: .reg .b16 %rs<2>;
; CHECK-NEXT: .reg .b32 %r<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [shfl_i32_iir_param_0];
; CHECK-NEXT: ld.param.b32 %r2, [shfl_i32_iir_param_1];
; CHECK-NEXT: shfl.down.b32 %r3|%p1, %r1, 2, %r2;
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
; CHECK-NEXT: selp.b16 %rs1, -1, 0, %p1;
; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs1;
; CHECK-NEXT: ret;
%val = call {i32, i1} @llvm.nvvm.shfl.down.i32p(i32 %a, i32 2, i32 %c)
ret {i32, i1} %val
}
; shfl_i32_rii: only the first operand (%a) is a parameter; the second and
; third are the constants 1 and 2. The expected PTX has a single parameter
; load and a shfl.down.b32 whose last two operands are the literals 1 and 2,
; followed by the usual result store and selp.b16 predicate store.
define {i32, i1} @shfl_i32_rii(i32 %a) {
; CHECK-LABEL: shfl_i32_rii(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<2>;
; CHECK-NEXT: .reg .b16 %rs<2>;
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [shfl_i32_rii_param_0];
; CHECK-NEXT: shfl.down.b32 %r2|%p1, %r1, 1, 2;
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: selp.b16 %rs1, -1, 0, %p1;
; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs1;
; CHECK-NEXT: ret;
%val = call {i32, i1} @llvm.nvvm.shfl.down.i32p(i32 %a, i32 1, i32 2)
ret {i32, i1} %val
}
; shfl_i32_iii: the first operand is parameter %a and the remaining two are
; the constants 2 and 3. The expected PTX loads only parameter 0 and emits
; shfl.down.b32 with literals 2 and 3, then stores the i32 result and the
; predicate byte into the return slot.
; NOTE(review): parameter %b is declared but never used in the body.
define {i32, i1} @shfl_i32_iii(i32 %a, i32 %b) {
; CHECK-LABEL: shfl_i32_iii(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<2>;
; CHECK-NEXT: .reg .b16 %rs<2>;
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [shfl_i32_iii_param_0];
; CHECK-NEXT: shfl.down.b32 %r2|%p1, %r1, 2, 3;
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: selp.b16 %rs1, -1, 0, %p1;
; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs1;
; CHECK-NEXT: ret;
%val = call {i32, i1} @llvm.nvvm.shfl.down.i32p(i32 %a, i32 2, i32 3)
ret {i32, i1} %val
}
;; Same operand combinations as above, but using the float variant of the
;; intrinsic (llvm.nvvm.shfl.down.f32p).
; shfl_f32_rrr: float counterpart of the all-parameter case. %a (float), %b
; and %c are passed to llvm.nvvm.shfl.down.f32p. Per the expected PTX the
; float is loaded with ld.param.b32 into a .b32 register and shuffled with
; shfl.down.b32 just like the i32 case; the 32-bit result is stored at
; func_retval0 and the predicate byte (via selp.b16) at func_retval0+4.
define {float, i1} @shfl_f32_rrr(float %a, i32 %b, i32 %c) {
; CHECK-LABEL: shfl_f32_rrr(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<2>;
; CHECK-NEXT: .reg .b16 %rs<2>;
; CHECK-NEXT: .reg .b32 %r<5>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [shfl_f32_rrr_param_0];
; CHECK-NEXT: ld.param.b32 %r2, [shfl_f32_rrr_param_1];
; CHECK-NEXT: ld.param.b32 %r3, [shfl_f32_rrr_param_2];
; CHECK-NEXT: shfl.down.b32 %r4|%p1, %r1, %r2, %r3;
; CHECK-NEXT: st.param.b32 [func_retval0], %r4;
; CHECK-NEXT: selp.b16 %rs1, -1, 0, %p1;
; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs1;
; CHECK-NEXT: ret;
%val = call {float, i1} @llvm.nvvm.shfl.down.f32p(float %a, i32 %b, i32 %c)
ret {float, i1} %val
}
; shfl_f32_irr: forwards %a, %b and %c unchanged to
; llvm.nvvm.shfl.down.f32p; the expected PTX is three ld.param.b32 loads and
; an all-register shfl.down.b32, then the result and predicate stores.
; NOTE(review): apart from the function name this is the same IR and expected
; output as shfl_f32_rrr; no operand is a constant despite the "i" in the
; name. Confirm this is intended.
define {float, i1} @shfl_f32_irr(float %a, i32 %b, i32 %c) {
; CHECK-LABEL: shfl_f32_irr(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<2>;
; CHECK-NEXT: .reg .b16 %rs<2>;
; CHECK-NEXT: .reg .b32 %r<5>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [shfl_f32_irr_param_0];
; CHECK-NEXT: ld.param.b32 %r2, [shfl_f32_irr_param_1];
; CHECK-NEXT: ld.param.b32 %r3, [shfl_f32_irr_param_2];
; CHECK-NEXT: shfl.down.b32 %r4|%p1, %r1, %r2, %r3;
; CHECK-NEXT: st.param.b32 [func_retval0], %r4;
; CHECK-NEXT: selp.b16 %rs1, -1, 0, %p1;
; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs1;
; CHECK-NEXT: ret;
%val = call {float, i1} @llvm.nvvm.shfl.down.f32p(float %a, i32 %b, i32 %c)
ret {float, i1} %val
}
; shfl_f32_rri: float value %a and i32 %b are parameters; the third operand is
; the constant 1. The expected PTX loads the two parameters as .b32 and uses
; the literal 1 as the final shfl.down.b32 operand, then stores the 32-bit
; result at func_retval0 and the predicate byte at func_retval0+4.
define {float, i1} @shfl_f32_rri(float %a, i32 %b) {
; CHECK-LABEL: shfl_f32_rri(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<2>;
; CHECK-NEXT: .reg .b16 %rs<2>;
; CHECK-NEXT: .reg .b32 %r<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [shfl_f32_rri_param_0];
; CHECK-NEXT: ld.param.b32 %r2, [shfl_f32_rri_param_1];
; CHECK-NEXT: shfl.down.b32 %r3|%p1, %r1, %r2, 1;
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
; CHECK-NEXT: selp.b16 %rs1, -1, 0, %p1;
; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs1;
; CHECK-NEXT: ret;
%val = call {float, i1} @llvm.nvvm.shfl.down.f32p(float %a, i32 %b, i32 1)
ret {float, i1} %val
}
; shfl_f32_iri: same shape as shfl_f32_rri (two parameters, constant third
; operand) with the constant changed to 2. The expected PTX emits
; shfl.down.b32 with registers for %a and %b and the literal 2 last, followed
; by the result store and the selp.b16-based predicate store.
define {float, i1} @shfl_f32_iri(float %a, i32 %b) {
; CHECK-LABEL: shfl_f32_iri(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<2>;
; CHECK-NEXT: .reg .b16 %rs<2>;
; CHECK-NEXT: .reg .b32 %r<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [shfl_f32_iri_param_0];
; CHECK-NEXT: ld.param.b32 %r2, [shfl_f32_iri_param_1];
; CHECK-NEXT: shfl.down.b32 %r3|%p1, %r1, %r2, 2;
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
; CHECK-NEXT: selp.b16 %rs1, -1, 0, %p1;
; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs1;
; CHECK-NEXT: ret;
%val = call {float, i1} @llvm.nvvm.shfl.down.f32p(float %a, i32 %b, i32 2)
ret {float, i1} %val
}
; shfl_f32_rir: float %a and i32 %c are parameters; the second intrinsic
; operand is the constant 1. The expected PTX places the literal 1 in the
; middle operand slot of shfl.down.b32 between the two loaded registers, then
; writes the 32-bit result and predicate byte to the return slot.
define {float, i1} @shfl_f32_rir(float %a, i32 %c) {
; CHECK-LABEL: shfl_f32_rir(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<2>;
; CHECK-NEXT: .reg .b16 %rs<2>;
; CHECK-NEXT: .reg .b32 %r<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [shfl_f32_rir_param_0];
; CHECK-NEXT: ld.param.b32 %r2, [shfl_f32_rir_param_1];
; CHECK-NEXT: shfl.down.b32 %r3|%p1, %r1, 1, %r2;
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
; CHECK-NEXT: selp.b16 %rs1, -1, 0, %p1;
; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs1;
; CHECK-NEXT: ret;
%val = call {float, i1} @llvm.nvvm.shfl.down.f32p(float %a, i32 1, i32 %c)
ret {float, i1} %val
}
; shfl_f32_iir: like shfl_f32_rir, but the constant second operand is 2.
; The expected PTX loads %a and %c as .b32 registers and emits shfl.down.b32
; with the literal 2 in the middle operand position; the result goes to
; func_retval0 and the predicate byte to func_retval0+4.
define {float, i1} @shfl_f32_iir(float %a, i32 %c) {
; CHECK-LABEL: shfl_f32_iir(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<2>;
; CHECK-NEXT: .reg .b16 %rs<2>;
; CHECK-NEXT: .reg .b32 %r<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [shfl_f32_iir_param_0];
; CHECK-NEXT: ld.param.b32 %r2, [shfl_f32_iir_param_1];
; CHECK-NEXT: shfl.down.b32 %r3|%p1, %r1, 2, %r2;
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
; CHECK-NEXT: selp.b16 %rs1, -1, 0, %p1;
; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs1;
; CHECK-NEXT: ret;
%val = call {float, i1} @llvm.nvvm.shfl.down.f32p(float %a, i32 2, i32 %c)
ret {float, i1} %val
}
; shfl_f32_rii: only the float value %a is a parameter; the second and third
; operands are the constants 1 and 2. The expected PTX has one parameter load
; and a shfl.down.b32 ending in the literals 1 and 2, followed by the 32-bit
; result store and the selp.b16 predicate store.
define {float, i1} @shfl_f32_rii(float %a) {
; CHECK-LABEL: shfl_f32_rii(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<2>;
; CHECK-NEXT: .reg .b16 %rs<2>;
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [shfl_f32_rii_param_0];
; CHECK-NEXT: shfl.down.b32 %r2|%p1, %r1, 1, 2;
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: selp.b16 %rs1, -1, 0, %p1;
; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs1;
; CHECK-NEXT: ret;
%val = call {float, i1} @llvm.nvvm.shfl.down.f32p(float %a, i32 1, i32 2)
ret {float, i1} %val
}
; shfl_f32_iii: float %a is the first operand and the other two are the
; constants 2 and 3. The expected PTX loads only parameter 0 and emits
; shfl.down.b32 with literals 2 and 3, then stores the 32-bit result and the
; predicate byte into the return slot.
; NOTE(review): parameter %b is declared but never used in the body.
define {float, i1} @shfl_f32_iii(float %a, i32 %b) {
; CHECK-LABEL: shfl_f32_iii(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<2>;
; CHECK-NEXT: .reg .b16 %rs<2>;
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [shfl_f32_iii_param_0];
; CHECK-NEXT: shfl.down.b32 %r2|%p1, %r1, 2, 3;
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: selp.b16 %rs1, -1, 0, %p1;
; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs1;
; CHECK-NEXT: ret;
%val = call {float, i1} @llvm.nvvm.shfl.down.f32p(float %a, i32 2, i32 3)
ret {float, i1} %val
}