| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
| ; Negative configurations: each of the next two invocations is expected to fail |
| ; (note the leading "not"). The first pairs ptx72 with sm_52 and the second pairs |
| ; ptx73 with sm_50; both outputs are matched against the diagnostic further down. |
| ; RUN: not llc < %s -mtriple=nvptx -mattr=+ptx72 -mcpu=sm_52 2>&1 | FileCheck %s --check-prefixes=CHECK-FAILS |
| ; RUN: not llc < %s -mtriple=nvptx -mattr=+ptx73 -mcpu=sm_50 2>&1 | FileCheck %s --check-prefixes=CHECK-FAILS |
| |
| ; Positive configurations: ptx73 with sm_52, compiled once per triple (nvptx and |
| ; nvptx64), each with its own check prefix. When the ptxas feature is available the |
| ; output is also piped to the ptxas verifier; the nvptx variant of that step is |
| ; additionally skipped when the ptxas-12.0 feature is set. |
| ; RUN: llc < %s -mtriple=nvptx -mattr=+ptx73 -mcpu=sm_52 | FileCheck %s --check-prefixes=CHECK-32 |
| ; RUN: llc < %s -mtriple=nvptx64 -mattr=+ptx73 -mcpu=sm_52 | FileCheck %s --check-prefixes=CHECK-64 |
| ; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mattr=+ptx73 -mcpu=sm_52 | %ptxas-verify %} |
| ; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mattr=+ptx73 -mcpu=sm_52 | %ptxas-verify %} |
| |
| ; Diagnostic expected from the negative configurations above. |
| ; CHECK-FAILS: in function test_dynamic_stackalloc{{.*}}: Support for dynamic alloca introduced in PTX ISA version 7.3 and requires target sm_52. |
| |
| ; Dynamic alloca of %n bytes (i8 elements) with 16-byte alignment whose address |
| ; is passed to @bar; the function returns @bar's i32 result. |
| ; Expected PTX for both triples: the byte count is rounded up to a multiple of 8 |
| ; (add 7, then mask with -8), the alloca instruction is emitted with alignment 16, |
| ; and its result goes through cvta.local before being stored as the call argument. |
| ; With the 32-bit triple the i64 size parameter is read with a 32-bit load. |
| define i32 @test_dynamic_stackalloc(i64 %n) { |
| ; CHECK-32-LABEL: test_dynamic_stackalloc( |
| ; CHECK-32: { |
| ; CHECK-32-NEXT: .reg .b32 %r<8>; |
| ; CHECK-32-EMPTY: |
| ; CHECK-32-NEXT: // %bb.0: |
| ; CHECK-32-NEXT: ld.param.b32 %r1, [test_dynamic_stackalloc_param_0]; |
| ; CHECK-32-NEXT: add.s32 %r2, %r1, 7; |
| ; CHECK-32-NEXT: and.b32 %r3, %r2, -8; |
| ; CHECK-32-NEXT: alloca.u32 %r4, %r3, 16; |
| ; CHECK-32-NEXT: cvta.local.u32 %r5, %r4; |
| ; CHECK-32-NEXT: { // callseq 0, 0 |
| ; CHECK-32-NEXT: .param .b32 param0; |
| ; CHECK-32-NEXT: st.param.b32 [param0], %r5; |
| ; CHECK-32-NEXT: .param .b32 retval0; |
| ; CHECK-32-NEXT: call.uni (retval0), |
| ; CHECK-32-NEXT: bar, |
| ; CHECK-32-NEXT: ( |
| ; CHECK-32-NEXT: param0 |
| ; CHECK-32-NEXT: ); |
| ; CHECK-32-NEXT: ld.param.b32 %r6, [retval0]; |
| ; CHECK-32-NEXT: } // callseq 0 |
| ; CHECK-32-NEXT: st.param.b32 [func_retval0], %r6; |
| ; CHECK-32-NEXT: ret; |
| ; |
| ; CHECK-64-LABEL: test_dynamic_stackalloc( |
| ; CHECK-64: { |
| ; CHECK-64-NEXT: .reg .b32 %r<3>; |
| ; CHECK-64-NEXT: .reg .b64 %rd<6>; |
| ; CHECK-64-EMPTY: |
| ; CHECK-64-NEXT: // %bb.0: |
| ; CHECK-64-NEXT: ld.param.b64 %rd1, [test_dynamic_stackalloc_param_0]; |
| ; CHECK-64-NEXT: add.s64 %rd2, %rd1, 7; |
| ; CHECK-64-NEXT: and.b64 %rd3, %rd2, -8; |
| ; CHECK-64-NEXT: alloca.u64 %rd4, %rd3, 16; |
| ; CHECK-64-NEXT: cvta.local.u64 %rd5, %rd4; |
| ; CHECK-64-NEXT: { // callseq 0, 0 |
| ; CHECK-64-NEXT: .param .b64 param0; |
| ; CHECK-64-NEXT: st.param.b64 [param0], %rd5; |
| ; CHECK-64-NEXT: .param .b32 retval0; |
| ; CHECK-64-NEXT: call.uni (retval0), |
| ; CHECK-64-NEXT: bar, |
| ; CHECK-64-NEXT: ( |
| ; CHECK-64-NEXT: param0 |
| ; CHECK-64-NEXT: ); |
| ; CHECK-64-NEXT: ld.param.b32 %r1, [retval0]; |
| ; CHECK-64-NEXT: } // callseq 0 |
| ; CHECK-64-NEXT: st.param.b32 [func_retval0], %r1; |
| ; CHECK-64-NEXT: ret; |
| ; IR under test: the element count is the runtime value %n, so the allocation |
| ; size is not a compile-time constant. |
| %alloca = alloca i8, i64 %n, align 16 |
| ; Handing the pointer to an external function makes the address escape. |
| %call = call i32 @bar(ptr %alloca) |
| ret i32 %call |
| } |
| |
| ; Dynamic alloca of %0 float elements with 4-byte IR alignment; the first element |
| ; is loaded from the allocation and returned. No call is made, so the pointer is |
| ; only used by the load inside this function. |
| ; Expected PTX for both triples: the element count is scaled to bytes with a left |
| ; shift by 2, rounded up to a multiple of 8 (add 7, then mask with -8), and the |
| ; alloca instruction carries alignment 8 rather than the IR's 4. The load is an |
| ; ld.local on the alloca result directly, with no cvta.local in between. |
| ; NOTE(review): the 8 is presumably the target's default stack alignment applied |
| ; when the requested alignment is smaller - confirm against the NVPTX lowering. |
| ; NOTE(review): the memory is read without ever being written, so the returned |
| ; value is not meaningful; the test appears to care only about the emitted code. |
| define float @test_dynamic_stackalloc_unaligned(i64 %0) { |
| ; CHECK-32-LABEL: test_dynamic_stackalloc_unaligned( |
| ; CHECK-32: { |
| ; CHECK-32-NEXT: .reg .b32 %r<7>; |
| ; CHECK-32-EMPTY: |
| ; CHECK-32-NEXT: // %bb.0: |
| ; CHECK-32-NEXT: ld.param.b32 %r1, [test_dynamic_stackalloc_unaligned_param_0]; |
| ; CHECK-32-NEXT: shl.b32 %r2, %r1, 2; |
| ; CHECK-32-NEXT: add.s32 %r3, %r2, 7; |
| ; CHECK-32-NEXT: and.b32 %r4, %r3, -8; |
| ; CHECK-32-NEXT: alloca.u32 %r5, %r4, 8; |
| ; CHECK-32-NEXT: ld.local.b32 %r6, [%r5]; |
| ; CHECK-32-NEXT: st.param.b32 [func_retval0], %r6; |
| ; CHECK-32-NEXT: ret; |
| ; |
| ; CHECK-64-LABEL: test_dynamic_stackalloc_unaligned( |
| ; CHECK-64: { |
| ; CHECK-64-NEXT: .reg .b32 %r<2>; |
| ; CHECK-64-NEXT: .reg .b64 %rd<6>; |
| ; CHECK-64-EMPTY: |
| ; CHECK-64-NEXT: // %bb.0: |
| ; CHECK-64-NEXT: ld.param.b64 %rd1, [test_dynamic_stackalloc_unaligned_param_0]; |
| ; CHECK-64-NEXT: shl.b64 %rd2, %rd1, 2; |
| ; CHECK-64-NEXT: add.s64 %rd3, %rd2, 7; |
| ; CHECK-64-NEXT: and.b64 %rd4, %rd3, -8; |
| ; CHECK-64-NEXT: alloca.u64 %rd5, %rd4, 8; |
| ; CHECK-64-NEXT: ld.local.b32 %r1, [%rd5]; |
| ; CHECK-64-NEXT: st.param.b32 [func_retval0], %r1; |
| ; CHECK-64-NEXT: ret; |
| ; IR under test: runtime-sized array of floats, aligned to 4 bytes only. |
| %4 = alloca float, i64 %0, align 4 |
| ; Zero-index GEP: yields the same address as %4 (element 0). |
| %5 = getelementptr float, ptr %4, i64 0 |
| %6 = load float, ptr %5, align 4 |
| ret float %6 |
| } |
| |
| ; External function with no body in this file; test_dynamic_stackalloc passes its |
| ; dynamically allocated pointer to it and returns the i32 it produces. |
| declare i32 @bar(ptr) |
| |