| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --extra_scrub --version 5 |
| ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_70 -mattr=+ptx83 | FileCheck %s |
| ; RUN: %if ptxas-sm_70 && ptxas-isa-8.3 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_70 -mattr=+ptx83 | %ptxas-verify -arch=sm_70 %} |
| |
| target triple = "nvptx64-nvidia-cuda" |
| |
; Loop trip count read by @test_b128_in_loop: a zero-initialized i32 in
; addrspace(1) (accessed with ld.global in the expected PTX).
| @size = internal addrspace(1) global i32 0, align 4 |
; 128-bit value that @test_b128_in_loop loads once before its loop and stores
; back on every iteration; also in addrspace(1), 16-byte aligned.
| @x = internal addrspace(1) global i128 0, align 16 |
| |
; Codegen test: an i128 value is carried around a loop through a phi and passed
; to inline asm as a 128-bit ("q") register operand that is tied to the asm
; output. Per the autogenerated assertions below, the expected PTX keeps the
; value in a pair of 64-bit registers, packs that pair into a .b128 register
; immediately before the asm block, and unpacks it again immediately after.
| define void @test_b128_in_loop() { |
| ; CHECK-LABEL: test_b128_in_loop( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .pred %p<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<5>; |
| ; CHECK-NEXT: .reg .b128 %rq<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.global.s32 %rd1, [size]; |
| ; CHECK-NEXT: setp.eq.b64 %p1, %rd1, 0; |
| ; CHECK-NEXT: @%p1 bra $L__BB0_3; |
| ; CHECK-NEXT: // %bb.1: // %BB1 |
| ; CHECK-NEXT: ld.global.v2.b64 {%rd2, %rd3}, [x]; |
| ; CHECK-NEXT: mov.b64 %rd4, 0; |
| ; CHECK-NEXT: $L__BB0_2: // %BB2 |
| ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: mov.b128 %rq1, {%rd2, %rd3}; |
| ; CHECK-NEXT: // begin inline asm |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: .reg .b64 lo; |
| ; CHECK-NEXT: .reg .b64 hi; |
| ; CHECK-NEXT: mov.b128 {lo, hi}, %rq1; |
| ; CHECK-NEXT: add.cc.u64 lo, lo, %rd4; |
| ; CHECK-NEXT: mov.b128 %rq1, {lo, hi}; |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: // end inline asm |
| ; CHECK-NEXT: mov.b128 {%rd2, %rd3}, %rq1; |
| ; CHECK-NEXT: st.global.v2.b64 [x], {%rd2, %rd3}; |
| ; CHECK-NEXT: add.s64 %rd4, %rd4, 1; |
| ; CHECK-NEXT: setp.ne.b64 %p2, %rd1, %rd4; |
| ; CHECK-NEXT: @%p2 bra $L__BB0_2; |
| ; CHECK-NEXT: $L__BB0_3: // %BB3 |
| ; CHECK-NEXT: ret; |
| |
; Entry block: read the trip count from @size and skip the loop entirely when
; it is zero.
| %1 = load i32, ptr addrspace(1) @size, align 4 |
| %2 = icmp eq i32 %1, 0 |
| br i1 %2, label %BB3, label %BB1 |
| |
| BB1: ; preds = %0 |
; Runs once before the loop: load the initial 128-bit value from @x and
; sign-extend the trip count to i64 so it can be compared with the counter.
| %3 = load i128, ptr addrspace(1) @x, align 16 |
| %4 = sext i32 %1 to i64 |
| br label %BB2 |
| |
| BB2: ; preds = %BB2, %BB1 |
; %5 is the 128-bit value carried across iterations (initially %3, then the asm
; result %7); %6 is the i64 iteration counter, starting at 0.
| %5 = phi i128 [ %7, %BB2 ], [ %3, %BB1 ] |
| %6 = phi i64 [ %9, %BB2 ], [ 0, %BB1 ] |
; Inline asm operands: $0 is the i128 output ("=q") and, through the matching
; "0" constraint, is also initialized with %5; $1 is the counter %6 ("l").
; The asm splits $0 into 64-bit halves lo/hi, adds $1 to lo with add.cc.u64,
; and repacks {lo, hi} into $0; hi is not modified inside the asm.
| %7 = tail call i128 asm "{\0A\09.reg .b64 lo;\0A\09.reg .b64 hi;\0A\09mov.b128 {lo, hi}, $0;\0A\09add.cc.u64 lo, lo, $1;\0A\09mov.b128 $0, {lo, hi};\0A\09}", "=q,l,0"(i64 %6, i128 %5) |
; Write the updated value back to @x on every iteration, as a <2 x i64> vector.
| %8 = bitcast i128 %7 to <2 x i64> |
| store <2 x i64> %8, ptr addrspace(1) @x, align 16 |
; Advance the counter and leave the loop once it equals the widened trip count.
| %9 = add nuw i64 %6, 1 |
| %10 = icmp eq i64 %9, %4 |
| br i1 %10, label %BB3, label %BB2 |
| |
| BB3: ; preds = %BB2, %0 |
; Common exit for both the zero-trip path and the finished loop.
| ret void |
| } |