| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 |
| ; RUN: opt -S -mcpu=gfx900 -amdgpu-lower-buffer-fat-pointers < %s | FileCheck %s |
| ; RUN: opt -S -mcpu=gfx900 -passes=amdgpu-lower-buffer-fat-pointers < %s | FileCheck %s |
| |
| target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8" |
| target triple = "amdgcn--" |
| |
| ;; This should optimize to just the offset part |
| ;; Loop that sums floats loaded through a ptr addrspace(7) induction pointer |
| ;; advanced by a GEP. The assertions expect no resource phi: each load uses |
| ;; %buf directly and only an i32 offset phi is carried around the loop. |
| define float @sum(ptr addrspace(8) %buf, i32 %len) { |
| ; CHECK-LABEL: define float @sum |
| ; CHECK-SAME: (ptr addrspace(8) [[BUF:%.*]], i32 [[LEN:%.*]]) #[[ATTR0:[0-9]+]] { |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: br label [[LOOP:%.*]] |
| ; CHECK: loop: |
| ; CHECK-NEXT: [[SUM_PREV:%.*]] = phi float [ [[SUM:%.*]], [[LOOP]] ], [ 0.000000e+00, [[ENTRY:%.*]] ] |
| ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ] |
| ; CHECK-NEXT: [[PTR_PREV_OFF:%.*]] = phi i32 [ [[PTR:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ] |
| ; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 [[PTR_PREV_OFF]], i32 0, i32 0) |
| ; CHECK-NEXT: [[SUM]] = fadd float [[SUM_PREV]], [[VAL]] |
| ; CHECK-NEXT: [[PTR]] = add i32 [[PTR_PREV_OFF]], 4 |
| ; CHECK-NEXT: [[I_NEXT]] = add i32 [[I]], 1 |
| ; CHECK-NEXT: [[TEST:%.*]] = icmp ult i32 [[I_NEXT]], [[LEN]] |
| ; CHECK-NEXT: br i1 [[TEST]], label [[LOOP]], label [[EXIT:%.*]] |
| ; CHECK: exit: |
| ; CHECK-NEXT: ret float [[SUM]] |
| ; |
| entry: |
| ;; The addrspacecast has no counterpart in the expected output; the offset phi |
| ;; simply starts at 0 on the edge from entry. |
| %start = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) |
| br label %loop |
| loop: |
| %sum.prev = phi float [ %sum, %loop ], [ 0.0, %entry ] |
| %ptr.prev = phi ptr addrspace(7) [ %ptr, %loop ], [ %start, %entry ] |
| %i = phi i32 [ %i.next, %loop ], [ 0, %entry ] |
| |
| %val = load float, ptr addrspace(7) %ptr.prev |
| %sum = fadd float %sum.prev, %val |
| |
| ;; One-float step; expected to become `add i32 <offset>, 4`. |
| %ptr = getelementptr float, ptr addrspace(7) %ptr.prev, i32 1 |
| %i.next = add i32 %i, 1 |
| %test = icmp ult i32 %i.next, %len |
| br i1 %test, label %loop, label %exit |
| exit: |
| ret float %sum |
| } |
| |
| ;; But this should not, because the pointer is advanced with integer ops (ptrtoint/add/inttoptr) |
| ;; Same summation loop as a GEP-based walk, except the pointer is advanced by a |
| ;; ptrtoint / add i160 / inttoptr round trip. The assertions expect both a |
| ;; resource phi and an offset phi to be kept, with the two parts packed into |
| ;; and unpacked from an i160 on every iteration. |
| define float @sum_integer_ops(ptr addrspace(8) %buf, i32 %len) { |
| ; CHECK-LABEL: define float @sum_integer_ops |
| ; CHECK-SAME: (ptr addrspace(8) [[BUF:%.*]], i32 [[LEN:%.*]]) #[[ATTR0]] { |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: br label [[LOOP:%.*]] |
| ; CHECK: loop: |
| ; CHECK-NEXT: [[SUM_PREV:%.*]] = phi float [ [[SUM:%.*]], [[LOOP]] ], [ 0.000000e+00, [[ENTRY:%.*]] ] |
| ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ] |
| ; CHECK-NEXT: [[PTR_PREV_RSRC:%.*]] = phi ptr addrspace(8) [ [[PTR_RSRC:%.*]], [[LOOP]] ], [ [[BUF]], [[ENTRY]] ] |
| ; CHECK-NEXT: [[PTR_PREV_OFF:%.*]] = phi i32 [ [[PTR_OFF:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ] |
| ; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[PTR_PREV_RSRC]], i32 [[PTR_PREV_OFF]], i32 0, i32 0) |
| ; CHECK-NEXT: [[SUM]] = fadd float [[SUM_PREV]], [[VAL]] |
| ; CHECK-NEXT: [[PTR_PREV_INT_RSRC:%.*]] = ptrtoint ptr addrspace(8) [[PTR_PREV_RSRC]] to i160 |
| ; CHECK-NEXT: [[TMP0:%.*]] = shl nuw i160 [[PTR_PREV_INT_RSRC]], 32 |
| ; CHECK-NEXT: [[PTR_PREV_INT_OFF:%.*]] = zext i32 [[PTR_PREV_OFF]] to i160 |
| ; CHECK-NEXT: [[PTR_PREV_INT:%.*]] = or i160 [[TMP0]], [[PTR_PREV_INT_OFF]] |
| ; CHECK-NEXT: [[PTR_INT:%.*]] = add i160 [[PTR_PREV_INT]], 4 |
| ; CHECK-NEXT: [[TMP1:%.*]] = lshr i160 [[PTR_INT]], 32 |
| ; CHECK-NEXT: [[TMP2:%.*]] = trunc i160 [[TMP1]] to i128 |
| ; CHECK-NEXT: [[PTR_RSRC]] = inttoptr i128 [[TMP2]] to ptr addrspace(8) |
| ; CHECK-NEXT: [[PTR_OFF]] = trunc i160 [[PTR_INT]] to i32 |
| ; CHECK-NEXT: [[I_NEXT]] = add i32 [[I]], 1 |
| ; CHECK-NEXT: [[TEST:%.*]] = icmp ult i32 [[I_NEXT]], [[LEN]] |
| ; CHECK-NEXT: br i1 [[TEST]], label [[LOOP]], label [[EXIT:%.*]] |
| ; CHECK: exit: |
| ; CHECK-NEXT: ret float [[SUM]] |
| ; |
| entry: |
| %start = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) |
| br label %loop |
| loop: |
| %sum.prev = phi float [ %sum, %loop ], [ 0.0, %entry ] |
| %ptr.prev = phi ptr addrspace(7) [ %ptr, %loop ], [ %start, %entry ] |
| %i = phi i32 [ %i.next, %loop ], [ 0, %entry ] |
| |
| %val = load float, ptr addrspace(7) %ptr.prev |
| %sum = fadd float %sum.prev, %val |
| |
| ;; Integer round trip on the whole 160-bit pointer. In the expected output the |
| ;; resource occupies the bits above the low 32 (shl/lshr by 32) and the offset |
| ;; the low 32 bits (zext/trunc), and the new resource comes from an inttoptr. |
| %ptr.prev.int = ptrtoint ptr addrspace(7) %ptr.prev to i160 |
| %ptr.int = add i160 %ptr.prev.int, 4 |
| %ptr = inttoptr i160 %ptr.int to ptr addrspace(7) |
| %i.next = add i32 %i, 1 |
| %test = icmp ult i32 %i.next, %len |
| br i1 %test, label %loop, label %exit |
| exit: |
| ret float %sum |
| } |
| |
| ;; Should go to offsets only |
| ;; Two nested loops sharing one walking pointer: the inner pointer phi is seeded |
| ;; from the outer pointer phi, and the outer pointer is advanced from the inner |
| ;; loop's last pointer. The assertions expect both pointer phis to become i32 |
| ;; offset phis, with every load using %buf as the resource. |
| define float @sum_2d(ptr addrspace(8) %buf, i32 %ii, i32 %jj) { |
| ; CHECK-LABEL: define float @sum_2d |
| ; CHECK-SAME: (ptr addrspace(8) [[BUF:%.*]], i32 [[II:%.*]], i32 [[JJ:%.*]]) #[[ATTR0]] { |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: br label [[LOOP1_ENTRY:%.*]] |
| ; CHECK: loop1.entry: |
| ; CHECK-NEXT: [[SUM1_PREV:%.*]] = phi float [ [[SUM:%.*]], [[LOOP1_EXIT:%.*]] ], [ 0.000000e+00, [[ENTRY:%.*]] ] |
| ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[LOOP1_EXIT]] ], [ 0, [[ENTRY]] ] |
| ; CHECK-NEXT: [[PTR1_PREV_OFF:%.*]] = phi i32 [ [[PTR1:%.*]], [[LOOP1_EXIT]] ], [ 0, [[ENTRY]] ] |
| ; CHECK-NEXT: br label [[LOOP2:%.*]] |
| ; CHECK: loop2: |
| ; CHECK-NEXT: [[SUM2_PREV:%.*]] = phi float [ [[SUM]], [[LOOP2]] ], [ [[SUM1_PREV]], [[LOOP1_ENTRY]] ] |
| ; CHECK-NEXT: [[J:%.*]] = phi i32 [ [[J_NEXT:%.*]], [[LOOP2]] ], [ 0, [[LOOP1_ENTRY]] ] |
| ; CHECK-NEXT: [[PTR2_PREV_OFF:%.*]] = phi i32 [ [[PTR2:%.*]], [[LOOP2]] ], [ [[PTR1_PREV_OFF]], [[LOOP1_ENTRY]] ] |
| ; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 [[PTR2_PREV_OFF]], i32 0, i32 0) |
| ; CHECK-NEXT: [[SUM]] = fadd float [[SUM2_PREV]], [[VAL]] |
| ; CHECK-NEXT: [[PTR2]] = add i32 [[PTR2_PREV_OFF]], 4 |
| ; CHECK-NEXT: [[J_NEXT]] = add i32 [[J]], 1 |
| ; CHECK-NEXT: [[TEST2:%.*]] = icmp ult i32 [[J_NEXT]], [[JJ]] |
| ; CHECK-NEXT: br i1 [[TEST2]], label [[LOOP2]], label [[LOOP1_EXIT]] |
| ; CHECK: loop1.exit: |
| ; CHECK-NEXT: [[PTR1]] = add i32 [[PTR2]], 4 |
| ; CHECK-NEXT: [[I_NEXT]] = add i32 [[I]], 1 |
| ; CHECK-NEXT: [[TEST1:%.*]] = icmp ult i32 [[I_NEXT]], [[II]] |
| ; CHECK-NEXT: br i1 [[TEST1]], label [[LOOP1_ENTRY]], label [[EXIT:%.*]] |
| ; CHECK: exit: |
| ; CHECK-NEXT: ret float [[SUM]] |
| ; |
| entry: |
| %start = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) |
| br label %loop1.entry |
| loop1.entry: |
| ;; Outer-loop state: running sum, walking pointer, and row counter. |
| %sum1.prev = phi float [ %sum, %loop1.exit ], [ 0.0, %entry ] |
| %ptr1.prev = phi ptr addrspace(7) [ %ptr1, %loop1.exit ], [ %start, %entry ] |
| %i = phi i32 [ %i.next, %loop1.exit ], [ 0, %entry ] |
| |
| br label %loop2 |
| loop2: |
| ;; Inner-loop state; the pointer phi takes the outer pointer on loop entry. |
| %sum2.prev = phi float [ %sum, %loop2 ], [ %sum1.prev, %loop1.entry ] |
| %ptr2.prev = phi ptr addrspace(7) [ %ptr2, %loop2 ], [ %ptr1.prev, %loop1.entry ] |
| %j = phi i32 [ %j.next, %loop2 ], [ 0, %loop1.entry ] |
| |
| %val = load float, ptr addrspace(7) %ptr2.prev |
| %sum = fadd float %sum2.prev, %val |
| |
| %ptr2 = getelementptr float, ptr addrspace(7) %ptr2.prev, i32 1 |
| %j.next = add i32 %j, 1 |
| %test2 = icmp ult i32 %j.next, %jj |
| |
| br i1 %test2, label %loop2, label %loop1.exit |
| loop1.exit: |
| ;; The outer pointer advances one more float past the inner loop's last pointer. |
| %ptr1 = getelementptr float, ptr addrspace(7) %ptr2, i32 1 |
| %i.next = add i32 %i, 1 |
| %test1 = icmp ult i32 %i.next, %ii |
| br i1 %test1, label %loop1.entry, label %exit |
| exit: |
| ret float %sum |
| } |
| |
| ;; This should optimize to just the offset parts since all the arguments to the |
| ;; select point to the same buffer. |
| ;; The next pointer is a select between two GEPs off the same current pointer |
| ;; (step of 1 or 2 floats). The assertions expect a single select over i32 |
| ;; offsets and no resource phi or resource select. |
| define float @sum_jump_on_negative(ptr addrspace(8) %buf, i32 %len) { |
| ; CHECK-LABEL: define float @sum_jump_on_negative |
| ; CHECK-SAME: (ptr addrspace(8) [[BUF:%.*]], i32 [[LEN:%.*]]) #[[ATTR0]] { |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: br label [[LOOP:%.*]] |
| ; CHECK: loop: |
| ; CHECK-NEXT: [[SUM_PREV:%.*]] = phi float [ [[SUM:%.*]], [[LOOP]] ], [ 0.000000e+00, [[ENTRY:%.*]] ] |
| ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ] |
| ; CHECK-NEXT: [[PTR_PREV_OFF:%.*]] = phi i32 [ [[PTR_OFF:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ] |
| ; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 [[PTR_PREV_OFF]], i32 0, i32 0) |
| ; CHECK-NEXT: [[SUM]] = fadd float [[SUM_PREV]], [[VAL]] |
| ; CHECK-NEXT: [[SKIP_NEXT:%.*]] = fcmp olt float [[VAL]], 0.000000e+00 |
| ; CHECK-NEXT: [[SMALL_JUMP:%.*]] = add i32 [[PTR_PREV_OFF]], 4 |
| ; CHECK-NEXT: [[LARGE_JUMP:%.*]] = add i32 [[PTR_PREV_OFF]], 8 |
| ; CHECK-NEXT: [[PTR_OFF]] = select i1 [[SKIP_NEXT]], i32 [[LARGE_JUMP]], i32 [[SMALL_JUMP]] |
| ; CHECK-NEXT: [[I_NEXT]] = add i32 [[I]], 1 |
| ; CHECK-NEXT: [[TEST:%.*]] = icmp ult i32 [[I_NEXT]], [[LEN]] |
| ; CHECK-NEXT: br i1 [[TEST]], label [[LOOP]], label [[EXIT:%.*]] |
| ; CHECK: exit: |
| ; CHECK-NEXT: ret float [[SUM]] |
| ; |
| entry: |
| %start = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) |
| br label %loop |
| loop: |
| %sum.prev = phi float [ %sum, %loop ], [ 0.0, %entry ] |
| %ptr.prev = phi ptr addrspace(7) [ %ptr, %loop ], [ %start, %entry ] |
| %i = phi i32 [ %i.next, %loop ], [ 0, %entry ] |
| |
| %val = load float, ptr addrspace(7) %ptr.prev |
| %sum = fadd float %sum.prev, %val |
| |
| ;; When the loaded value is below 0.0, step two floats instead of one. Both |
| ;; select operands are derived from %ptr.prev. |
| %skip.next = fcmp olt float %val, 0.0 |
| %small.jump = getelementptr float, ptr addrspace(7) %ptr.prev, i32 1 |
| %large.jump = getelementptr float, ptr addrspace(7) %ptr.prev, i32 2 |
| %ptr = select i1 %skip.next, ptr addrspace(7) %large.jump, ptr addrspace(7) %small.jump |
| |
| %i.next = add i32 %i, 1 |
| %test = icmp ult i32 %i.next, %len |
| br i1 %test, label %loop, label %exit |
| exit: |
| ret float %sum |
| } |
| |
| ;; Control-flow variant of a one-or-two-float step: the two GEPs live in |
| ;; separate then/else blocks and are merged by a phi in %loop.exit. The |
| ;; assertions expect that merge to become a single i32 offset phi, with every |
| ;; load using %buf as the resource. |
| define float @sum_jump_on_negative_with_phi(ptr addrspace(8) %buf, i32 %len) { |
| ; CHECK-LABEL: define float @sum_jump_on_negative_with_phi |
| ; CHECK-SAME: (ptr addrspace(8) [[BUF:%.*]], i32 [[LEN:%.*]]) #[[ATTR0]] { |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: br label [[LOOP:%.*]] |
| ; CHECK: loop: |
| ; CHECK-NEXT: [[SUM_PREV:%.*]] = phi float [ [[SUM:%.*]], [[LOOP_EXIT:%.*]] ], [ 0.000000e+00, [[ENTRY:%.*]] ] |
| ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[LOOP_EXIT]] ], [ 0, [[ENTRY]] ] |
| ; CHECK-NEXT: [[PTR_PREV_OFF:%.*]] = phi i32 [ [[PTR_OFF:%.*]], [[LOOP_EXIT]] ], [ 0, [[ENTRY]] ] |
| ; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 [[PTR_PREV_OFF]], i32 0, i32 0) |
| ; CHECK-NEXT: [[SUM]] = fadd float [[SUM_PREV]], [[VAL]] |
| ; CHECK-NEXT: [[I_NEXT]] = add i32 [[I]], 1 |
| ; CHECK-NEXT: [[TEST:%.*]] = icmp ult i32 [[I_NEXT]], [[LEN]] |
| ; CHECK-NEXT: [[SKIP_NEXT:%.*]] = fcmp olt float [[VAL]], 0.000000e+00 |
| ; CHECK-NEXT: br i1 [[SKIP_NEXT]], label [[THEN:%.*]], label [[ELSE:%.*]] |
| ; CHECK: then: |
| ; CHECK-NEXT: [[LARGE_JUMP:%.*]] = add i32 [[PTR_PREV_OFF]], 8 |
| ; CHECK-NEXT: br label [[LOOP_EXIT]] |
| ; CHECK: else: |
| ; CHECK-NEXT: [[SMALL_JUMP:%.*]] = add i32 [[PTR_PREV_OFF]], 4 |
| ; CHECK-NEXT: br label [[LOOP_EXIT]] |
| ; CHECK: loop.exit: |
| ; CHECK-NEXT: [[PTR_OFF]] = phi i32 [ [[LARGE_JUMP]], [[THEN]] ], [ [[SMALL_JUMP]], [[ELSE]] ] |
| ; CHECK-NEXT: br i1 [[TEST]], label [[LOOP]], label [[EXIT:%.*]] |
| ; CHECK: exit: |
| ; CHECK-NEXT: ret float [[SUM]] |
| ; |
| entry: |
| %start = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) |
| br label %loop |
| loop: |
| %sum.prev = phi float [ %sum, %loop.exit ], [ 0.0, %entry ] |
| %ptr.prev = phi ptr addrspace(7) [ %ptr, %loop.exit ], [ %start, %entry ] |
| %i = phi i32 [ %i.next, %loop.exit ], [ 0, %entry ] |
| |
| %val = load float, ptr addrspace(7) %ptr.prev |
| %sum = fadd float %sum.prev, %val |
| |
| ;; The loop-exit condition is computed here but only consumed in %loop.exit. |
| %i.next = add i32 %i, 1 |
| %test = icmp ult i32 %i.next, %len |
| |
| %skip.next = fcmp olt float %val, 0.0 |
| br i1 %skip.next, label %then, label %else |
| then: |
| %large.jump = getelementptr float, ptr addrspace(7) %ptr.prev, i32 2 |
| br label %loop.exit |
| else: |
| %small.jump = getelementptr float, ptr addrspace(7) %ptr.prev, i32 1 |
| br label %loop.exit |
| loop.exit: |
| ;; Both incoming pointers are derived from %ptr.prev. |
| %ptr = phi ptr addrspace(7) [ %large.jump, %then ], [ %small.jump, %else ] |
| br i1 %test, label %loop, label %exit |
| exit: |
| ret float %sum |
| } |
| |
| ;; But this has a shifting resource part. |
| ;; The next pointer is a select between a GEP off the current pointer and the |
| ;; start of a second buffer (%buf2). The assertions expect the resource to be |
| ;; tracked through the loop: a resource phi plus paired selects on the resource |
| ;; and on the offset. |
| define float @sum_new_buffer_on_negative(ptr addrspace(8) %buf1, ptr addrspace(8) %buf2, i32 %len) { |
| ; CHECK-LABEL: define float @sum_new_buffer_on_negative |
| ; CHECK-SAME: (ptr addrspace(8) [[BUF1:%.*]], ptr addrspace(8) [[BUF2:%.*]], i32 [[LEN:%.*]]) #[[ATTR0]] { |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: br label [[LOOP:%.*]] |
| ; CHECK: loop: |
| ; CHECK-NEXT: [[SUM_PREV:%.*]] = phi float [ [[SUM:%.*]], [[LOOP]] ], [ 0.000000e+00, [[ENTRY:%.*]] ] |
| ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ] |
| ; CHECK-NEXT: [[PTR_PREV_RSRC:%.*]] = phi ptr addrspace(8) [ [[PTR_RSRC:%.*]], [[LOOP]] ], [ [[BUF1]], [[ENTRY]] ] |
| ; CHECK-NEXT: [[PTR_PREV_OFF:%.*]] = phi i32 [ [[PTR_OFF:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ] |
| ; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[PTR_PREV_RSRC]], i32 [[PTR_PREV_OFF]], i32 0, i32 0) |
| ; CHECK-NEXT: [[SUM]] = fadd float [[SUM_PREV]], [[VAL]] |
| ; CHECK-NEXT: [[HOP:%.*]] = fcmp olt float [[VAL]], 0.000000e+00 |
| ; CHECK-NEXT: [[THIS_NEXT:%.*]] = add i32 [[PTR_PREV_OFF]], 4 |
| ; CHECK-NEXT: [[PTR_RSRC]] = select i1 [[HOP]], ptr addrspace(8) [[PTR_PREV_RSRC]], ptr addrspace(8) [[BUF2]] |
| ; CHECK-NEXT: [[PTR_OFF]] = select i1 [[HOP]], i32 [[THIS_NEXT]], i32 0 |
| ; CHECK-NEXT: [[I_NEXT]] = add i32 [[I]], 1 |
| ; CHECK-NEXT: [[TEST:%.*]] = icmp ult i32 [[I_NEXT]], [[LEN]] |
| ; CHECK-NEXT: br i1 [[TEST]], label [[LOOP]], label [[EXIT:%.*]] |
| ; CHECK: exit: |
| ; CHECK-NEXT: ret float [[SUM]] |
| ; |
| entry: |
| %start = addrspacecast ptr addrspace(8) %buf1 to ptr addrspace(7) |
| %start2 = addrspacecast ptr addrspace(8) %buf2 to ptr addrspace(7) |
| br label %loop |
| loop: |
| %sum.prev = phi float [ %sum, %loop ], [ 0.0, %entry ] |
| %ptr.prev = phi ptr addrspace(7) [ %ptr, %loop ], [ %start, %entry ] |
| %i = phi i32 [ %i.next, %loop ], [ 0, %entry ] |
| |
| %val = load float, ptr addrspace(7) %ptr.prev |
| %sum = fadd float %sum.prev, %val |
| |
| ;; NOTE(review): as written, %hop true (value below 0.0) keeps walking the |
| ;; current buffer and %hop false jumps to %start2, which reads as the opposite |
| ;; of the function name. The assertions match the IR as written. |
| %hop = fcmp olt float %val, 0.0 |
| %this.next = getelementptr float, ptr addrspace(7) %ptr.prev, i32 1 |
| %ptr = select i1 %hop, ptr addrspace(7) %this.next, ptr addrspace(7) %start2 |
| |
| %i.next = add i32 %i, 1 |
| %test = icmp ult i32 %i.next, %len |
| br i1 %test, label %loop, label %exit |
| exit: |
| ret float %sum |
| } |
| |
| ;; As does this, which merges the two buffers with a phi instead of a select. |
| ;; Control-flow variant of a two-buffer walk: %loop.exit merges, via a phi, a |
| ;; GEP off the current pointer (from %then) with the start of %buf2 (straight |
| ;; from %loop). The assertions expect a resource phi and an offset phi both in |
| ;; the loop header and in %loop.exit. |
| define float @sum_new_buffer_on_negative_with_phi(ptr addrspace(8) %buf1, ptr addrspace(8) %buf2, i32 %len) { |
| ; CHECK-LABEL: define float @sum_new_buffer_on_negative_with_phi |
| ; CHECK-SAME: (ptr addrspace(8) [[BUF1:%.*]], ptr addrspace(8) [[BUF2:%.*]], i32 [[LEN:%.*]]) #[[ATTR0]] { |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: br label [[LOOP:%.*]] |
| ; CHECK: loop: |
| ; CHECK-NEXT: [[SUM_PREV:%.*]] = phi float [ [[SUM:%.*]], [[LOOP_EXIT:%.*]] ], [ 0.000000e+00, [[ENTRY:%.*]] ] |
| ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[LOOP_EXIT]] ], [ 0, [[ENTRY]] ] |
| ; CHECK-NEXT: [[PTR_PREV_RSRC:%.*]] = phi ptr addrspace(8) [ [[PTR_RSRC:%.*]], [[LOOP_EXIT]] ], [ [[BUF1]], [[ENTRY]] ] |
| ; CHECK-NEXT: [[PTR_PREV_OFF:%.*]] = phi i32 [ [[PTR_OFF:%.*]], [[LOOP_EXIT]] ], [ 0, [[ENTRY]] ] |
| ; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[PTR_PREV_RSRC]], i32 [[PTR_PREV_OFF]], i32 0, i32 0) |
| ; CHECK-NEXT: [[SUM]] = fadd float [[SUM_PREV]], [[VAL]] |
| ; CHECK-NEXT: [[I_NEXT]] = add i32 [[I]], 1 |
| ; CHECK-NEXT: [[TEST:%.*]] = icmp ult i32 [[I_NEXT]], [[LEN]] |
| ; CHECK-NEXT: [[HOP:%.*]] = fcmp olt float [[VAL]], 0.000000e+00 |
| ; CHECK-NEXT: br i1 [[HOP]], label [[THEN:%.*]], label [[LOOP_EXIT]] |
| ; CHECK: then: |
| ; CHECK-NEXT: [[THIS_NEXT:%.*]] = add i32 [[PTR_PREV_OFF]], 4 |
| ; CHECK-NEXT: br label [[LOOP_EXIT]] |
| ; CHECK: loop.exit: |
| ; CHECK-NEXT: [[PTR_RSRC]] = phi ptr addrspace(8) [ [[PTR_PREV_RSRC]], [[THEN]] ], [ [[BUF2]], [[LOOP]] ] |
| ; CHECK-NEXT: [[PTR_OFF]] = phi i32 [ [[THIS_NEXT]], [[THEN]] ], [ 0, [[LOOP]] ] |
| ; CHECK-NEXT: br i1 [[TEST]], label [[LOOP]], label [[EXIT:%.*]] |
| ; CHECK: exit: |
| ; CHECK-NEXT: ret float [[SUM]] |
| ; |
| entry: |
| %start = addrspacecast ptr addrspace(8) %buf1 to ptr addrspace(7) |
| %start2 = addrspacecast ptr addrspace(8) %buf2 to ptr addrspace(7) |
| br label %loop |
| loop: |
| %sum.prev = phi float [ %sum, %loop.exit ], [ 0.0, %entry ] |
| %ptr.prev = phi ptr addrspace(7) [ %ptr, %loop.exit ], [ %start, %entry ] |
| %i = phi i32 [ %i.next, %loop.exit ], [ 0, %entry ] |
| |
| %val = load float, ptr addrspace(7) %ptr.prev |
| %sum = fadd float %sum.prev, %val |
| |
| %i.next = add i32 %i, 1 |
| %test = icmp ult i32 %i.next, %len |
| ;; NOTE(review): as written, %hop true (value below 0.0) takes %then and keeps |
| ;; walking the current buffer, while %hop false falls through to %start2. That |
| ;; reads as the opposite of the function name; the assertions match the IR. |
| %hop = fcmp olt float %val, 0.0 |
| br i1 %hop, label %then, label %loop.exit |
| then: |
| %this.next = getelementptr float, ptr addrspace(7) %ptr.prev, i32 1 |
| br label %loop.exit |
| loop.exit: |
| %ptr = phi ptr addrspace(7) [ %this.next, %then ], [ %start2, %loop ] |
| br i1 %test, label %loop, label %exit |
| exit: |
| ret float %sum |
| } |
| |
| ;; Test that the uniform buffer descriptor optimization works correctly for phi |
| ;; nodes that repeat the same predecessor multiple times. |
| ;; GEP-based summation loop whose back edge is a switch that targets %loop from |
| ;; both its default and its `i32 1` case, so each phi lists %loop twice. The |
| ;; assertions expect an offset-only i32 phi that keeps the duplicated incoming |
| ;; entries, with every load using %buf as the resource. |
| define float @sum_duplicate_preds(ptr addrspace(8) %buf, i32 %len) { |
| ; CHECK-LABEL: define float @sum_duplicate_preds |
| ; CHECK-SAME: (ptr addrspace(8) [[BUF:%.*]], i32 [[LEN:%.*]]) #[[ATTR0]] { |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: br label [[LOOP:%.*]] |
| ; CHECK: loop: |
| ; CHECK-NEXT: [[SUM_PREV:%.*]] = phi float [ [[SUM:%.*]], [[LOOP]] ], [ [[SUM]], [[LOOP]] ], [ 0.000000e+00, [[ENTRY:%.*]] ] |
| ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ], [ [[I_NEXT]], [[LOOP]] ] |
| ; CHECK-NEXT: [[PTR_PREV_OFF:%.*]] = phi i32 [ [[PTR:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ], [ [[PTR]], [[LOOP]] ] |
| ; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 [[PTR_PREV_OFF]], i32 0, i32 0) |
| ; CHECK-NEXT: [[SUM]] = fadd float [[SUM_PREV]], [[VAL]] |
| ; CHECK-NEXT: [[PTR]] = add i32 [[PTR_PREV_OFF]], 4 |
| ; CHECK-NEXT: [[I_NEXT]] = add i32 [[I]], 1 |
| ; CHECK-NEXT: [[TEST:%.*]] = icmp ult i32 [[I_NEXT]], [[LEN]] |
| ; CHECK-NEXT: [[TEST_EXT:%.*]] = zext i1 [[TEST]] to i32 |
| ; CHECK-NEXT: switch i32 [[TEST_EXT]], label [[LOOP]] [ |
| ; CHECK-NEXT: i32 1, label [[LOOP]] |
| ; CHECK-NEXT: i32 0, label [[EXIT:%.*]] |
| ; CHECK-NEXT: ] |
| ; CHECK: exit: |
| ; CHECK-NEXT: ret float [[SUM]] |
| ; |
| entry: |
| %start = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) |
| br label %loop |
| loop: |
| ;; The incoming lists deliberately place the two %loop entries in different |
| ;; positions (adjacent for %sum.prev, split around %entry for the others). |
| %sum.prev = phi float [ %sum, %loop ], [ %sum, %loop ], [ 0.0, %entry ] |
| %ptr.prev = phi ptr addrspace(7) [ %ptr, %loop ], [ %start, %entry ], [ %ptr, %loop ] |
| %i = phi i32 [ %i.next, %loop ], [ 0, %entry ], [ %i.next, %loop ] |
| |
| %val = load float, ptr addrspace(7) %ptr.prev |
| %sum = fadd float %sum.prev, %val |
| |
| %ptr = getelementptr float, ptr addrspace(7) %ptr.prev, i32 1 |
| %i.next = add i32 %i, 1 |
| %test = icmp ult i32 %i.next, %len |
| ;; Widen the i1 so a switch can be used; the default and the `1` case both |
| ;; branch back to %loop, producing the repeated predecessor. |
| %test.ext = zext i1 %test to i32 |
| switch i32 %test.ext, label %loop [ |
| i32 1, label %loop |
| i32 0, label %exit |
| ] |
| exit: |
| ret float %sum |
| } |
| |
| ;; And similarly check the "might not be uniform" case. |
| ;; Combines a ptrtoint / add i160 / inttoptr pointer update with a switch back |
| ;; edge that targets %loop twice. The assertions expect both a resource phi and |
| ;; an offset phi, each keeping the duplicated %loop incoming entries. |
| define float @sum_integer_ops_duplicate_preds(ptr addrspace(8) %buf, i32 %len) { |
| ; CHECK-LABEL: define float @sum_integer_ops_duplicate_preds |
| ; CHECK-SAME: (ptr addrspace(8) [[BUF:%.*]], i32 [[LEN:%.*]]) #[[ATTR0]] { |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: br label [[LOOP:%.*]] |
| ; CHECK: loop: |
| ; CHECK-NEXT: [[SUM_PREV:%.*]] = phi float [ [[SUM:%.*]], [[LOOP]] ], [ [[SUM]], [[LOOP]] ], [ 0.000000e+00, [[ENTRY:%.*]] ] |
| ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ], [ [[I_NEXT]], [[LOOP]] ] |
| ; CHECK-NEXT: [[PTR_PREV_RSRC:%.*]] = phi ptr addrspace(8) [ [[PTR_RSRC:%.*]], [[LOOP]] ], [ [[BUF]], [[ENTRY]] ], [ [[PTR_RSRC]], [[LOOP]] ] |
| ; CHECK-NEXT: [[PTR_PREV_OFF:%.*]] = phi i32 [ [[PTR_OFF:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ], [ [[PTR_OFF]], [[LOOP]] ] |
| ; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[PTR_PREV_RSRC]], i32 [[PTR_PREV_OFF]], i32 0, i32 0) |
| ; CHECK-NEXT: [[SUM]] = fadd float [[SUM_PREV]], [[VAL]] |
| ; CHECK-NEXT: [[PTR_PREV_INT_RSRC:%.*]] = ptrtoint ptr addrspace(8) [[PTR_PREV_RSRC]] to i160 |
| ; CHECK-NEXT: [[TMP0:%.*]] = shl nuw i160 [[PTR_PREV_INT_RSRC]], 32 |
| ; CHECK-NEXT: [[PTR_PREV_INT_OFF:%.*]] = zext i32 [[PTR_PREV_OFF]] to i160 |
| ; CHECK-NEXT: [[PTR_PREV_INT:%.*]] = or i160 [[TMP0]], [[PTR_PREV_INT_OFF]] |
| ; CHECK-NEXT: [[PTR_INT:%.*]] = add i160 [[PTR_PREV_INT]], 4 |
| ; CHECK-NEXT: [[TMP1:%.*]] = lshr i160 [[PTR_INT]], 32 |
| ; CHECK-NEXT: [[TMP2:%.*]] = trunc i160 [[TMP1]] to i128 |
| ; CHECK-NEXT: [[PTR_RSRC]] = inttoptr i128 [[TMP2]] to ptr addrspace(8) |
| ; CHECK-NEXT: [[PTR_OFF]] = trunc i160 [[PTR_INT]] to i32 |
| ; CHECK-NEXT: [[I_NEXT]] = add i32 [[I]], 1 |
| ; CHECK-NEXT: [[TEST:%.*]] = icmp ult i32 [[I_NEXT]], [[LEN]] |
| ; CHECK-NEXT: [[TEST_EXT:%.*]] = zext i1 [[TEST]] to i32 |
| ; CHECK-NEXT: switch i32 [[TEST_EXT]], label [[LOOP]] [ |
| ; CHECK-NEXT: i32 1, label [[LOOP]] |
| ; CHECK-NEXT: i32 0, label [[EXIT:%.*]] |
| ; CHECK-NEXT: ] |
| ; CHECK: exit: |
| ; CHECK-NEXT: ret float [[SUM]] |
| ; |
| entry: |
| %start = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) |
| br label %loop |
| loop: |
| ;; Each phi lists %loop twice because the switch below reaches it from two edges. |
| %sum.prev = phi float [ %sum, %loop ], [ %sum, %loop ], [ 0.0, %entry ] |
| %ptr.prev = phi ptr addrspace(7) [ %ptr, %loop ], [ %start, %entry ], [ %ptr, %loop ] |
| %i = phi i32 [ %i.next, %loop ], [ 0, %entry ], [ %i.next, %loop ] |
| |
| %val = load float, ptr addrspace(7) %ptr.prev |
| %sum = fadd float %sum.prev, %val |
| |
| ;; Integer round trip on the whole 160-bit pointer; in the expected output the |
| ;; new resource part is produced by an inttoptr rather than reusing %buf. |
| %ptr.prev.int = ptrtoint ptr addrspace(7) %ptr.prev to i160 |
| %ptr.int = add i160 %ptr.prev.int, 4 |
| %ptr = inttoptr i160 %ptr.int to ptr addrspace(7) |
| %i.next = add i32 %i, 1 |
| %test = icmp ult i32 %i.next, %len |
| ;; Default and `1` case both branch back to %loop (duplicate predecessor). |
| %test.ext = zext i1 %test to i32 |
| switch i32 %test.ext, label %loop [ |
| i32 1, label %loop |
| i32 0, label %exit |
| ] |
| exit: |
| ret float %sum |
| } |