| ; RUN: opt -S -mtriple=amdgcn-- -amdgpu-replace-lds-use-with-pointer -amdgpu-enable-lds-replace-with-pointer=true < %s | FileCheck %s |
| |
| ; DESCRIPTION: |
| ; |
| ; Replace lds globals used within phi instruction. |
| ; |
| |
| ; Original LDS should exist. |
| ; CHECK: @lds.1 = addrspace(3) global i32 undef, align 4 |
| ; CHECK: @lds.2 = addrspace(3) global i32 undef, align 4 |
| @lds.1 = addrspace(3) global i32 undef, align 4 |
| @lds.2 = addrspace(3) global i32 undef, align 4 |
| |
| ; Pointers should be created. |
| ; CHECK: @lds.1.ptr = internal unnamed_addr addrspace(3) global i16 undef, align 2 |
| ; CHECK: @lds.2.ptr = internal unnamed_addr addrspace(3) global i16 undef, align 2 |
| |
| define void @f0(i32 %arg) { |
| ; CHECK-LABEL: bb: |
| ; CHECK: %0 = load i16, i16 addrspace(3)* @lds.2.ptr, align 2 |
| ; CHECK: %1 = getelementptr i8, i8 addrspace(3)* null, i16 %0 |
| ; CHECK: %2 = bitcast i8 addrspace(3)* %1 to i32 addrspace(3)* |
| ; CHECK: %3 = load i16, i16 addrspace(3)* @lds.1.ptr, align 2 |
| ; CHECK: %4 = getelementptr i8, i8 addrspace(3)* null, i16 %3 |
| ; CHECK: %5 = bitcast i8 addrspace(3)* %4 to i32 addrspace(3)* |
| ; CHECK: %id = call i32 @llvm.amdgcn.workitem.id.x() |
| ; CHECK: %my.tmp = sub i32 %id, %arg |
| ; CHECK: br label %bb1 |
| bb: |
| %id = call i32 @llvm.amdgcn.workitem.id.x() |
| %my.tmp = sub i32 %id, %arg |
| br label %bb1 |
| |
| ; CHECK-LABEL: bb1: |
| ; CHECK: %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ] |
| ; CHECK: %6 = icmp ne i32 addrspace(3)* inttoptr (i32 4 to i32 addrspace(3)*), %5 |
| ; CHECK: %lsr.iv.next = add i32 %lsr.iv, 1 |
| ; CHECK: %cmp0 = icmp slt i32 %lsr.iv.next, 0 |
| ; CHECK: br i1 %cmp0, label %bb4, label %Flow |
| bb1: |
| %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ] |
| %lsr.iv.next = add i32 %lsr.iv, 1 |
| %cmp0 = icmp slt i32 %lsr.iv.next, 0 |
| br i1 %cmp0, label %bb4, label %Flow |
| |
| ; CHECK-LABEL: bb4: |
| ; CHECK: %load = load volatile i32, i32 addrspace(1)* undef, align 4 |
| ; CHECK: %cmp1 = icmp sge i32 %my.tmp, %load |
| ; CHECK: br label %Flow |
| bb4: |
| %load = load volatile i32, i32 addrspace(1)* undef, align 4 |
| %cmp1 = icmp sge i32 %my.tmp, %load |
| br label %Flow |
| |
| ; CHECK-LABEL: Flow: |
| ; CHECK: %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ] |
| ; CHECK: %my.tmp3 = phi i32 addrspace(3)* [ %2, %bb4 ], [ %5, %bb1 ] |
| ; CHECK: %my.tmp4 = phi i1 [ %cmp1, %bb4 ], [ %6, %bb1 ] |
| ; CHECK: br i1 %my.tmp4, label %bb9, label %bb1 |
| Flow: |
| %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ] |
| %my.tmp3 = phi i32 addrspace(3)* [@lds.2, %bb4 ], [ @lds.1, %bb1 ] |
| %my.tmp4 = phi i1 [ %cmp1, %bb4 ], [ icmp ne (i32 addrspace(3)* inttoptr (i32 4 to i32 addrspace(3)*), i32 addrspace(3)* @lds.1), %bb1 ] |
| br i1 %my.tmp4, label %bb9, label %bb1 |
| |
| ; CHECK-LABEL: bb9: |
| ; CHECK: store volatile i32 7, i32 addrspace(3)* undef, align 4 |
| ; CHECK: ret void |
| bb9: |
| store volatile i32 7, i32 addrspace(3)* undef |
| ret void |
| } |
| |
| ; CHECK-LABEL: @k0 |
| ; CHECK: %1 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) |
| ; CHECK: %2 = icmp eq i32 %1, 0 |
| ; CHECK: br i1 %2, label %3, label %4 |
| ; |
| ; CHECK-LABEL: 3: |
| ; CHECK: store i16 ptrtoint (i32 addrspace(3)* @lds.2 to i16), i16 addrspace(3)* @lds.2.ptr, align 2 |
| ; CHECK: store i16 ptrtoint (i32 addrspace(3)* @lds.1 to i16), i16 addrspace(3)* @lds.1.ptr, align 2 |
| ; CHECK: br label %4 |
| ; |
| ; CHECK-LABEL: 4: |
| ; CHECK: call void @llvm.amdgcn.wave.barrier() |
| ; CHECK: call void @f0(i32 %arg) |
| ; CHECK: ret void |
| define amdgpu_kernel void @k0(i32 %arg) { |
| call void @f0(i32 %arg) |
| ret void |
| } |
| |
| declare i32 @llvm.amdgcn.workitem.id.x() |