Matt Arsenault | a61cb48 | 2016-05-12 01:58:58 +0000 | [diff] [blame] | 1 | ; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -amdgpu-promote-alloca < %s | FileCheck %s |
Yaxun Liu | 73bf0af | 2018-11-06 21:28:17 +0000 | [diff] [blame] | 2 | ; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -amdgpu-promote-alloca -disable-promote-alloca-to-lds< %s | FileCheck -check-prefix=NOLDS %s |
Matt Arsenault | a61cb48 | 2016-05-12 01:58:58 +0000 | [diff] [blame] | 3 | |
| 4 | ; These pointer compares would normally be folded by instcombine into a |
| 5 | ; compare of the GEP indices. |
| 6 | |
Yaxun Liu | 73bf0af | 2018-11-06 21:28:17 +0000 | [diff] [blame] | 7 | ; NOLDS-NOT: addrspace(3) |
| 8 | |
Matt Arsenault | a61cb48 | 2016-05-12 01:58:58 +0000 | [diff] [blame] | 9 | ; CHECK-LABEL: @lds_promoted_alloca_icmp_same_derived_pointer( |
| 10 | ; CHECK: [[ARRAYGEP:%[0-9]+]] = getelementptr inbounds [256 x [16 x i32]], [256 x [16 x i32]] addrspace(3)* @lds_promoted_alloca_icmp_same_derived_pointer.alloca, i32 0, i32 %{{[0-9]+}} |
| 11 | ; CHECK: %ptr0 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* [[ARRAYGEP]], i32 0, i32 %a |
| 12 | ; CHECK: %ptr1 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* [[ARRAYGEP]], i32 0, i32 %b |
| 13 | ; CHECK: %cmp = icmp eq i32 addrspace(3)* %ptr0, %ptr1 |
; Kernel under test: takes two element pointers into the same private
; [16 x i32] alloca (indexed by %a and %b), compares them for equality, and
; stores the zero-extended result to %out with a volatile store.
; The check lines preceding this function expect the alloca to be replaced by
; an addrspace(3) global and both GEPs plus the icmp to be rewritten to use
; addrspace(3) pointers.
Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 14 | define amdgpu_kernel void @lds_promoted_alloca_icmp_same_derived_pointer(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 { |
Matt Arsenault | a61cb48 | 2016-05-12 01:58:58 +0000 | [diff] [blame] | 15 | %alloca = alloca [16 x i32], align 4 |
| 16 | %ptr0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a |
| 17 | %ptr1 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %b |
| 18 | %cmp = icmp eq i32* %ptr0, %ptr1 |
| 19 | %zext = zext i1 %cmp to i32 |
| 20 | store volatile i32 %zext, i32 addrspace(1)* %out |
| 21 | ret void |
| 22 | } |
| 23 | |
Matt Arsenault | 891fccc | 2016-05-18 15:57:21 +0000 | [diff] [blame] | 24 | ; CHECK-LABEL: @lds_promoted_alloca_icmp_null_rhs( |
| 25 | ; CHECK: [[ARRAYGEP:%[0-9]+]] = getelementptr inbounds [256 x [16 x i32]], [256 x [16 x i32]] addrspace(3)* @lds_promoted_alloca_icmp_null_rhs.alloca, i32 0, i32 %{{[0-9]+}} |
| 26 | ; CHECK: %ptr0 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* [[ARRAYGEP]], i32 0, i32 %a |
| 27 | ; CHECK: %cmp = icmp eq i32 addrspace(3)* %ptr0, null |
; Kernel under test: compares an element pointer into a private [16 x i32]
; alloca against null, with null as the right-hand operand, and stores the
; zero-extended result to %out with a volatile store. %b is unused.
; The check lines preceding this function expect the alloca to be replaced by
; an addrspace(3) global and the icmp to compare an addrspace(3) pointer
; against null, keeping the operand order.
Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 28 | define amdgpu_kernel void @lds_promoted_alloca_icmp_null_rhs(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 { |
Matt Arsenault | 891fccc | 2016-05-18 15:57:21 +0000 | [diff] [blame] | 29 | %alloca = alloca [16 x i32], align 4 |
| 30 | %ptr0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a |
| 31 | %cmp = icmp eq i32* %ptr0, null |
| 32 | %zext = zext i1 %cmp to i32 |
| 33 | store volatile i32 %zext, i32 addrspace(1)* %out |
| 34 | ret void |
| 35 | } |
| 36 | |
| 37 | ; CHECK-LABEL: @lds_promoted_alloca_icmp_null_lhs( |
| 38 | ; CHECK: [[ARRAYGEP:%[0-9]+]] = getelementptr inbounds [256 x [16 x i32]], [256 x [16 x i32]] addrspace(3)* @lds_promoted_alloca_icmp_null_lhs.alloca, i32 0, i32 %{{[0-9]+}} |
| 39 | ; CHECK: %ptr0 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* [[ARRAYGEP]], i32 0, i32 %a |
| 40 | ; CHECK: %cmp = icmp eq i32 addrspace(3)* null, %ptr0 |
; Kernel under test: same as the null-rhs case, but with null as the left-hand
; operand of the icmp. It compares null against an element pointer into a
; private [16 x i32] alloca and stores the zero-extended result to %out with a
; volatile store. %b is unused.
; The check lines preceding this function expect the alloca to be replaced by
; an addrspace(3) global and the icmp to keep null on the left-hand side.
Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 41 | define amdgpu_kernel void @lds_promoted_alloca_icmp_null_lhs(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 { |
Matt Arsenault | 891fccc | 2016-05-18 15:57:21 +0000 | [diff] [blame] | 42 | %alloca = alloca [16 x i32], align 4 |
| 43 | %ptr0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a |
| 44 | %cmp = icmp eq i32* null, %ptr0 |
| 45 | %zext = zext i1 %cmp to i32 |
| 46 | store volatile i32 %zext, i32 addrspace(1)* %out |
| 47 | ret void |
| 48 | } |
| 49 | |
Matt Arsenault | a61cb48 | 2016-05-12 01:58:58 +0000 | [diff] [blame] | 50 | ; CHECK-LABEL: @lds_promoted_alloca_icmp_unknown_ptr( |
| 51 | ; CHECK: %alloca = alloca [16 x i32], align 4 |
| 52 | ; CHECK: %ptr0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a |
| 53 | ; CHECK: %ptr1 = call i32* @get_unknown_pointer() |
| 54 | ; CHECK: %cmp = icmp eq i32* %ptr0, %ptr1 |
; Kernel under test (negative case): compares an element pointer into a private
; [16 x i32] alloca against a pointer returned by @get_unknown_pointer, and
; stores the zero-extended result to %out with a volatile store. %b is unused.
; The check lines preceding this function expect the alloca, the GEP, the call
; and the icmp to appear unchanged, i.e. no rewrite to addrspace(3).
Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 55 | define amdgpu_kernel void @lds_promoted_alloca_icmp_unknown_ptr(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 { |
Matt Arsenault | a61cb48 | 2016-05-12 01:58:58 +0000 | [diff] [blame] | 56 | %alloca = alloca [16 x i32], align 4 |
| 57 | %ptr0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a |
| 58 | %ptr1 = call i32* @get_unknown_pointer() |
| 59 | %cmp = icmp eq i32* %ptr0, %ptr1 |
| 60 | %zext = zext i1 %cmp to i32 |
| 61 | store volatile i32 %zext, i32 addrspace(1)* %out |
| 62 | ret void |
| 63 | } |
| 64 | |
; External declaration only: no body is provided in this file, so the origin
; of the returned i32* is not visible to the test functions that call it.
| 65 | declare i32* @get_unknown_pointer() #0 |
| 66 | |
; Attribute group #0 is attached to every function defined or declared in this
; file.
; NOTE(review): the upper bound of 256 on "amdgpu-flat-work-group-size"
; presumably determines the 256-element outer dimension of the promoted array
; types expected by the check lines. Confirm against the pass.
Matt Arsenault | 4b47213 | 2019-08-27 12:34:40 -0400 | [diff] [blame] | 67 | attributes #0 = { nounwind "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1,256" } |