test/CodeGen/AMDGPU/promote-alloca-to-lds-icmp.ll - llvm-project/llvm - Git at Google

 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -amdgpu-promote-alloca < %s | FileCheck %s
 ; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -amdgpu-promote-alloca -disable-promote-alloca-to-lds< %s | FileCheck -check-prefix=NOLDS %s

 ; This normally would be fixed by instcombine to be compare to the GEP
 ; indices

 define amdgpu_kernel void @lds_promoted_alloca_icmp_same_derived_pointer(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
 ; CHECK-LABEL: @lds_promoted_alloca_icmp_same_derived_pointer(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call noalias nonnull dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
 ; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 addrspace(4)* [[TMP1]] to i32 addrspace(4)*
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, i32 addrspace(4)* [[TMP2]], i64 1
 ; CHECK-NEXT:    [[TMP4:%.*]] = load i32, i32 addrspace(4)* [[TMP3]], align 4, !invariant.load !0
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32 addrspace(4)* [[TMP2]], i64 2
 ; CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32 addrspace(4)* [[TMP5]], align 4, !range [[RNG1:![0-9]+]], !invariant.load !0
 ; CHECK-NEXT:    [[TMP7:%.*]] = lshr i32 [[TMP4]], 16
 ; CHECK-NEXT:    [[TMP8:%.*]] = call i32 @llvm.amdgcn.workitem.id.x(), !range [[RNG2:![0-9]+]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = call i32 @llvm.amdgcn.workitem.id.y(), !range [[RNG2]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = call i32 @llvm.amdgcn.workitem.id.z(), !range [[RNG2]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = mul nuw nsw i32 [[TMP7]], [[TMP6]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = mul i32 [[TMP11]], [[TMP8]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = mul nuw nsw i32 [[TMP9]], [[TMP6]]
 ; CHECK-NEXT:    [[TMP14:%.*]] = add i32 [[TMP12]], [[TMP13]]
 ; CHECK-NEXT:    [[TMP15:%.*]] = add i32 [[TMP14]], [[TMP10]]
 ; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [256 x [16 x i32]], [256 x [16 x i32]] addrspace(3)* @lds_promoted_alloca_icmp_same_derived_pointer.alloca, i32 0, i32 [[TMP15]]
 ; CHECK-NEXT:    [[PTR0:%.*]] = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* [[TMP16]], i32 0, i32 [[A:%.*]]
 ; CHECK-NEXT:    [[PTR1:%.*]] = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* [[TMP16]], i32 0, i32 [[B:%.*]]
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 addrspace(3)* [[PTR0]], [[PTR1]]
 ; CHECK-NEXT:    [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
 ; CHECK-NEXT:    store volatile i32 [[ZEXT]], i32 addrspace(1)* [[OUT:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
 ; NOLDS-LABEL: @lds_promoted_alloca_icmp_same_derived_pointer(
 ; NOLDS-NEXT:    [[ALLOCA:%.*]] = alloca [16 x i32], align 4
 ; NOLDS-NEXT:    [[PTR0:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[ALLOCA]], i32 0, i32 [[A:%.*]]
 ; NOLDS-NEXT:    [[PTR1:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[ALLOCA]], i32 0, i32 [[B:%.*]]
 ; NOLDS-NEXT:    [[CMP:%.*]] = icmp eq i32* [[PTR0]], [[PTR1]]
 ; NOLDS-NEXT:    [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
 ; NOLDS-NEXT:    store volatile i32 [[ZEXT]], i32 addrspace(1)* [[OUT:%.*]], align 4
 ; NOLDS-NEXT:    ret void
 ;
   %alloca = alloca [16 x i32], align 4
   %ptr0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a
   %ptr1 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %b
   %cmp = icmp eq i32* %ptr0, %ptr1
   %zext = zext i1 %cmp to i32
   store volatile i32 %zext, i32 addrspace(1)* %out
   ret void
 }

 define amdgpu_kernel void @lds_promoted_alloca_icmp_null_rhs(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
 ; CHECK-LABEL: @lds_promoted_alloca_icmp_null_rhs(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call noalias nonnull dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
 ; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 addrspace(4)* [[TMP1]] to i32 addrspace(4)*
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, i32 addrspace(4)* [[TMP2]], i64 1
 ; CHECK-NEXT:    [[TMP4:%.*]] = load i32, i32 addrspace(4)* [[TMP3]], align 4, !invariant.load !0
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32 addrspace(4)* [[TMP2]], i64 2
 ; CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32 addrspace(4)* [[TMP5]], align 4, !range [[RNG1]], !invariant.load !0
 ; CHECK-NEXT:    [[TMP7:%.*]] = lshr i32 [[TMP4]], 16
 ; CHECK-NEXT:    [[TMP8:%.*]] = call i32 @llvm.amdgcn.workitem.id.x(), !range [[RNG2]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = call i32 @llvm.amdgcn.workitem.id.y(), !range [[RNG2]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = call i32 @llvm.amdgcn.workitem.id.z(), !range [[RNG2]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = mul nuw nsw i32 [[TMP7]], [[TMP6]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = mul i32 [[TMP11]], [[TMP8]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = mul nuw nsw i32 [[TMP9]], [[TMP6]]
 ; CHECK-NEXT:    [[TMP14:%.*]] = add i32 [[TMP12]], [[TMP13]]
 ; CHECK-NEXT:    [[TMP15:%.*]] = add i32 [[TMP14]], [[TMP10]]
 ; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [256 x [16 x i32]], [256 x [16 x i32]] addrspace(3)* @lds_promoted_alloca_icmp_null_rhs.alloca, i32 0, i32 [[TMP15]]
 ; CHECK-NEXT:    [[PTR0:%.*]] = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* [[TMP16]], i32 0, i32 [[A:%.*]]
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 addrspace(3)* [[PTR0]], null
 ; CHECK-NEXT:    [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
 ; CHECK-NEXT:    store volatile i32 [[ZEXT]], i32 addrspace(1)* [[OUT:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
 ; NOLDS-LABEL: @lds_promoted_alloca_icmp_null_rhs(
 ; NOLDS-NEXT:    [[ALLOCA:%.*]] = alloca [16 x i32], align 4
 ; NOLDS-NEXT:    [[PTR0:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[ALLOCA]], i32 0, i32 [[A:%.*]]
 ; NOLDS-NEXT:    [[CMP:%.*]] = icmp eq i32* [[PTR0]], null
 ; NOLDS-NEXT:    [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
 ; NOLDS-NEXT:    store volatile i32 [[ZEXT]], i32 addrspace(1)* [[OUT:%.*]], align 4
 ; NOLDS-NEXT:    ret void
 ;
   %alloca = alloca [16 x i32], align 4
   %ptr0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a
   %cmp = icmp eq i32* %ptr0, null
   %zext = zext i1 %cmp to i32
   store volatile i32 %zext, i32 addrspace(1)* %out
   ret void
 }

 define amdgpu_kernel void @lds_promoted_alloca_icmp_null_lhs(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
 ; CHECK-LABEL: @lds_promoted_alloca_icmp_null_lhs(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call noalias nonnull dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
 ; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 addrspace(4)* [[TMP1]] to i32 addrspace(4)*
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, i32 addrspace(4)* [[TMP2]], i64 1
 ; CHECK-NEXT:    [[TMP4:%.*]] = load i32, i32 addrspace(4)* [[TMP3]], align 4, !invariant.load !0
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32 addrspace(4)* [[TMP2]], i64 2
 ; CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32 addrspace(4)* [[TMP5]], align 4, !range [[RNG1]], !invariant.load !0
 ; CHECK-NEXT:    [[TMP7:%.*]] = lshr i32 [[TMP4]], 16
 ; CHECK-NEXT:    [[TMP8:%.*]] = call i32 @llvm.amdgcn.workitem.id.x(), !range [[RNG2]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = call i32 @llvm.amdgcn.workitem.id.y(), !range [[RNG2]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = call i32 @llvm.amdgcn.workitem.id.z(), !range [[RNG2]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = mul nuw nsw i32 [[TMP7]], [[TMP6]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = mul i32 [[TMP11]], [[TMP8]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = mul nuw nsw i32 [[TMP9]], [[TMP6]]
 ; CHECK-NEXT:    [[TMP14:%.*]] = add i32 [[TMP12]], [[TMP13]]
 ; CHECK-NEXT:    [[TMP15:%.*]] = add i32 [[TMP14]], [[TMP10]]
 ; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [256 x [16 x i32]], [256 x [16 x i32]] addrspace(3)* @lds_promoted_alloca_icmp_null_lhs.alloca, i32 0, i32 [[TMP15]]
 ; CHECK-NEXT:    [[PTR0:%.*]] = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* [[TMP16]], i32 0, i32 [[A:%.*]]
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 addrspace(3)* null, [[PTR0]]
 ; CHECK-NEXT:    [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
 ; CHECK-NEXT:    store volatile i32 [[ZEXT]], i32 addrspace(1)* [[OUT:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
 ; NOLDS-LABEL: @lds_promoted_alloca_icmp_null_lhs(
 ; NOLDS-NEXT:    [[ALLOCA:%.*]] = alloca [16 x i32], align 4
 ; NOLDS-NEXT:    [[PTR0:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[ALLOCA]], i32 0, i32 [[A:%.*]]
 ; NOLDS-NEXT:    [[CMP:%.*]] = icmp eq i32* null, [[PTR0]]
 ; NOLDS-NEXT:    [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
 ; NOLDS-NEXT:    store volatile i32 [[ZEXT]], i32 addrspace(1)* [[OUT:%.*]], align 4
 ; NOLDS-NEXT:    ret void
 ;
   %alloca = alloca [16 x i32], align 4
   %ptr0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a
   %cmp = icmp eq i32* null, %ptr0
   %zext = zext i1 %cmp to i32
   store volatile i32 %zext, i32 addrspace(1)* %out
   ret void
 }

 define amdgpu_kernel void @lds_promoted_alloca_icmp_unknown_ptr(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
 ; CHECK-LABEL: @lds_promoted_alloca_icmp_unknown_ptr(
 ; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca [16 x i32], align 4
 ; CHECK-NEXT:    [[PTR0:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[ALLOCA]], i32 0, i32 [[A:%.*]]
 ; CHECK-NEXT:    [[PTR1:%.*]] = call i32* @get_unknown_pointer()
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32* [[PTR0]], [[PTR1]]
 ; CHECK-NEXT:    [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
 ; CHECK-NEXT:    store volatile i32 [[ZEXT]], i32 addrspace(1)* [[OUT:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
 ; NOLDS-LABEL: @lds_promoted_alloca_icmp_unknown_ptr(
 ; NOLDS-NEXT:    [[ALLOCA:%.*]] = alloca [16 x i32], align 4
 ; NOLDS-NEXT:    [[PTR0:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[ALLOCA]], i32 0, i32 [[A:%.*]]
 ; NOLDS-NEXT:    [[PTR1:%.*]] = call i32* @get_unknown_pointer()
 ; NOLDS-NEXT:    [[CMP:%.*]] = icmp eq i32* [[PTR0]], [[PTR1]]
 ; NOLDS-NEXT:    [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
 ; NOLDS-NEXT:    store volatile i32 [[ZEXT]], i32 addrspace(1)* [[OUT:%.*]], align 4
 ; NOLDS-NEXT:    ret void
 ;
   %alloca = alloca [16 x i32], align 4
   %ptr0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a
   %ptr1 = call i32* @get_unknown_pointer()
   %cmp = icmp eq i32* %ptr0, %ptr1
   %zext = zext i1 %cmp to i32
   store volatile i32 %zext, i32 addrspace(1)* %out
   ret void
 }

 declare i32* @get_unknown_pointer() #0

 attributes #0 = { nounwind "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1,256" }
	; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
	; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -amdgpu-promote-alloca < %s \| FileCheck %s
	; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -amdgpu-promote-alloca -disable-promote-alloca-to-lds< %s \| FileCheck -check-prefix=NOLDS %s

	; This normally would be fixed by instcombine to be compare to the GEP
	; indices

	define amdgpu_kernel void @lds_promoted_alloca_icmp_same_derived_pointer(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
	; CHECK-LABEL: @lds_promoted_alloca_icmp_same_derived_pointer(
	; CHECK-NEXT: [[TMP1:%.]] = call noalias nonnull dereferenceable(64) i8 addrspace(4) @llvm.amdgcn.dispatch.ptr()
	; CHECK-NEXT: [[TMP2:%.]] = bitcast i8 addrspace(4) [[TMP1]] to i32 addrspace(4)*
	; CHECK-NEXT: [[TMP3:%.]] = getelementptr inbounds i32, i32 addrspace(4) [[TMP2]], i64 1
	; CHECK-NEXT: [[TMP4:%.]] = load i32, i32 addrspace(4) [[TMP3]], align 4, !invariant.load !0
	; CHECK-NEXT: [[TMP5:%.]] = getelementptr inbounds i32, i32 addrspace(4) [[TMP2]], i64 2
	; CHECK-NEXT: [[TMP6:%.]] = load i32, i32 addrspace(4) [[TMP5]], align 4, !range [[RNG1:![0-9]+]], !invariant.load !0
	; CHECK-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP4]], 16
	; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.workitem.id.x(), !range [[RNG2:![0-9]+]]
	; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.amdgcn.workitem.id.y(), !range [[RNG2]]
	; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.amdgcn.workitem.id.z(), !range [[RNG2]]
	; CHECK-NEXT: [[TMP11:%.*]] = mul nuw nsw i32 [[TMP7]], [[TMP6]]
	; CHECK-NEXT: [[TMP12:%.*]] = mul i32 [[TMP11]], [[TMP8]]
	; CHECK-NEXT: [[TMP13:%.*]] = mul nuw nsw i32 [[TMP9]], [[TMP6]]
	; CHECK-NEXT: [[TMP14:%.*]] = add i32 [[TMP12]], [[TMP13]]
	; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[TMP14]], [[TMP10]]
	; CHECK-NEXT: [[TMP16:%.]] = getelementptr inbounds [256 x [16 x i32]], [256 x [16 x i32]] addrspace(3) @lds_promoted_alloca_icmp_same_derived_pointer.alloca, i32 0, i32 [[TMP15]]
	; CHECK-NEXT: [[PTR0:%.]] = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3) [[TMP16]], i32 0, i32 [[A:%.*]]
	; CHECK-NEXT: [[PTR1:%.]] = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3) [[TMP16]], i32 0, i32 [[B:%.*]]
	; CHECK-NEXT: [[CMP:%.]] = icmp eq i32 addrspace(3) [[PTR0]], [[PTR1]]
	; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
	; CHECK-NEXT: store volatile i32 [[ZEXT]], i32 addrspace(1)* [[OUT:%.*]], align 4
	; CHECK-NEXT: ret void
	;
	; NOLDS-LABEL: @lds_promoted_alloca_icmp_same_derived_pointer(
	; NOLDS-NEXT: [[ALLOCA:%.*]] = alloca [16 x i32], align 4
	; NOLDS-NEXT: [[PTR0:%.]] = getelementptr inbounds [16 x i32], [16 x i32] [[ALLOCA]], i32 0, i32 [[A:%.*]]
	; NOLDS-NEXT: [[PTR1:%.]] = getelementptr inbounds [16 x i32], [16 x i32] [[ALLOCA]], i32 0, i32 [[B:%.*]]
	; NOLDS-NEXT: [[CMP:%.]] = icmp eq i32 [[PTR0]], [[PTR1]]
	; NOLDS-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
	; NOLDS-NEXT: store volatile i32 [[ZEXT]], i32 addrspace(1)* [[OUT:%.*]], align 4
	; NOLDS-NEXT: ret void
	;
	%alloca = alloca [16 x i32], align 4
	%ptr0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a
	%ptr1 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %b
	%cmp = icmp eq i32* %ptr0, %ptr1
	%zext = zext i1 %cmp to i32
	store volatile i32 %zext, i32 addrspace(1)* %out
	ret void
	}

	define amdgpu_kernel void @lds_promoted_alloca_icmp_null_rhs(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
	; CHECK-LABEL: @lds_promoted_alloca_icmp_null_rhs(
	; CHECK-NEXT: [[TMP1:%.]] = call noalias nonnull dereferenceable(64) i8 addrspace(4) @llvm.amdgcn.dispatch.ptr()
	; CHECK-NEXT: [[TMP2:%.]] = bitcast i8 addrspace(4) [[TMP1]] to i32 addrspace(4)*
	; CHECK-NEXT: [[TMP3:%.]] = getelementptr inbounds i32, i32 addrspace(4) [[TMP2]], i64 1
	; CHECK-NEXT: [[TMP4:%.]] = load i32, i32 addrspace(4) [[TMP3]], align 4, !invariant.load !0
	; CHECK-NEXT: [[TMP5:%.]] = getelementptr inbounds i32, i32 addrspace(4) [[TMP2]], i64 2
	; CHECK-NEXT: [[TMP6:%.]] = load i32, i32 addrspace(4) [[TMP5]], align 4, !range [[RNG1]], !invariant.load !0
	; CHECK-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP4]], 16
	; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.workitem.id.x(), !range [[RNG2]]
	; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.amdgcn.workitem.id.y(), !range [[RNG2]]
	; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.amdgcn.workitem.id.z(), !range [[RNG2]]
	; CHECK-NEXT: [[TMP11:%.*]] = mul nuw nsw i32 [[TMP7]], [[TMP6]]
	; CHECK-NEXT: [[TMP12:%.*]] = mul i32 [[TMP11]], [[TMP8]]
	; CHECK-NEXT: [[TMP13:%.*]] = mul nuw nsw i32 [[TMP9]], [[TMP6]]
	; CHECK-NEXT: [[TMP14:%.*]] = add i32 [[TMP12]], [[TMP13]]
	; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[TMP14]], [[TMP10]]
	; CHECK-NEXT: [[TMP16:%.]] = getelementptr inbounds [256 x [16 x i32]], [256 x [16 x i32]] addrspace(3) @lds_promoted_alloca_icmp_null_rhs.alloca, i32 0, i32 [[TMP15]]
	; CHECK-NEXT: [[PTR0:%.]] = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3) [[TMP16]], i32 0, i32 [[A:%.*]]
	; CHECK-NEXT: [[CMP:%.]] = icmp eq i32 addrspace(3) [[PTR0]], null
	; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
	; CHECK-NEXT: store volatile i32 [[ZEXT]], i32 addrspace(1)* [[OUT:%.*]], align 4
	; CHECK-NEXT: ret void
	;
	; NOLDS-LABEL: @lds_promoted_alloca_icmp_null_rhs(
	; NOLDS-NEXT: [[ALLOCA:%.*]] = alloca [16 x i32], align 4
	; NOLDS-NEXT: [[PTR0:%.]] = getelementptr inbounds [16 x i32], [16 x i32] [[ALLOCA]], i32 0, i32 [[A:%.*]]
	; NOLDS-NEXT: [[CMP:%.]] = icmp eq i32 [[PTR0]], null
	; NOLDS-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
	; NOLDS-NEXT: store volatile i32 [[ZEXT]], i32 addrspace(1)* [[OUT:%.*]], align 4
	; NOLDS-NEXT: ret void
	;
	%alloca = alloca [16 x i32], align 4
	%ptr0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a
	%cmp = icmp eq i32* %ptr0, null
	%zext = zext i1 %cmp to i32
	store volatile i32 %zext, i32 addrspace(1)* %out
	ret void
	}

	define amdgpu_kernel void @lds_promoted_alloca_icmp_null_lhs(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
	; CHECK-LABEL: @lds_promoted_alloca_icmp_null_lhs(
	; CHECK-NEXT: [[TMP1:%.]] = call noalias nonnull dereferenceable(64) i8 addrspace(4) @llvm.amdgcn.dispatch.ptr()
	; CHECK-NEXT: [[TMP2:%.]] = bitcast i8 addrspace(4) [[TMP1]] to i32 addrspace(4)*
	; CHECK-NEXT: [[TMP3:%.]] = getelementptr inbounds i32, i32 addrspace(4) [[TMP2]], i64 1
	; CHECK-NEXT: [[TMP4:%.]] = load i32, i32 addrspace(4) [[TMP3]], align 4, !invariant.load !0
	; CHECK-NEXT: [[TMP5:%.]] = getelementptr inbounds i32, i32 addrspace(4) [[TMP2]], i64 2
	; CHECK-NEXT: [[TMP6:%.]] = load i32, i32 addrspace(4) [[TMP5]], align 4, !range [[RNG1]], !invariant.load !0
	; CHECK-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP4]], 16
	; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.workitem.id.x(), !range [[RNG2]]
	; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.amdgcn.workitem.id.y(), !range [[RNG2]]
	; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.amdgcn.workitem.id.z(), !range [[RNG2]]
	; CHECK-NEXT: [[TMP11:%.*]] = mul nuw nsw i32 [[TMP7]], [[TMP6]]
	; CHECK-NEXT: [[TMP12:%.*]] = mul i32 [[TMP11]], [[TMP8]]
	; CHECK-NEXT: [[TMP13:%.*]] = mul nuw nsw i32 [[TMP9]], [[TMP6]]
	; CHECK-NEXT: [[TMP14:%.*]] = add i32 [[TMP12]], [[TMP13]]
	; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[TMP14]], [[TMP10]]
	; CHECK-NEXT: [[TMP16:%.]] = getelementptr inbounds [256 x [16 x i32]], [256 x [16 x i32]] addrspace(3) @lds_promoted_alloca_icmp_null_lhs.alloca, i32 0, i32 [[TMP15]]
	; CHECK-NEXT: [[PTR0:%.]] = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3) [[TMP16]], i32 0, i32 [[A:%.*]]
	; CHECK-NEXT: [[CMP:%.]] = icmp eq i32 addrspace(3) null, [[PTR0]]
	; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
	; CHECK-NEXT: store volatile i32 [[ZEXT]], i32 addrspace(1)* [[OUT:%.*]], align 4
	; CHECK-NEXT: ret void
	;
	; NOLDS-LABEL: @lds_promoted_alloca_icmp_null_lhs(
	; NOLDS-NEXT: [[ALLOCA:%.*]] = alloca [16 x i32], align 4
	; NOLDS-NEXT: [[PTR0:%.]] = getelementptr inbounds [16 x i32], [16 x i32] [[ALLOCA]], i32 0, i32 [[A:%.*]]
	; NOLDS-NEXT: [[CMP:%.]] = icmp eq i32 null, [[PTR0]]
	; NOLDS-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
	; NOLDS-NEXT: store volatile i32 [[ZEXT]], i32 addrspace(1)* [[OUT:%.*]], align 4
	; NOLDS-NEXT: ret void
	;
	%alloca = alloca [16 x i32], align 4
	%ptr0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a
	%cmp = icmp eq i32* null, %ptr0
	%zext = zext i1 %cmp to i32
	store volatile i32 %zext, i32 addrspace(1)* %out
	ret void
	}

	define amdgpu_kernel void @lds_promoted_alloca_icmp_unknown_ptr(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
	; CHECK-LABEL: @lds_promoted_alloca_icmp_unknown_ptr(
	; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [16 x i32], align 4
	; CHECK-NEXT: [[PTR0:%.]] = getelementptr inbounds [16 x i32], [16 x i32] [[ALLOCA]], i32 0, i32 [[A:%.*]]
	; CHECK-NEXT: [[PTR1:%.]] = call i32 @get_unknown_pointer()
	; CHECK-NEXT: [[CMP:%.]] = icmp eq i32 [[PTR0]], [[PTR1]]
	; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
	; CHECK-NEXT: store volatile i32 [[ZEXT]], i32 addrspace(1)* [[OUT:%.*]], align 4
	; CHECK-NEXT: ret void
	;
	; NOLDS-LABEL: @lds_promoted_alloca_icmp_unknown_ptr(
	; NOLDS-NEXT: [[ALLOCA:%.*]] = alloca [16 x i32], align 4
	; NOLDS-NEXT: [[PTR0:%.]] = getelementptr inbounds [16 x i32], [16 x i32] [[ALLOCA]], i32 0, i32 [[A:%.*]]
	; NOLDS-NEXT: [[PTR1:%.]] = call i32 @get_unknown_pointer()
	; NOLDS-NEXT: [[CMP:%.]] = icmp eq i32 [[PTR0]], [[PTR1]]
	; NOLDS-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
	; NOLDS-NEXT: store volatile i32 [[ZEXT]], i32 addrspace(1)* [[OUT:%.*]], align 4
	; NOLDS-NEXT: ret void
	;
	%alloca = alloca [16 x i32], align 4
	%ptr0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a
	%ptr1 = call i32* @get_unknown_pointer()
	%cmp = icmp eq i32* %ptr0, %ptr1
	%zext = zext i1 %cmp to i32
	store volatile i32 %zext, i32 addrspace(1)* %out
	ret void
	}

	declare i32* @get_unknown_pointer() #0

	attributes #0 = { nounwind "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1,256" }