; llvm/test/Transforms/InstCombine/AMDGPU/canonicalize-add-to-gep.ll - llvm-project.git - Git at Google

 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
 ; RUN: opt -mtriple=amdgcn-- -mattr=+wavefrontsize32 -passes=instcombine -S < %s | FileCheck --check-prefixes=CHECK,WAVE32 %s
 ; RUN: opt -mtriple=amdgcn-- -mattr=+wavefrontsize64 -passes=instcombine -S < %s | FileCheck --check-prefixes=CHECK,WAVE64 %s

 ; As the addition of 32 does not overflow, it can be canonicalized as gep.
 ; Input shape: the GEP index is zext(add(shl(%v, 1), 32)) with %v produced by
 ; llvm.amdgcn.mbcnt.lo. The expected output below carries range(i32 0, 33) on
 ; that call, so the shifted value is at most 64 and adding 32 stays in range.
 ; The expected output drops the add from the index and instead applies a
 ; trailing byte GEP of 128, i.e. 32 float elements * 4 bytes.
 define amdgpu_ps <2 x float> @turn_add_into_gep(ptr addrspace(1) inreg %sbase) {
 ; CHECK-LABEL: define amdgpu_ps <2 x float> @turn_add_into_gep(
 ; CHECK-SAME: ptr addrspace(1) inreg [[SBASE:%.*]]) #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:    [[V:%.*]] = tail call range(i32 0, 33) i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
 ; CHECK-NEXT:    [[MUL:%.*]] = shl nuw nsw i32 [[V]], 1
 ; CHECK-NEXT:    [[TMP1:%.*]] = zext nneg i32 [[MUL]] to i64
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds nuw [4 x i8], ptr addrspace(1) [[SBASE]], i64 [[TMP1]]
 ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(1) [[TMP2]], i64 128
 ; CHECK-NEXT:    [[LOAD:%.*]] = load <2 x float>, ptr addrspace(1) [[GEP]], align 8
 ; CHECK-NEXT:    ret <2 x float> [[LOAD]]
 ;
   ; Source of the bounded value; written here without any range annotation.
   %v = tail call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
   ; Doubling; no wrap flags in the input, the expected output adds nuw nsw.
   %mul = shl i32 %v, 1
   ; Constant addend in i32; this is the add the test expects to become a GEP.
   %add = add i32 %mul, 32
   ; The add is performed before widening, which is why its not wrapping matters.
   %zext.offset = zext i32 %add to i64
   ; Indexed in units of float, so the constant 32 corresponds to 128 bytes.
   %gep = getelementptr inbounds float, ptr addrspace(1) %sbase, i64 %zext.offset
   ; No explicit alignment in the input; the expected output prints align 8.
   %load = load <2 x float>, ptr addrspace(1) %gep
   ret <2 x float> %load
 }

 ; Declaration of the AMDGPU intrinsic called by @turn_add_into_gep (the call
 ; site appears earlier in the file): two i32 operands, i32 result.
 declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32)
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
 ; WAVE32: {{.*}}
 ; WAVE64: {{.*}}
	; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
	; RUN: opt -mtriple=amdgcn-- -mattr=+wavefrontsize32 -passes=instcombine -S < %s | FileCheck --check-prefixes=CHECK,WAVE32 %s
	; RUN: opt -mtriple=amdgcn-- -mattr=+wavefrontsize64 -passes=instcombine -S < %s | FileCheck --check-prefixes=CHECK,WAVE64 %s

	; As the addition of 32 does not overflow, it can be canonicalized as gep.
	; NOTE(review): this definition has the same name, signature and body as an
	; earlier definition of @turn_add_into_gep in this file, differing only in
	; whitespace. It looks like a duplicated paste/extraction of the same test;
	; a module cannot define one function twice, so confirm and keep one copy.
	;
	; Input shape: the GEP index is zext(add(shl(%v, 1), 32)); the expected
	; output replaces the add with a separate byte GEP of 128 (32 floats * 4).
	define amdgpu_ps <2 x float> @turn_add_into_gep(ptr addrspace(1) inreg %sbase) {
	; CHECK-LABEL: define amdgpu_ps <2 x float> @turn_add_into_gep(
	; CHECK-SAME: ptr addrspace(1) inreg [[SBASE:%.*]]) #[[ATTR0:[0-9]+]] {
	; CHECK-NEXT: [[V:%.*]] = tail call range(i32 0, 33) i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
	; CHECK-NEXT: [[MUL:%.*]] = shl nuw nsw i32 [[V]], 1
	; CHECK-NEXT: [[TMP1:%.*]] = zext nneg i32 [[MUL]] to i64
	; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [4 x i8], ptr addrspace(1) [[SBASE]], i64 [[TMP1]]
	; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(1) [[TMP2]], i64 128
	; CHECK-NEXT: [[LOAD:%.*]] = load <2 x float>, ptr addrspace(1) [[GEP]], align 8
	; CHECK-NEXT: ret <2 x float> [[LOAD]]
	;
	%v = tail call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
	%mul = shl i32 %v, 1
	; Constant addend in i32, applied before the zext below.
	%add = add i32 %mul, 32
	%zext.offset = zext i32 %add to i64
	; Indexed in units of float, so the constant 32 corresponds to 128 bytes.
	%gep = getelementptr inbounds float, ptr addrspace(1) %sbase, i64 %zext.offset
	%load = load <2 x float>, ptr addrspace(1) %gep
	ret <2 x float> %load
	}

	; NOTE(review): the same intrinsic is already declared earlier in this file;
	; presumably part of the same duplicated copy of the test - confirm.
	declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32)
	;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
	; WAVE32: {{.*}}
	; WAVE64: {{.*}}