; Source blob: 1350f68ede271a305788282a0823683c9ba4115e
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt -mtriple=amdgcn-- -mattr=+wavefrontsize32 -passes=instcombine -S < %s | FileCheck --check-prefixes=CHECK,WAVE32 %s
; RUN: opt -mtriple=amdgcn-- -mattr=+wavefrontsize64 -passes=instcombine -S < %s | FileCheck --check-prefixes=CHECK,WAVE64 %s
; Checks that instcombine moves the constant part of a zero-extended index out
; of the address computation. The mbcnt.lo result is known to lie in [0, 32]
; (see the range(i32 0, 33) attribute in the expected output), so
; (%v << 1) + 32 cannot overflow i32. The add of 32 can therefore be hoisted
; past the zext and canonicalized as a separate constant-offset gep.
define amdgpu_ps <2 x float> @turn_add_into_gep(ptr addrspace(1) inreg %sbase) {
; CHECK-LABEL: define amdgpu_ps <2 x float> @turn_add_into_gep(
; CHECK-SAME: ptr addrspace(1) inreg [[SBASE:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[V:%.*]] = tail call range(i32 0, 33) i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
; CHECK-NEXT: [[MUL:%.*]] = shl nuw nsw i32 [[V]], 1
; CHECK-NEXT: [[TMP1:%.*]] = zext nneg i32 [[MUL]] to i64
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [4 x i8], ptr addrspace(1) [[SBASE]], i64 [[TMP1]]
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(1) [[TMP2]], i64 128
; CHECK-NEXT: [[LOAD:%.*]] = load <2 x float>, ptr addrspace(1) [[GEP]], align 8
; CHECK-NEXT: ret <2 x float> [[LOAD]]
;
; Index under test: zext((%v << 1) + 32), used below to index float elements.
%v = tail call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
%mul = shl i32 %v, 1
%add = add i32 %mul, 32
%zext.offset = zext i32 %add to i64
; The index is scaled by the 4-byte float size, so the +32 is expected to
; show up as the trailing 128-byte i8 gep in the output.
%gep = getelementptr inbounds float, ptr addrspace(1) %sbase, i64 %zext.offset
%load = load <2 x float>, ptr addrspace(1) %gep
ret <2 x float> %load
}
; AMDGPU intrinsic called by @turn_add_into_gep to produce the index value.
declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32)
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; WAVE32: {{.*}}
; WAVE64: {{.*}}