blob: f31421de517cb9d1ce9716d2bc6866673bfcd53d [file] [log] [blame]
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -passes=amdgpu-promote-alloca,sroa < %s | FileCheck %s
; Checks that memsets don't block PromoteAlloca.
; Note: memsets are just updated with the new type size. They are not eliminated, which means
; the original allocas also stay. This puts a bit more load on SROA.
; If PromoteAlloca is moved to SSAUpdater, we could just entirely replace the memsets with
; e.g. ConstantAggregate.
; A memset of 0 that covers the entire 48-byte [6 x i64] alloca does not block
; promotion: the alloca is modeled as a <6 x i64> zeroinitializer vector, the
; store/load become insertelement/extractelement, and no memset remains in the
; promoted output.
define amdgpu_kernel void @memset_all_zero(i64 %val) {
; CHECK-LABEL: @memset_all_zero(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <6 x i64> zeroinitializer, i64 [[VAL:%.*]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <6 x i64> [[TMP0]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <6 x i64> [[TMP0]], i64 [[VAL]], i64 1
; CHECK-NEXT: ret void
;
entry:
; 48 bytes = 6 * sizeof(i64), i.e. the whole alloca is zero-filled.
%stack = alloca [6 x i64], align 4, addrspace(5)
call void @llvm.memset.p5.i64(ptr addrspace(5) %stack, i8 0, i64 48, i1 false)
store i64 %val, ptr addrspace(5) %stack
%reload = load i64, ptr addrspace(5) %stack
; Access to element 1 (byte offset 8) of the same alloca.
%stack.1 = getelementptr [6 x i64], ptr addrspace(5) %stack, i64 0, i64 1
store i64 %val, ptr addrspace(5) %stack.1
ret void
}
; A non-zero memset (byte 0x05) covering the whole 32-byte [4 x i64] alloca is
; also promotable: it becomes a splat constant vector where each i64 element is
; 0x0505050505050505 == 361700864190383365.
define amdgpu_kernel void @memset_all_5(i64 %val) {
; CHECK-LABEL: @memset_all_5(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i64> <i64 361700864190383365, i64 361700864190383365, i64 361700864190383365, i64 361700864190383365>, i64 [[VAL:%.*]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i64> [[TMP0]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i64> [[TMP0]], i64 [[VAL]], i64 1
; CHECK-NEXT: ret void
;
entry:
; 32 bytes = 4 * sizeof(i64), i.e. the whole alloca is filled with 0x05.
%stack = alloca [4 x i64], align 4, addrspace(5)
call void @llvm.memset.p5.i64(ptr addrspace(5) %stack, i8 5, i64 32, i1 false)
store i64 %val, ptr addrspace(5) %stack
%reload = load i64, ptr addrspace(5) %stack
; Use the alloca's own [4 x i64] type here; the previous [6 x i64] source type
; was a copy-paste from @memset_all_zero. The resulting byte offset (8) is
; identical either way, so the CHECK lines are unaffected.
%stack.1 = getelementptr [4 x i64], ptr addrspace(5) %stack, i64 0, i64 1
store i64 %val, ptr addrspace(5) %stack.1
ret void
}
; A volatile memset (last argument i1 true) blocks PromoteAlloca; SROA then
; splits the alloca into the slice touched by the store (i64) and the rest
; ([3 x i64]), keeping both volatile memsets.
define amdgpu_kernel void @memset_volatile_nopromote(i64 %val) {
; CHECK-LABEL: @memset_volatile_nopromote(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[STACK_SROA_0:%.*]] = alloca i64, align 8, addrspace(5)
; CHECK-NEXT: [[STACK_SROA_2:%.*]] = alloca [3 x i64], align 8, addrspace(5)
; CHECK-NEXT: call void @llvm.memset.p5.i64(ptr addrspace(5) align 8 [[STACK_SROA_0]], i8 0, i64 8, i1 true)
; CHECK-NEXT: call void @llvm.memset.p5.i64(ptr addrspace(5) align 8 [[STACK_SROA_2]], i8 0, i64 24, i1 true)
; CHECK-NEXT: store i64 [[VAL:%.*]], ptr addrspace(5) [[STACK_SROA_0]], align 8
; CHECK-NEXT: ret void
;
entry:
%stack = alloca [4 x i64], align 4, addrspace(5)
call void @llvm.memset.p5.i64(ptr addrspace(5) %stack, i8 0, i64 32, i1 true)
store i64 %val, ptr addrspace(5) %stack
ret void
}
; A memset whose size (31) is not a multiple of the i64 element size blocks
; PromoteAlloca; SROA splits the alloca into the stored i64 slice and a
; [23 x i8] tail (8 + 23 = 31 bytes covered by the two volatile memsets).
define amdgpu_kernel void @memset_badsize_nopromote(i64 %val) {
; CHECK-LABEL: @memset_badsize_nopromote(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[STACK_SROA_0:%.*]] = alloca i64, align 8, addrspace(5)
; CHECK-NEXT: [[STACK_SROA_2:%.*]] = alloca [23 x i8], align 4, addrspace(5)
; CHECK-NEXT: call void @llvm.memset.p5.i64(ptr addrspace(5) align 8 [[STACK_SROA_0]], i8 0, i64 8, i1 true)
; CHECK-NEXT: call void @llvm.memset.p5.i64(ptr addrspace(5) align 4 [[STACK_SROA_2]], i8 0, i64 23, i1 true)
; CHECK-NEXT: store i64 [[VAL:%.*]], ptr addrspace(5) [[STACK_SROA_0]], align 8
; CHECK-NEXT: ret void
;
entry:
%stack = alloca [4 x i64], align 4, addrspace(5)
; 31 bytes: deliberately not a multiple of 8 (and not the full 32-byte alloca).
call void @llvm.memset.p5.i64(ptr addrspace(5) %stack, i8 0, i64 31, i1 true)
store i64 %val, ptr addrspace(5) %stack
ret void
}
; A memset through a pointer offset into the alloca (element 1, covering the
; last 24 of 32 bytes) blocks PromoteAlloca; SROA keeps a [3 x i64] slice for
; the memset region. The store to element 0 is dead and removed.
define amdgpu_kernel void @memset_offset_ptr_nopromote(i64 %val) {
; CHECK-LABEL: @memset_offset_ptr_nopromote(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[STACK_SROA_1:%.*]] = alloca [3 x i64], align 8, addrspace(5)
; CHECK-NEXT: call void @llvm.memset.p5.i64(ptr addrspace(5) align 8 [[STACK_SROA_1]], i8 0, i64 24, i1 true)
; CHECK-NEXT: ret void
;
entry:
%stack = alloca [4 x i64], align 4, addrspace(5)
; Points at element 1 (byte offset 8) of the alloca.
%gep = getelementptr [4 x i64], ptr addrspace(5) %stack, i64 0, i64 1
call void @llvm.memset.p5.i64(ptr addrspace(5) %gep, i8 0, i64 24, i1 true)
store i64 %val, ptr addrspace(5) %stack
ret void
}
declare void @llvm.memset.p5.i64(ptr addrspace(5) nocapture writeonly, i8, i64, i1 immarg)