blob: e79e330190722f6653d1d30fb9a45a1059c93b8f [file]
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -p=pre-isel-intrinsic-lowering -S < %s | FileCheck -check-prefixes=CHECK,DEFAULT %s
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -p=pre-isel-intrinsic-lowering -S -amdgpu-memcpy-loop-unroll=2 %s -o - | FileCheck -check-prefixes=CHECK,UNROLL2 %s
; Constant length of zero with a non-constant i128 pattern value: the expected
; output contains no loop and no store, only the return.
define void @memset_pattern_i128_len0_dynvalue(ptr align 16 %a, i128 %value) {
; CHECK-LABEL: @memset_pattern_i128_len0_dynvalue(
; CHECK-NEXT: ret void
;
call void @llvm.experimental.memset.pattern(ptr align 16 %a, i128 %value, i64 0, i1 false)
ret void
}
; Constant length of one with a non-constant i128 pattern value: the expected
; output has no main loop, only the residual loop, which stores %value with
; 16-byte alignment and exits once its index reaches 1.
define void @memset_pattern_i128_len1_dynvalue(ptr align 16 %a, i128 %value) {
; CHECK-LABEL: @memset_pattern_i128_len1_dynvalue(
; CHECK-NEXT: br label [[MEMSET_PATTERN_EXPANSION_RESIDUAL_BODY:%.*]]
; CHECK: memset.pattern-expansion-residual-body:
; CHECK-NEXT: [[RESIDUAL_LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP2:%.*]], [[MEMSET_PATTERN_EXPANSION_RESIDUAL_BODY]] ]
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i128, ptr [[A:%.*]], i64 [[RESIDUAL_LOOP_INDEX]]
; CHECK-NEXT: store i128 [[VALUE:%.*]], ptr [[TMP1]], align 16
; CHECK-NEXT: [[TMP2]] = add i64 [[RESIDUAL_LOOP_INDEX]], 1
; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP2]], 1
; CHECK-NEXT: br i1 [[TMP3]], label [[MEMSET_PATTERN_EXPANSION_RESIDUAL_BODY]], label [[MEMSET_PATTERN_POST_EXPANSION:%.*]]
; CHECK: memset.pattern-post-expansion:
; CHECK-NEXT: ret void
;
call void @llvm.experimental.memset.pattern(ptr align 16 %a, i128 %value, i64 1, i1 false)
ret void
}
; Constant length of one with a constant i128 pattern: the expected output has
; only the residual loop. The hexadecimal pattern given to the intrinsic shows
; up in the expected store as a signed decimal i128 constant.
define void @memset_pattern_i128_len1(ptr align 16 %a) {
; CHECK-LABEL: @memset_pattern_i128_len1(
; CHECK-NEXT: br label [[MEMSET_PATTERN_EXPANSION_RESIDUAL_BODY:%.*]]
; CHECK: memset.pattern-expansion-residual-body:
; CHECK-NEXT: [[RESIDUAL_LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP2:%.*]], [[MEMSET_PATTERN_EXPANSION_RESIDUAL_BODY]] ]
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i128, ptr [[A:%.*]], i64 [[RESIDUAL_LOOP_INDEX]]
; CHECK-NEXT: store i128 -113427455635030943652277463699152839203, ptr [[TMP1]], align 16
; CHECK-NEXT: [[TMP2]] = add i64 [[RESIDUAL_LOOP_INDEX]], 1
; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP2]], 1
; CHECK-NEXT: br i1 [[TMP3]], label [[MEMSET_PATTERN_EXPANSION_RESIDUAL_BODY]], label [[MEMSET_PATTERN_POST_EXPANSION:%.*]]
; CHECK: memset.pattern-post-expansion:
; CHECK-NEXT: ret void
;
call void @llvm.experimental.memset.pattern(ptr align 16 %a, i128 u0xaaaaaaaabbbbbbbbccccccccdddddddd, i64 1, i1 false)
ret void
}
; Constant length of 19 i128 elements, so that both the main loop and the
; residual loop are needed in either configuration:
;  - default run: the main loop stores 16 patterns per iteration (as a
;    <64 x i32> vector) with bound 16, and the residual loop writes the
;    remaining 3 elements starting at element 16;
;  - run with -amdgpu-memcpy-loop-unroll=2: the main loop stores 2 patterns per
;    iteration (as an <8 x i32> vector) with bound 18, and the residual loop
;    writes the remaining 1 element starting at element 18.
define void @memset_pattern_i128_constlen_mainloop_and_residual_taken(ptr align 16 %a) {
; DEFAULT-LABEL: @memset_pattern_i128_constlen_mainloop_and_residual_taken(
; DEFAULT-NEXT: br label [[MEMSET_PATTERN_EXPANSION_MAIN_BODY:%.*]]
; DEFAULT: memset.pattern-expansion-main-body:
; DEFAULT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP2:%.*]], [[MEMSET_PATTERN_EXPANSION_MAIN_BODY]] ]
; DEFAULT-NEXT: [[TMP1:%.*]] = getelementptr inbounds <64 x i32>, ptr [[A:%.*]], i64 [[LOOP_INDEX]]
; DEFAULT-NEXT: store <64 x i32> bitcast (<16 x i128> splat (i128 -113427455635030943652277463699152839203) to <64 x i32>), ptr [[TMP1]], align 16
; DEFAULT-NEXT: [[TMP2]] = add i64 [[LOOP_INDEX]], 16
; DEFAULT-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP2]], 16
; DEFAULT-NEXT: br i1 [[TMP3]], label [[MEMSET_PATTERN_EXPANSION_MAIN_BODY]], label [[MEMSET_PATTERN_EXPANSION_RESIDUAL_BODY:%.*]]
; DEFAULT: memset.pattern-expansion-residual-body:
; DEFAULT-NEXT: [[RESIDUAL_LOOP_INDEX:%.*]] = phi i64 [ 0, [[MEMSET_PATTERN_EXPANSION_MAIN_BODY]] ], [ [[TMP6:%.*]], [[MEMSET_PATTERN_EXPANSION_RESIDUAL_BODY]] ]
; DEFAULT-NEXT: [[TMP4:%.*]] = add i64 16, [[RESIDUAL_LOOP_INDEX]]
; DEFAULT-NEXT: [[TMP5:%.*]] = getelementptr inbounds i128, ptr [[A]], i64 [[TMP4]]
; DEFAULT-NEXT: store i128 -113427455635030943652277463699152839203, ptr [[TMP5]], align 16
; DEFAULT-NEXT: [[TMP6]] = add i64 [[RESIDUAL_LOOP_INDEX]], 1
; DEFAULT-NEXT: [[TMP7:%.*]] = icmp ult i64 [[TMP6]], 3
; DEFAULT-NEXT: br i1 [[TMP7]], label [[MEMSET_PATTERN_EXPANSION_RESIDUAL_BODY]], label [[MEMSET_PATTERN_POST_EXPANSION:%.*]]
; DEFAULT: memset.pattern-post-expansion:
; DEFAULT-NEXT: ret void
;
; UNROLL2-LABEL: @memset_pattern_i128_constlen_mainloop_and_residual_taken(
; UNROLL2-NEXT: br label [[MEMSET_PATTERN_EXPANSION_MAIN_BODY:%.*]]
; UNROLL2: memset.pattern-expansion-main-body:
; UNROLL2-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP2:%.*]], [[MEMSET_PATTERN_EXPANSION_MAIN_BODY]] ]
; UNROLL2-NEXT: [[TMP1:%.*]] = getelementptr inbounds <8 x i32>, ptr [[A:%.*]], i64 [[LOOP_INDEX]]
; UNROLL2-NEXT: store <8 x i32> bitcast (<2 x i128> splat (i128 -113427455635030943652277463699152839203) to <8 x i32>), ptr [[TMP1]], align 16
; UNROLL2-NEXT: [[TMP2]] = add i64 [[LOOP_INDEX]], 2
; UNROLL2-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP2]], 18
; UNROLL2-NEXT: br i1 [[TMP3]], label [[MEMSET_PATTERN_EXPANSION_MAIN_BODY]], label [[MEMSET_PATTERN_EXPANSION_RESIDUAL_BODY:%.*]]
; UNROLL2: memset.pattern-expansion-residual-body:
; UNROLL2-NEXT: [[RESIDUAL_LOOP_INDEX:%.*]] = phi i64 [ 0, [[MEMSET_PATTERN_EXPANSION_MAIN_BODY]] ], [ [[TMP6:%.*]], [[MEMSET_PATTERN_EXPANSION_RESIDUAL_BODY]] ]
; UNROLL2-NEXT: [[TMP4:%.*]] = add i64 18, [[RESIDUAL_LOOP_INDEX]]
; UNROLL2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i128, ptr [[A]], i64 [[TMP4]]
; UNROLL2-NEXT: store i128 -113427455635030943652277463699152839203, ptr [[TMP5]], align 16
; UNROLL2-NEXT: [[TMP6]] = add i64 [[RESIDUAL_LOOP_INDEX]], 1
; UNROLL2-NEXT: [[TMP7:%.*]] = icmp ult i64 [[TMP6]], 1
; UNROLL2-NEXT: br i1 [[TMP7]], label [[MEMSET_PATTERN_EXPANSION_RESIDUAL_BODY]], label [[MEMSET_PATTERN_POST_EXPANSION:%.*]]
; UNROLL2: memset.pattern-post-expansion:
; UNROLL2-NEXT: ret void
;
call void @llvm.experimental.memset.pattern(ptr align 16 %a, i128 u0xaaaaaaaabbbbbbbbccccccccdddddddd, i64 19, i1 false)
ret void
}
; Same as the constant-pattern, length-one case, but with the destination in a
; non-default address space: the expected getelementptr and store keep the
; addrspace(3) pointer type.
define void @memset_pattern_i128_len1_nz_as(ptr addrspace(3) align 16 %a) {
; CHECK-LABEL: @memset_pattern_i128_len1_nz_as(
; CHECK-NEXT: br label [[MEMSET_PATTERN_EXPANSION_RESIDUAL_BODY:%.*]]
; CHECK: memset.pattern-expansion-residual-body:
; CHECK-NEXT: [[RESIDUAL_LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP2:%.*]], [[MEMSET_PATTERN_EXPANSION_RESIDUAL_BODY]] ]
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i128, ptr addrspace(3) [[A:%.*]], i64 [[RESIDUAL_LOOP_INDEX]]
; CHECK-NEXT: store i128 -113427455635030943652277463699152839203, ptr addrspace(3) [[TMP1]], align 16
; CHECK-NEXT: [[TMP2]] = add i64 [[RESIDUAL_LOOP_INDEX]], 1
; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP2]], 1
; CHECK-NEXT: br i1 [[TMP3]], label [[MEMSET_PATTERN_EXPANSION_RESIDUAL_BODY]], label [[MEMSET_PATTERN_POST_EXPANSION:%.*]]
; CHECK: memset.pattern-post-expansion:
; CHECK-NEXT: ret void
;
call void @llvm.experimental.memset.pattern(ptr addrspace(3) align 16 %a, i128 u0xaaaaaaaabbbbbbbbccccccccdddddddd, i64 1, i1 false)
ret void
}
; Length-one case where neither the function argument nor the call carries an
; alignment attribute: the expected store is emitted with `align 1`.
define void @memset_pattern_i128_len1_no_align(ptr %a) {
; CHECK-LABEL: @memset_pattern_i128_len1_no_align(
; CHECK-NEXT: br label [[MEMSET_PATTERN_EXPANSION_RESIDUAL_BODY:%.*]]
; CHECK: memset.pattern-expansion-residual-body:
; CHECK-NEXT: [[RESIDUAL_LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP2:%.*]], [[MEMSET_PATTERN_EXPANSION_RESIDUAL_BODY]] ]
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i128, ptr [[A:%.*]], i64 [[RESIDUAL_LOOP_INDEX]]
; CHECK-NEXT: store i128 -113427455635030943652277463699152839203, ptr [[TMP1]], align 1
; CHECK-NEXT: [[TMP2]] = add i64 [[RESIDUAL_LOOP_INDEX]], 1
; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP2]], 1
; CHECK-NEXT: br i1 [[TMP3]], label [[MEMSET_PATTERN_EXPANSION_RESIDUAL_BODY]], label [[MEMSET_PATTERN_POST_EXPANSION:%.*]]
; CHECK: memset.pattern-post-expansion:
; CHECK-NEXT: ret void
;
call void @llvm.experimental.memset.pattern(ptr %a, i128 u0xaaaaaaaabbbbbbbbccccccccdddddddd, i64 1, i1 false)
ret void
}
; Constant length of 16 i128 elements. In both configurations the expected
; output has only a main loop with bound 16 and no residual loop:
;  - default run: 16 patterns per iteration, stored as a <64 x i32> vector;
;  - run with -amdgpu-memcpy-loop-unroll=2: 2 patterns per iteration, stored as
;    an <8 x i32> vector.
define void @memset_pattern_i128_len16(ptr align 16 %a) {
; DEFAULT-LABEL: @memset_pattern_i128_len16(
; DEFAULT-NEXT: br label [[MEMSET_PATTERN_EXPANSION_MAIN_BODY:%.*]]
; DEFAULT: memset.pattern-expansion-main-body:
; DEFAULT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP2:%.*]], [[MEMSET_PATTERN_EXPANSION_MAIN_BODY]] ]
; DEFAULT-NEXT: [[TMP1:%.*]] = getelementptr inbounds <64 x i32>, ptr [[A:%.*]], i64 [[LOOP_INDEX]]
; DEFAULT-NEXT: store <64 x i32> bitcast (<16 x i128> splat (i128 -113427455635030943652277463699152839203) to <64 x i32>), ptr [[TMP1]], align 16
; DEFAULT-NEXT: [[TMP2]] = add i64 [[LOOP_INDEX]], 16
; DEFAULT-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP2]], 16
; DEFAULT-NEXT: br i1 [[TMP3]], label [[MEMSET_PATTERN_EXPANSION_MAIN_BODY]], label [[MEMSET_PATTERN_POST_EXPANSION:%.*]]
; DEFAULT: memset.pattern-post-expansion:
; DEFAULT-NEXT: ret void
;
; UNROLL2-LABEL: @memset_pattern_i128_len16(
; UNROLL2-NEXT: br label [[MEMSET_PATTERN_EXPANSION_MAIN_BODY:%.*]]
; UNROLL2: memset.pattern-expansion-main-body:
; UNROLL2-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP2:%.*]], [[MEMSET_PATTERN_EXPANSION_MAIN_BODY]] ]
; UNROLL2-NEXT: [[TMP1:%.*]] = getelementptr inbounds <8 x i32>, ptr [[A:%.*]], i64 [[LOOP_INDEX]]
; UNROLL2-NEXT: store <8 x i32> bitcast (<2 x i128> splat (i128 -113427455635030943652277463699152839203) to <8 x i32>), ptr [[TMP1]], align 16
; UNROLL2-NEXT: [[TMP2]] = add i64 [[LOOP_INDEX]], 2
; UNROLL2-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP2]], 16
; UNROLL2-NEXT: br i1 [[TMP3]], label [[MEMSET_PATTERN_EXPANSION_MAIN_BODY]], label [[MEMSET_PATTERN_POST_EXPANSION:%.*]]
; UNROLL2: memset.pattern-post-expansion:
; UNROLL2-NEXT: ret void
;
call void @llvm.experimental.memset.pattern(ptr align 16 %a, i128 u0xaaaaaaaabbbbbbbbccccccccdddddddd, i64 16, i1 false)
ret void
}
; Run-time length with a 16-byte-aligned destination: the expected output
; guards the loop with a `len != 0` test and then stores one i128 per iteration
; with `align 16` until the index reaches %len. There is no residual loop.
; (The unaligned destination case is covered by the _len1_no_align test.)
define void @memset_pattern_i128_dynlen(ptr align 16 %a, i64 %len) {
; CHECK-LABEL: @memset_pattern_i128_dynlen(
; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[LEN:%.*]], 0
; CHECK-NEXT: br i1 [[TMP1]], label [[MEMSET_PATTERN_EXPANSION_MAIN_BODY:%.*]], label [[MEMSET_PATTERN_POST_EXPANSION:%.*]]
; CHECK: memset.pattern-expansion-main-body:
; CHECK-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP3:%.*]], [[MEMSET_PATTERN_EXPANSION_MAIN_BODY]] ]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i128, ptr [[A:%.*]], i64 [[LOOP_INDEX]]
; CHECK-NEXT: store i128 -113427455635030943652277463699152839203, ptr [[TMP2]], align 16
; CHECK-NEXT: [[TMP3]] = add i64 [[LOOP_INDEX]], 1
; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i64 [[TMP3]], [[LEN]]
; CHECK-NEXT: br i1 [[TMP4]], label [[MEMSET_PATTERN_EXPANSION_MAIN_BODY]], label [[MEMSET_PATTERN_POST_EXPANSION]]
; CHECK: memset.pattern-post-expansion:
; CHECK-NEXT: ret void
;
call void @llvm.experimental.memset.pattern(ptr align 16 %a, i128 u0xaaaaaaaabbbbbbbbccccccccdddddddd, i64 %len, i1 false)
ret void
}
; Run-time length with the destination in addrspace(3): the expected output
; guards the loop with a `len != 0` test and stores one i128 per iteration
; through an addrspace(3) pointer with `align 16`. There is no residual loop.
define void @memset_pattern_i128_dynlen_nz_as(ptr addrspace(3) align 16 %a, i64 %len) {
; CHECK-LABEL: @memset_pattern_i128_dynlen_nz_as(
; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[LEN:%.*]], 0
; CHECK-NEXT: br i1 [[TMP1]], label [[MEMSET_PATTERN_EXPANSION_MAIN_BODY:%.*]], label [[MEMSET_PATTERN_POST_EXPANSION:%.*]]
; CHECK: memset.pattern-expansion-main-body:
; CHECK-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP3:%.*]], [[MEMSET_PATTERN_EXPANSION_MAIN_BODY]] ]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i128, ptr addrspace(3) [[A:%.*]], i64 [[LOOP_INDEX]]
; CHECK-NEXT: store i128 -113427455635030943652277463699152839203, ptr addrspace(3) [[TMP2]], align 16
; CHECK-NEXT: [[TMP3]] = add i64 [[LOOP_INDEX]], 1
; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i64 [[TMP3]], [[LEN]]
; CHECK-NEXT: br i1 [[TMP4]], label [[MEMSET_PATTERN_EXPANSION_MAIN_BODY]], label [[MEMSET_PATTERN_POST_EXPANSION]]
; CHECK: memset.pattern-post-expansion:
; CHECK-NEXT: ret void
;
call void @llvm.experimental.memset.pattern(ptr addrspace(3) align 16 %a, i128 u0xaaaaaaaabbbbbbbbccccccccdddddddd, i64 %len, i1 false)
ret void
}
; Run-time length with a constant i32 pattern. The expected output splits the
; length into a residual count (len & 3) and a main count (len - residual):
;  - the main loop, entered only if the main count is non-zero, stores a
;    <4 x i32> splat of the pattern and advances the index by 4;
;  - the residual loop, entered only if the residual count is non-zero, stores
;    single i32 values with `align 4`, starting at the main count.
define void @memset_pattern_i32_dynlen(ptr align 16 %a, i64 %len) {
; CHECK-LABEL: @memset_pattern_i32_dynlen(
; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[LEN:%.*]], 3
; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[LEN]], [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT: br i1 [[TMP3]], label [[MEMSET_PATTERN_EXPANSION_MAIN_BODY:%.*]], label [[MEMSET_PATTERN_EXPANSION_RESIDUAL_COND:%.*]]
; CHECK: memset.pattern-expansion-main-body:
; CHECK-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[MEMSET_PATTERN_EXPANSION_MAIN_BODY]] ]
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, ptr [[A:%.*]], i64 [[LOOP_INDEX]]
; CHECK-NEXT: store <4 x i32> splat (i32 -1430532899), ptr [[TMP4]], align 16
; CHECK-NEXT: [[TMP5]] = add i64 [[LOOP_INDEX]], 4
; CHECK-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP5]], [[TMP2]]
; CHECK-NEXT: br i1 [[TMP6]], label [[MEMSET_PATTERN_EXPANSION_MAIN_BODY]], label [[MEMSET_PATTERN_EXPANSION_RESIDUAL_COND]]
; CHECK: memset.pattern-expansion-residual-cond:
; CHECK-NEXT: [[TMP7:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT: br i1 [[TMP7]], label [[MEMSET_PATTERN_EXPANSION_RESIDUAL_BODY:%.*]], label [[MEMSET_PATTERN_POST_EXPANSION:%.*]]
; CHECK: memset.pattern-expansion-residual-body:
; CHECK-NEXT: [[RESIDUAL_LOOP_INDEX:%.*]] = phi i64 [ 0, [[MEMSET_PATTERN_EXPANSION_RESIDUAL_COND]] ], [ [[TMP10:%.*]], [[MEMSET_PATTERN_EXPANSION_RESIDUAL_BODY]] ]
; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[TMP2]], [[RESIDUAL_LOOP_INDEX]]
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP8]]
; CHECK-NEXT: store i32 -1430532899, ptr [[TMP9]], align 4
; CHECK-NEXT: [[TMP10]] = add i64 [[RESIDUAL_LOOP_INDEX]], 1
; CHECK-NEXT: [[TMP11:%.*]] = icmp ult i64 [[TMP10]], [[TMP1]]
; CHECK-NEXT: br i1 [[TMP11]], label [[MEMSET_PATTERN_EXPANSION_RESIDUAL_BODY]], label [[MEMSET_PATTERN_POST_EXPANSION]]
; CHECK: memset.pattern-post-expansion:
; CHECK-NEXT: ret void
;
call void @llvm.experimental.memset.pattern(ptr align 16 %a, i32 u0xaabbccdd, i64 %len, i1 false)
ret void
}
; Run-time length and run-time i32 pattern value. The expected loop structure
; is the same main/residual split as for the constant i32 pattern, except that
; the <4 x i32> splat of %val is built with insertelement + shufflevector
; before the branch into the main loop, and the residual loop stores %val
; directly.
define void @memset_pattern_i32_dynval_dynlen(ptr align 16 %a, i32 %val, i64 %len) {
; CHECK-LABEL: @memset_pattern_i32_dynval_dynlen(
; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[LEN:%.*]], 3
; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[LEN]], [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT: [[SETVALUE_SPLAT_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[VAL:%.*]], i64 0
; CHECK-NEXT: [[SETVALUE_SPLAT_SPLAT:%.*]] = shufflevector <4 x i32> [[SETVALUE_SPLAT_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br i1 [[TMP3]], label [[MEMSET_PATTERN_EXPANSION_MAIN_BODY:%.*]], label [[MEMSET_PATTERN_EXPANSION_RESIDUAL_COND:%.*]]
; CHECK: memset.pattern-expansion-main-body:
; CHECK-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[MEMSET_PATTERN_EXPANSION_MAIN_BODY]] ]
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, ptr [[A:%.*]], i64 [[LOOP_INDEX]]
; CHECK-NEXT: store <4 x i32> [[SETVALUE_SPLAT_SPLAT]], ptr [[TMP4]], align 16
; CHECK-NEXT: [[TMP5]] = add i64 [[LOOP_INDEX]], 4
; CHECK-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP5]], [[TMP2]]
; CHECK-NEXT: br i1 [[TMP6]], label [[MEMSET_PATTERN_EXPANSION_MAIN_BODY]], label [[MEMSET_PATTERN_EXPANSION_RESIDUAL_COND]]
; CHECK: memset.pattern-expansion-residual-cond:
; CHECK-NEXT: [[TMP7:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT: br i1 [[TMP7]], label [[MEMSET_PATTERN_EXPANSION_RESIDUAL_BODY:%.*]], label [[MEMSET_PATTERN_POST_EXPANSION:%.*]]
; CHECK: memset.pattern-expansion-residual-body:
; CHECK-NEXT: [[RESIDUAL_LOOP_INDEX:%.*]] = phi i64 [ 0, [[MEMSET_PATTERN_EXPANSION_RESIDUAL_COND]] ], [ [[TMP10:%.*]], [[MEMSET_PATTERN_EXPANSION_RESIDUAL_BODY]] ]
; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[TMP2]], [[RESIDUAL_LOOP_INDEX]]
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP8]]
; CHECK-NEXT: store i32 [[VAL]], ptr [[TMP9]], align 4
; CHECK-NEXT: [[TMP10]] = add i64 [[RESIDUAL_LOOP_INDEX]], 1
; CHECK-NEXT: [[TMP11:%.*]] = icmp ult i64 [[TMP10]], [[TMP1]]
; CHECK-NEXT: br i1 [[TMP11]], label [[MEMSET_PATTERN_EXPANSION_RESIDUAL_BODY]], label [[MEMSET_PATTERN_POST_EXPANSION]]
; CHECK: memset.pattern-post-expansion:
; CHECK-NEXT: ret void
;
call void @llvm.experimental.memset.pattern(ptr align 16 %a, i32 %val, i64 %len, i1 false)
ret void
}
; For i96, the store size and the alloc size differ on AMDGPU, so this case is
; not optimized: the expected output is a single guarded loop that stores one
; i96 value per iteration, with no vector main loop and no residual loop.
define void @memset_pattern_i96_dynval_dynlen(ptr align 16 %a, i96 %val, i64 %len) {
; CHECK-LABEL: @memset_pattern_i96_dynval_dynlen(
; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[LEN:%.*]], 0
; CHECK-NEXT: br i1 [[TMP1]], label [[MEMSET_PATTERN_EXPANSION_MAIN_BODY:%.*]], label [[MEMSET_PATTERN_POST_EXPANSION:%.*]]
; CHECK: memset.pattern-expansion-main-body:
; CHECK-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP3:%.*]], [[MEMSET_PATTERN_EXPANSION_MAIN_BODY]] ]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i96, ptr [[A:%.*]], i64 [[LOOP_INDEX]]
; CHECK-NEXT: store i96 [[VAL:%.*]], ptr [[TMP2]], align 16
; CHECK-NEXT: [[TMP3]] = add i64 [[LOOP_INDEX]], 1
; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i64 [[TMP3]], [[LEN]]
; CHECK-NEXT: br i1 [[TMP4]], label [[MEMSET_PATTERN_EXPANSION_MAIN_BODY]], label [[MEMSET_PATTERN_POST_EXPANSION]]
; CHECK: memset.pattern-post-expansion:
; CHECK-NEXT: ret void
;
call void @llvm.experimental.memset.pattern(ptr align 16 %a, i96 %val, i64 %len, i1 false)
ret void
}