blob: f7ed057bc4b7ddb466421ccc00e369334b4b420c [file] [edit]
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt -S -passes='default<O3>' %s | FileCheck %s
; Aggregate types used for the stack temporaries of the test function:
; %pair holds two i64 fields, %quad holds four.
%pair = type { i64, i64 }
%quad = type { i64, i64, i64, i64 }
; Memory intrinsics called by the test body (pointer arguments in the default
; address space, i64 length, final i1 is the volatile flag).
declare void @llvm.memcpy.p0.p0.i64(ptr noalias writeonly captures(none), ptr noalias readonly captures(none), i64, i1 immarg)
declare void @llvm.memset.p0.i64(ptr writeonly captures(none), i8, i64, i1 immarg)
; This test verifies that the default O3 pipeline canonicalizes struct allocas
; to vectors only after memcpyopt has run. The input pattern is:
;
; memcpy obj, src, 32
; memcpy tmp, obj, 16
; memset obj + 16, 0, 16
;
; ----- SWAP(other, tmp) -----
;
; memcpy swap.tmp, tmp, 16
; memcpy tmp, other, 16
; memcpy other, swap.tmp, 16
;
; The swap exchanges the first 16 bytes of other and tmp, but the first 16 bytes
; of tmp are the same as the first 16 bytes of obj. This comes from real code in
; DuckDB, where the swap function is inlined. If struct-to-vector canonicalization
; runs before memcpyopt, swap.tmp gets promoted to an SSA value and we are stuck
; saving tmp to swap.tmp. Delaying canonicalization until after memcpyopt lets
; memcpyopt notice that tmp and obj share the same first 16 bytes, so swap.tmp
; is no longer needed and the IR collapses to a single load/memmove/store.
; Test input: copies 32 bytes from %src into a local %quad, takes its first
; 16 bytes into %tmp, swaps %tmp with the 16 bytes at %other through a third
; temporary, and finally writes %tmp to %dst. The assertions below expect the
; O3 pipeline to reduce all of this to one vector load from %other, one
; memmove from %src to %other, and one vector store to %dst.
define void @move_then_swap(ptr %dst, ptr %src, ptr %other) {
; CHECK-LABEL: define void @move_then_swap(
; CHECK-SAME: ptr nofree writeonly captures(none) initializes((0, 16)) [[DST:%.*]], ptr nofree readonly captures(none) [[SRC:%.*]], ptr nofree captures(none) [[OTHER:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP_SROA_0_0_COPYLOAD:%.*]] = load <2 x i64>, ptr [[OTHER]], align 8
; CHECK-NEXT: tail call void @llvm.memmove.p0.p0.i64(ptr noundef nonnull align 8 dereferenceable(16) [[OTHER]], ptr noundef nonnull align 8 dereferenceable(16) [[SRC]], i64 16, i1 false)
; CHECK-NEXT: store <2 x i64> [[TMP_SROA_0_0_COPYLOAD]], ptr [[DST]], align 8
; CHECK-NEXT: ret void
;
entry:
; Stack temporaries: two %pair slots (%tmp and the swap scratch) and one %quad.
%tmp = alloca %pair, align 8
%obj = alloca %quad, align 8
%swap.tmp = alloca %pair, align 8
; Fill %obj with 32 bytes read from %src.
call void @llvm.memcpy.p0.p0.i64(ptr align 8 %obj, ptr align 8 %src, i64 32, i1 false)
; Take the first 16 bytes of %obj into %tmp ...
call void @llvm.memcpy.p0.p0.i64(ptr align 8 %tmp, ptr align 8 %obj, i64 16, i1 false)
; ... and zero the remaining bytes 16..31 of %obj. %obj is never read again.
%obj.tail = getelementptr inbounds i8, ptr %obj, i64 16
call void @llvm.memset.p0.i64(ptr align 8 %obj.tail, i8 0, i64 16, i1 false)
; Inlined swap of the 16 bytes at %other with %tmp, using %swap.tmp as scratch:
; save %tmp, overwrite %tmp with %other, then write the saved copy to %other.
call void @llvm.memcpy.p0.p0.i64(ptr align 8 %swap.tmp, ptr align 8 %tmp, i64 16, i1 false)
call void @llvm.memcpy.p0.p0.i64(ptr align 8 %tmp, ptr align 8 %other, i64 16, i1 false)
call void @llvm.memcpy.p0.p0.i64(ptr align 8 %other, ptr align 8 %swap.tmp, i64 16, i1 false)
; %tmp now holds what %other originally contained; publish it to %dst.
call void @llvm.memcpy.p0.p0.i64(ptr align 8 %dst, ptr align 8 %tmp, i64 16, i1 false)
ret void
}