| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 |
| ; RUN: opt -S -passes='default<O3>' %s | FileCheck %s |
| |
| %pair = type { i64, i64 } |
| %quad = type { i64, i64, i64, i64 } |
| |
| declare void @llvm.memcpy.p0.p0.i64(ptr noalias writeonly captures(none), ptr noalias readonly captures(none), i64, i1 immarg) |
| declare void @llvm.memset.p0.i64(ptr writeonly captures(none), i8, i64, i1 immarg) |
| |
| ; This test verifies that the default O3 pipeline canonicalizes struct allocas |
| ; to vectors only after memcpyopt has run. After src is copied into the local |
| ; obj, the input pattern is: |
| ; |
| ; memcpy tmp, obj, 16 |
| ; memset obj + 16, 0, 16 |
| ; |
| ; ----- SWAP(other, tmp) ----- |
| ; |
| ; memcpy swap.tmp, tmp, 16 |
| ; memcpy tmp, other, 16 |
| ; memcpy other, swap.tmp, 16 |
| ; |
| ; and the final contents of tmp are then copied to dst. |
| ; |
| ; The swap exchanges the first 16 bytes of other and tmp, but at that point the |
| ; first 16 bytes of tmp are the same as the first 16 bytes of obj. This comes |
| ; from real code in DuckDB, where the swap function is inlined. If |
| ; struct-to-vector canonicalization runs before memcpyopt, swap.tmp gets |
| ; promoted to an SSA value and we are stuck saving tmp to swap.tmp. Delaying |
| ; canonicalization until after memcpyopt lets memcpyopt notice that tmp and obj |
| ; share the same first 16 bytes, so swap.tmp is no longer needed and the IR |
| ; collapses to a single load/memmove/store. |
| define void @move_then_swap(ptr %dst, ptr %src, ptr %other) { |
| ; CHECK-LABEL: define void @move_then_swap( |
| ; CHECK-SAME: ptr nofree writeonly captures(none) initializes((0, 16)) [[DST:%.*]], ptr nofree readonly captures(none) [[SRC:%.*]], ptr nofree captures(none) [[OTHER:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { |
| ; CHECK-NEXT: [[ENTRY:.*:]] |
| ; CHECK-NEXT: [[TMP_SROA_0_0_COPYLOAD:%.*]] = load <2 x i64>, ptr [[OTHER]], align 8 |
| ; CHECK-NEXT: tail call void @llvm.memmove.p0.p0.i64(ptr noundef nonnull align 8 dereferenceable(16) [[OTHER]], ptr noundef nonnull align 8 dereferenceable(16) [[SRC]], i64 16, i1 false) |
| ; CHECK-NEXT: store <2 x i64> [[TMP_SROA_0_0_COPYLOAD]], ptr [[DST]], align 8 |
| ; CHECK-NEXT: ret void |
| ; |
| entry: |
| ; Stack temporaries: %tmp and %swap.tmp hold a %pair each, %obj holds a %quad. |
| %tmp = alloca %pair, align 8 |
| %obj = alloca %quad, align 8 |
| %swap.tmp = alloca %pair, align 8 |
| ; Copy all 32 bytes of %src into %obj, then copy the first 16 bytes of %obj into %tmp. |
| call void @llvm.memcpy.p0.p0.i64(ptr align 8 %obj, ptr align 8 %src, i64 32, i1 false) |
| call void @llvm.memcpy.p0.p0.i64(ptr align 8 %tmp, ptr align 8 %obj, i64 16, i1 false) |
| ; Zero bytes 16..31 of %obj; its first 16 bytes still match what was copied into %tmp. |
| %obj.tail = getelementptr inbounds i8, ptr %obj, i64 16 |
| call void @llvm.memset.p0.i64(ptr align 8 %obj.tail, i8 0, i64 16, i1 false) |
| ; Inlined swap of %other and %tmp, using %swap.tmp as the temporary: |
| ; swap.tmp = tmp; tmp = other; other = swap.tmp. |
| call void @llvm.memcpy.p0.p0.i64(ptr align 8 %swap.tmp, ptr align 8 %tmp, i64 16, i1 false) |
| call void @llvm.memcpy.p0.p0.i64(ptr align 8 %tmp, ptr align 8 %other, i64 16, i1 false) |
| call void @llvm.memcpy.p0.p0.i64(ptr align 8 %other, ptr align 8 %swap.tmp, i64 16, i1 false) |
| ; Store the post-swap contents of %tmp (the bytes originally read from %other) to %dst. |
| call void @llvm.memcpy.p0.p0.i64(ptr align 8 %dst, ptr align 8 %tmp, i64 16, i1 false) |
| ret void |
| } |