; blob: 4f6e94431e4965dfa4125e1ab98f4990fc32fec6 [file] [edit]
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt -S -O2 -mtriple=x86_64-unknown-unknown < %s | FileCheck %s
;
; Verify that memcmp is expanded in the middle-end (the -O2 opt pipeline), so
; that the expanded loads and compares are visible to later IR optimizations.
; When two memcmp calls share a pointer operand and use the same constant size,
; the loads from the shared pointer become candidates for CSE / GVN.
; External declaration of memcmp; the test function below calls it twice with a
; constant length of 4.
declare i32 @memcmp(ptr nocapture, ptr nocapture, i64)
; FIXME: The redundant load of %x is not eliminated yet because ExpandMemCmp
; runs late in the pipeline. Moving it earlier should allow GVN to CSE the loads.
; Two 4-byte memcmp calls that share their first argument (%x). After both
; calls are expanded in the middle-end, later IR optimizations should be able
; to simplify the expanded code further.
define i1 @redundant_memcmp_loads(ptr nocapture readonly %x, ptr nocapture readonly %y, ptr nocapture readonly %z) {
; CHECK-LABEL: define i1 @redundant_memcmp_loads(
; CHECK-SAME: ptr readonly captures(none) [[X:%.*]], ptr readonly captures(none) [[Y:%.*]], ptr readonly captures(none) [[Z:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1
; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.bswap.i32(i32 [[TMP1]])
; CHECK-NEXT: [[TMP4:%.*]] = tail call i32 @llvm.bswap.i32(i32 [[TMP2]])
; CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.ucmp.i32.i32(i32 [[TMP3]], i32 [[TMP4]])
; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[X]], align 1
; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[Z]], align 1
; CHECK-NEXT: [[TMP8:%.*]] = tail call i32 @llvm.bswap.i32(i32 [[TMP6]])
; CHECK-NEXT: [[TMP9:%.*]] = tail call i32 @llvm.bswap.i32(i32 [[TMP7]])
; CHECK-NEXT: [[TMP10:%.*]] = tail call i32 @llvm.ucmp.i32.i32(i32 [[TMP8]], i32 [[TMP9]])
; CHECK-NEXT: [[TMP11:%.*]] = or i32 [[TMP10]], [[TMP5]]
; CHECK-NEXT: [[RESULT:%.*]] = icmp eq i32 [[TMP11]], 0
; CHECK-NEXT: ret i1 [[RESULT]]
;
; Input IR: returns true iff the 4 bytes at %x equal both the 4 bytes at %y
; and the 4 bytes at %z. Both calls pass %x as the first operand, so after
; expansion each one produces its own 4-byte load from %x; the expected output
; above still contains both of those loads.
;
; First comparison: %x against %y, 4 bytes.
%cmp1 = call i32 @memcmp(ptr %x, ptr %y, i64 4)
%eq1 = icmp eq i32 %cmp1, 0
; Second comparison: %x against %z, 4 bytes.
%cmp2 = call i32 @memcmp(ptr %x, ptr %z, i64 4)
%eq2 = icmp eq i32 %cmp2, 0
; True only when both memcmp results are zero.
%result = and i1 %eq1, %eq2
ret i1 %result
}