| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 |
| ; RUN: opt -S -O2 -mtriple=x86_64-unknown-unknown < %s | FileCheck %s |
| ; |
| ; Verify that expanding memcmp in the middle-end enables further IR |
| ; optimizations. When two memcmp calls share a common pointer operand and a |
| ; constant size, the duplicate load of the shared pointer exposed by the |
| ; expansion is a candidate for CSE/GVN (see the FIXME below for the current state). |
| |
| ; External declaration of memcmp, the call this test expects opt to expand. |
| declare i32 @memcmp(ptr nocapture, ptr nocapture, i64) |
| |
| ; FIXME: The redundant load of %x is not eliminated yet because ExpandMemCmp |
| ; runs late in the pipeline. Moving it earlier should allow GVN to CSE the loads. |
| ; |
| ; Two memcmp calls share their first argument (%x). After expansion in the |
| ; middle-end, further IR optimizations should be able to remove the second load of %x. |
| define i1 @redundant_memcmp_loads(ptr nocapture readonly %x, ptr nocapture readonly %y, ptr nocapture readonly %z) { |
| ; CHECK-LABEL: define i1 @redundant_memcmp_loads( |
| ; CHECK-SAME: ptr readonly captures(none) [[X:%.*]], ptr readonly captures(none) [[Y:%.*]], ptr readonly captures(none) [[Z:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { |
| ; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 |
| ; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 |
| ; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.bswap.i32(i32 [[TMP1]]) |
| ; CHECK-NEXT: [[TMP4:%.*]] = tail call i32 @llvm.bswap.i32(i32 [[TMP2]]) |
| ; CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.ucmp.i32.i32(i32 [[TMP3]], i32 [[TMP4]]) |
| ; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[X]], align 1 |
| ; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[Z]], align 1 |
| ; CHECK-NEXT: [[TMP8:%.*]] = tail call i32 @llvm.bswap.i32(i32 [[TMP6]]) |
| ; CHECK-NEXT: [[TMP9:%.*]] = tail call i32 @llvm.bswap.i32(i32 [[TMP7]]) |
| ; CHECK-NEXT: [[TMP10:%.*]] = tail call i32 @llvm.ucmp.i32.i32(i32 [[TMP8]], i32 [[TMP9]]) |
| ; CHECK-NEXT: [[TMP11:%.*]] = or i32 [[TMP10]], [[TMP5]] |
| ; CHECK-NEXT: [[RESULT:%.*]] = icmp eq i32 [[TMP11]], 0 |
| ; CHECK-NEXT: ret i1 [[RESULT]] |
| ; |
| ; Input IR: compare the first 4 bytes at %x with %y, and then with %z. |
| ; %x is the pointer operand shared by both memcmp calls. |
| %cmp1 = call i32 @memcmp(ptr %x, ptr %y, i64 4) |
| %eq1 = icmp eq i32 %cmp1, 0 |
| %cmp2 = call i32 @memcmp(ptr %x, ptr %z, i64 4) |
| %eq2 = icmp eq i32 %cmp2, 0 |
| ; The result is true only when both comparisons report equality. |
| %result = and i1 %eq1, %eq2 |
| ret i1 %result |
| } |