| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py |
| ; RUN: opt -S -passes=early-cse < %s | FileCheck %s |
| |
| ; Unequal mask check. |
| |
| ; Load-load: the second load can be removed if (assuming unequal masks) the |
| ; second loaded value is a subset of the first loaded value considering the |
| ; non-undef vector elements. In other words, if the second mask is a submask |
| ; of the first one, and the through value of the second load is undef. |
| |
| ; Load-load, second mask is a submask of the first, second through is undef. |
| ; Both loads read from %a0: the first enables elements 0, 1 and 3, the second |
| ; enables only elements 0 and 3. |
| ; Expect the second load to be removed, with the add using the first load's |
| ; result for both operands. |
| define <4 x i32> @f3(ptr %a0, <4 x i32> %a1) { |
| ; CHECK-LABEL: @f3( |
| ; CHECK-NEXT: [[V0:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[A0:%.*]], i32 4, <4 x i1> <i1 true, i1 true, i1 false, i1 true>, <4 x i32> [[A1:%.*]]) |
| ; CHECK-NEXT: [[V2:%.*]] = add <4 x i32> [[V0]], [[V0]] |
| ; CHECK-NEXT: ret <4 x i32> [[V2]] |
| ; |
| %v0 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %a0, i32 4, <4 x i1> <i1 true, i1 true, i1 false, i1 true>, <4 x i32> %a1) |
| %v1 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %a0, i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i32> undef) |
| %v2 = add <4 x i32> %v0, %v1 |
| ret <4 x i32> %v2 |
| } |
| |
| ; Load-load, second mask is a submask of the first, second through is not undef. |
| ; Same masks as @f3 (first: elements 0, 1, 3; second: elements 0, 3), but the |
| ; second load's through value is zeroinitializer instead of undef. |
| ; Expect the second load to remain. |
| define <4 x i32> @f4(ptr %a0, <4 x i32> %a1) { |
| ; CHECK-LABEL: @f4( |
| ; CHECK-NEXT: [[V0:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[A0:%.*]], i32 4, <4 x i1> <i1 true, i1 true, i1 false, i1 true>, <4 x i32> [[A1:%.*]]) |
| ; CHECK-NEXT: [[V1:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[A0]], i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i32> zeroinitializer) |
| ; CHECK-NEXT: [[V2:%.*]] = add <4 x i32> [[V0]], [[V1]] |
| ; CHECK-NEXT: ret <4 x i32> [[V2]] |
| ; |
| %v0 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %a0, i32 4, <4 x i1> <i1 true, i1 true, i1 false, i1 true>, <4 x i32> %a1) |
| %v1 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %a0, i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i32> zeroinitializer) |
| %v2 = add <4 x i32> %v0, %v1 |
| ret <4 x i32> %v2 |
| } |
| |
| ; Load-load, second mask is not a submask of the first, second through is undef. |
| ; The first load enables elements 0 and 3; the second enables elements 0, 1 |
| ; and 3, so it reads element 1, which the first load's mask disables. The |
| ; through value of the second load is undef, so the mask mismatch is the only |
| ; thing keeping the second load alive. |
| ; Expect the second load to remain. |
| define <4 x i32> @f5(ptr %a0, <4 x i32> %a1) { |
| ; CHECK-LABEL: @f5( |
| ; CHECK-NEXT: [[V0:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[A0:%.*]], i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i32> [[A1:%.*]]) |
| ; CHECK-NEXT: [[V1:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[A0]], i32 4, <4 x i1> <i1 true, i1 true, i1 false, i1 true>, <4 x i32> undef) |
| ; CHECK-NEXT: [[V2:%.*]] = add <4 x i32> [[V0]], [[V1]] |
| ; CHECK-NEXT: ret <4 x i32> [[V2]] |
| ; |
| %v0 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %a0, i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i32> %a1) |
| %v1 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %a0, i32 4, <4 x i1> <i1 true, i1 true, i1 false, i1 true>, <4 x i32> undef) |
| %v2 = add <4 x i32> %v0, %v1 |
| ret <4 x i32> %v2 |
| } |
| |
| ; Store-store: the first store can be removed if the first mask is a submask |
| ; of the second mask. |
| |
| ; Store-store, first mask is a submask of the second. |
| ; Both stores write %a0 to %a1: the first enables elements 0 and 3, the second |
| ; enables elements 0, 1 and 3. |
| ; Expect the first store to be removed, leaving only the second. |
| define void @f6(<4 x i32> %a0, ptr %a1) { |
| ; CHECK-LABEL: @f6( |
| ; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[A0:%.*]], ptr [[A1:%.*]], i32 4, <4 x i1> <i1 true, i1 true, i1 false, i1 true>) |
| ; CHECK-NEXT: ret void |
| ; |
| call void @llvm.masked.store.v4i32.p0(<4 x i32> %a0, ptr %a1, i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 true>) |
| call void @llvm.masked.store.v4i32.p0(<4 x i32> %a0, ptr %a1, i32 4, <4 x i1> <i1 true, i1 true, i1 false, i1 true>) |
| ret void |
| } |
| |
| ; Store-store, first mask is not a submask of the second. |
| ; Same stores as @f6 in the opposite order: the first enables elements 0, 1 |
| ; and 3, the second only elements 0 and 3, so the first store writes element 1 |
| ; and the second does not. |
| ; Expect both stores to remain. |
| define void @f7(<4 x i32> %a0, ptr %a1) { |
| ; CHECK-LABEL: @f7( |
| ; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[A0:%.*]], ptr [[A1:%.*]], i32 4, <4 x i1> <i1 true, i1 true, i1 false, i1 true>) |
| ; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[A0]], ptr [[A1]], i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 true>) |
| ; CHECK-NEXT: ret void |
| ; |
| call void @llvm.masked.store.v4i32.p0(<4 x i32> %a0, ptr %a1, i32 4, <4 x i1> <i1 true, i1 true, i1 false, i1 true>) |
| call void @llvm.masked.store.v4i32.p0(<4 x i32> %a0, ptr %a1, i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 true>) |
| ret void |
| } |
| |
| ; Load-store: the store can be removed if the store's mask is a submask of the |
| ; load's mask. |
| |
| ; Load-store, the store's mask (second) is a submask of the load's mask (first). |
| ; The load enables elements 0, 1 and 3 of %a0; the store writes the loaded |
| ; value back to %a0 with only elements 0 and 3 enabled. |
| ; Expect the store to be removed. |
| define <4 x i32> @f8(ptr %a0, <4 x i32> %a1) { |
| ; CHECK-LABEL: @f8( |
| ; CHECK-NEXT: [[V0:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[A0:%.*]], i32 4, <4 x i1> <i1 true, i1 true, i1 false, i1 true>, <4 x i32> [[A1:%.*]]) |
| ; CHECK-NEXT: ret <4 x i32> [[V0]] |
| ; |
| %v0 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %a0, i32 4, <4 x i1> <i1 true, i1 true, i1 false, i1 true>, <4 x i32> %a1) |
| call void @llvm.masked.store.v4i32.p0(<4 x i32> %v0, ptr %a0, i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 true>) |
| ret <4 x i32> %v0 |
| } |
| |
| ; Load-store, the store's mask (second) is not a submask of the load's mask |
| ; (first). The load enables elements 0 and 3 of %a0; the store writes the |
| ; loaded value back to %a0 with elements 0, 1 and 3 enabled, so it writes |
| ; element 1, which the load's mask disables. |
| ; Expect the store to remain. |
| define <4 x i32> @f9(ptr %a0, <4 x i32> %a1) { |
| ; CHECK-LABEL: @f9( |
| ; CHECK-NEXT: [[V0:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[A0:%.*]], i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i32> [[A1:%.*]]) |
| ; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[V0]], ptr [[A0]], i32 4, <4 x i1> <i1 true, i1 true, i1 false, i1 true>) |
| ; CHECK-NEXT: ret <4 x i32> [[V0]] |
| ; |
| %v0 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %a0, i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i32> %a1) |
| call void @llvm.masked.store.v4i32.p0(<4 x i32> %v0, ptr %a0, i32 4, <4 x i1> <i1 true, i1 true, i1 false, i1 true>) |
| ret <4 x i32> %v0 |
| } |
| |
| ; Store-load: the load can be removed if load's mask is a submask of the |
| ; store's mask, and the load's through value is undef. |
| |
| ; Store-load, load's mask is a submask of store's mask, thru is undef. |
| ; The store writes %a0 to %a1 with elements 0, 1 and 3 enabled; the load reads |
| ; %a1 back with only elements 0 and 3 enabled and an undef through value. |
| ; Expect the load to be removed, with the function returning %a0 directly. |
| define <4 x i32> @fa(<4 x i32> %a0, ptr %a1) { |
| ; CHECK-LABEL: @fa( |
| ; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[A0:%.*]], ptr [[A1:%.*]], i32 4, <4 x i1> <i1 true, i1 true, i1 false, i1 true>) |
| ; CHECK-NEXT: ret <4 x i32> [[A0]] |
| ; |
| call void @llvm.masked.store.v4i32.p0(<4 x i32> %a0, ptr %a1, i32 4, <4 x i1> <i1 true, i1 true, i1 false, i1 true>) |
| %v0 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %a1, i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i32> undef) |
| ret <4 x i32> %v0 |
| } |
| |
| ; Store-load, load's mask is a submask of store's mask, thru is not undef. |
| ; Same masks as @fa (store: elements 0, 1, 3; load: elements 0, 3), but the |
| ; load's through value is zeroinitializer instead of undef. |
| ; Expect the load to remain. |
| define <4 x i32> @fb(<4 x i32> %a0, ptr %a1) { |
| ; CHECK-LABEL: @fb( |
| ; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[A0:%.*]], ptr [[A1:%.*]], i32 4, <4 x i1> <i1 true, i1 true, i1 false, i1 true>) |
| ; CHECK-NEXT: [[V0:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[A1]], i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i32> zeroinitializer) |
| ; CHECK-NEXT: ret <4 x i32> [[V0]] |
| ; |
| call void @llvm.masked.store.v4i32.p0(<4 x i32> %a0, ptr %a1, i32 4, <4 x i1> <i1 true, i1 true, i1 false, i1 true>) |
| %v0 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %a1, i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i32> zeroinitializer) |
| ret <4 x i32> %v0 |
| } |
| |
| ; Store-load, load's mask is not a submask of store's mask, thru is undef. |
| ; The store writes %a0 to %a1 with elements 0 and 3 enabled; the load reads |
| ; %a1 with elements 0, 1 and 3 enabled, so it reads element 1, which the store |
| ; did not write. |
| ; Expect the load to remain. |
| define <4 x i32> @fc(<4 x i32> %a0, ptr %a1) { |
| ; CHECK-LABEL: @fc( |
| ; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[A0:%.*]], ptr [[A1:%.*]], i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 true>) |
| ; CHECK-NEXT: [[V0:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[A1]], i32 4, <4 x i1> <i1 true, i1 true, i1 false, i1 true>, <4 x i32> undef) |
| ; CHECK-NEXT: ret <4 x i32> [[V0]] |
| ; |
| call void @llvm.masked.store.v4i32.p0(<4 x i32> %a0, ptr %a1, i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 true>) |
| %v0 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %a1, i32 4, <4 x i1> <i1 true, i1 true, i1 false, i1 true>, <4 x i32> undef) |
| ret <4 x i32> %v0 |
| } |
| |
| ; Declarations of the masked load/store intrinsics called by the tests above. |
| ; As used in this file, the load takes (pointer, i32 4, mask, through value) |
| ; and the store takes (value, pointer, i32 4, mask). |
| declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32, <4 x i1>, <4 x i32>) |
| declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32, <4 x i1>) |