| ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6 |
| ; RUN: opt -passes='print<access-info>' -disable-output < %s 2>&1 | FileCheck %s |
| |
| target datalayout = "e-m:o-i64:64-p1:128:128-f80:128-n8:16:32:64-S128" |
| |
| ; for (i = 0; i < 1024; i++) |
| ; A[i % 16] = A[i % 16] + 1; |
| define void @load_store_same_clamped_i32(ptr %a) { |
| ; CHECK-LABEL: 'load_store_same_clamped_i32' |
| ; CHECK-NEXT: loop: |
| ; CHECK-NEXT: Memory dependences are safe |
| ; CHECK-NEXT: Dependences: |
| ; CHECK-NEXT: Forward: |
| ; CHECK-NEXT: %lv = load i32, ptr %gep, align 4 -> |
| ; CHECK-NEXT: store i32 %add, ptr %gep, align 4 |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: Run-time memory checks: |
| ; CHECK-NEXT: Grouped accesses: |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. |
| ; CHECK-NEXT: SCEV assumptions: |
| ; CHECK-NEXT: {0,+,1}<%loop> Added Flags: <nusw> |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: Expressions re-written: |
| ; CHECK-NEXT: [PSE] %gep = getelementptr inbounds i32, ptr %a, i64 %idx: |
| ; CHECK-NEXT: ((4 * (zext i4 {0,+,1}<%loop> to i64))<nuw><nsw> + %a)<nuw> |
| ; CHECK-NEXT: --> {%a,+,4}<nw><%loop> |
| ; |
; Read-modify-write of one i32 slot addressed by %iv urem 16, using the same
; pointer for the load and the store. The expected output above re-writes the
; address (a zext of an i4 recurrence) to {%a,+,4} under a <nusw> assumption
; and reports a single Forward load -> store dependence.
| entry: |
| br label %loop |
| |
| loop: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] |
; Unsigned remainder by the constant 16 keeps the element index in 0..15.
| %idx = urem i64 %iv, 16 |
| %gep = getelementptr inbounds i32, ptr %a, i64 %idx |
| %lv = load i32, ptr %gep, align 4 |
| %add = add i32 %lv, 1 |
| store i32 %add, ptr %gep, align 4 |
| %iv.next = add nuw nsw i64 %iv, 1 |
; Constant trip count: leave the loop once %iv.next equals 1024.
| %ec = icmp eq i64 %iv.next, 1024 |
| br i1 %ec, label %exit, label %loop |
| |
| exit: |
| ret void |
| } |
| |
| ; for (i = 0; i < 1024; i++) |
| ; A[i % 8] = B[i]; |
| define void @store_clamped_load_linear(ptr %a, ptr %b) { |
| ; CHECK-LABEL: 'store_clamped_load_linear' |
| ; CHECK-NEXT: loop: |
| ; CHECK-NEXT: Memory dependences are safe with run-time checks |
| ; CHECK-NEXT: Dependences: |
| ; CHECK-NEXT: Run-time memory checks: |
| ; CHECK-NEXT: Check 0: |
| ; CHECK-NEXT: Comparing group GRP0: |
| ; CHECK-NEXT: %gep.b = getelementptr inbounds i32, ptr %b, i64 %iv |
| ; CHECK-NEXT: Against group GRP1: |
| ; CHECK-NEXT: %gep.a = getelementptr inbounds i32, ptr %a, i64 %idx.mod |
| ; CHECK-NEXT: Grouped accesses: |
| ; CHECK-NEXT: Group GRP0: |
| ; CHECK-NEXT: (Low: %b High: (4096 + %b)) |
| ; CHECK-NEXT: Member: {%b,+,4}<nuw><%loop> |
| ; CHECK-NEXT: Group GRP1: |
| ; CHECK-NEXT: (Low: %a High: (4096 + %a)) |
| ; CHECK-NEXT: Member: {%a,+,4}<nw><%loop> |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. |
| ; CHECK-NEXT: SCEV assumptions: |
| ; CHECK-NEXT: {0,+,1}<%loop> Added Flags: <nusw> |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: Expressions re-written: |
| ; CHECK-NEXT: [PSE] %gep.a = getelementptr inbounds i32, ptr %a, i64 %idx.mod: |
| ; CHECK-NEXT: ((4 * (zext i3 {0,+,1}<%loop> to i64))<nuw><nsw> + %a)<nuw> |
| ; CHECK-NEXT: --> {%a,+,4}<nw><%loop> |
| ; |
; Loads from %b at the plain induction variable and stores to %a at
; %iv urem 8. Only the store address is listed as re-written above; the two
; pointers land in separate groups that are compared by one run-time check.
| entry: |
| br label %loop |
| |
| loop: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] |
; Store index only: wraps every 8 iterations.
| %idx.mod = urem i64 %iv, 8 |
; The load side is indexed directly by %iv (no remainder).
| %gep.b = getelementptr inbounds i32, ptr %b, i64 %iv |
| %lv = load i32, ptr %gep.b, align 4 |
| %gep.a = getelementptr inbounds i32, ptr %a, i64 %idx.mod |
| store i32 %lv, ptr %gep.a, align 4 |
| %iv.next = add nuw nsw i64 %iv, 1 |
| %ec = icmp eq i64 %iv.next, 1024 |
| br i1 %ec, label %exit, label %loop |
| |
| exit: |
| ret void |
| } |
| |
| ; for (i = 0; i < 1024; i++) |
| ; A[i % 4] = i; |
| define void @only_store_clamped(ptr %a) { |
| ; CHECK-LABEL: 'only_store_clamped' |
| ; CHECK-NEXT: loop: |
| ; CHECK-NEXT: Memory dependences are safe |
| ; CHECK-NEXT: Dependences: |
| ; CHECK-NEXT: Run-time memory checks: |
| ; CHECK-NEXT: Grouped accesses: |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. |
| ; CHECK-NEXT: SCEV assumptions: |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: Expressions re-written: |
| ; |
; The loop contains a single store and no load. The expected output above
; lists no dependences, no run-time checks, no SCEV assumptions and no
; re-written expressions.
| entry: |
| br label %loop |
| |
| loop: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] |
| %idx = urem i64 %iv, 4 |
| %gep = getelementptr inbounds i32, ptr %a, i64 %idx |
; The stored value is the induction variable truncated to i32.
| %val = trunc i64 %iv to i32 |
| store i32 %val, ptr %gep, align 4 |
| %iv.next = add nuw nsw i64 %iv, 1 |
| %ec = icmp eq i64 %iv.next, 1024 |
| br i1 %ec, label %exit, label %loop |
| |
| exit: |
| ret void |
| } |
| |
| ; for (i = 0; i < 1024; i++) |
| ; A[i % 8] = A[i % 8] + 1; |
| define void @clamped_power_of_2(ptr %a) { |
| ; CHECK-LABEL: 'clamped_power_of_2' |
| ; CHECK-NEXT: loop: |
| ; CHECK-NEXT: Memory dependences are safe |
| ; CHECK-NEXT: Dependences: |
| ; CHECK-NEXT: Forward: |
| ; CHECK-NEXT: %lv = load i32, ptr %gep, align 4 -> |
| ; CHECK-NEXT: store i32 %add, ptr %gep, align 4 |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: Run-time memory checks: |
| ; CHECK-NEXT: Grouped accesses: |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. |
| ; CHECK-NEXT: SCEV assumptions: |
| ; CHECK-NEXT: {0,+,1}<%loop> Added Flags: <nusw> |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: Expressions re-written: |
| ; CHECK-NEXT: [PSE] %gep = getelementptr inbounds i32, ptr %a, i64 %idx: |
| ; CHECK-NEXT: ((4 * (zext i3 {0,+,1}<%loop> to i64))<nuw><nsw> + %a)<nuw> |
| ; CHECK-NEXT: --> {%a,+,4}<nw><%loop> |
| ; |
; Read-modify-write of the i32 slot at %iv urem 8. The divisor is a power of
; two, and the expected output above shows the address expressed through a
; zext of an i3 recurrence and re-written to {%a,+,4} under a <nusw>
; assumption.
| entry: |
| br label %loop |
| |
| loop: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] |
| %idx = urem i64 %iv, 8 |
; One pointer feeds both the load and the store below.
| %gep = getelementptr inbounds i32, ptr %a, i64 %idx |
| %lv = load i32, ptr %gep, align 4 |
| %add = add i32 %lv, 1 |
| store i32 %add, ptr %gep, align 4 |
| %iv.next = add nuw nsw i64 %iv, 1 |
| %ec = icmp eq i64 %iv.next, 1024 |
| br i1 %ec, label %exit, label %loop |
| |
| exit: |
| ret void |
| } |
| |
| ; for (i = 0; i < 1024; i++) |
| ; A[i % 7] = A[i % 7] + 1; |
| define void @clamped_non_power_of_2(ptr %a) { |
| ; CHECK-LABEL: 'clamped_non_power_of_2' |
| ; CHECK-NEXT: loop: |
| ; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop |
| ; CHECK-NEXT: Unsafe indirect dependence. |
| ; CHECK-NEXT: Dependences: |
| ; CHECK-NEXT: IndirectUnsafe: |
| ; CHECK-NEXT: %lv = load i32, ptr %gep, align 4 -> |
| ; CHECK-NEXT: store i32 %add, ptr %gep, align 4 |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: Run-time memory checks: |
| ; CHECK-NEXT: Grouped accesses: |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. |
| ; CHECK-NEXT: SCEV assumptions: |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: Expressions re-written: |
| ; |
; Same read-modify-write shape as @clamped_power_of_2, but the divisor is 7.
; The expected output above contains no SCEV assumption and no re-written
; expression, and the load -> store pair is reported as IndirectUnsafe.
| entry: |
| br label %loop |
| |
| loop: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] |
; 7 is not a power of two.
| %idx = urem i64 %iv, 7 |
| %gep = getelementptr inbounds i32, ptr %a, i64 %idx |
| %lv = load i32, ptr %gep, align 4 |
| %add = add i32 %lv, 1 |
| store i32 %add, ptr %gep, align 4 |
| %iv.next = add nuw nsw i64 %iv, 1 |
| %ec = icmp eq i64 %iv.next, 1024 |
| br i1 %ec, label %exit, label %loop |
| |
| exit: |
| ret void |
| } |
| |
| ; for (i = 0; i < 1024; i++) { |
| ; A[i % 8] = x; |
| ; A[i % 8] = y; |
| ; } |
| define void @two_stores_same_clamped(ptr %a, i32 %x, i32 %y) { |
| ; CHECK-LABEL: 'two_stores_same_clamped' |
| ; CHECK-NEXT: loop: |
| ; CHECK-NEXT: Memory dependences are safe |
| ; CHECK-NEXT: Dependences: |
| ; CHECK-NEXT: Run-time memory checks: |
| ; CHECK-NEXT: Grouped accesses: |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. |
| ; CHECK-NEXT: SCEV assumptions: |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: Expressions re-written: |
| ; |
; Two back-to-back stores of loop-invariant arguments through the same
; pointer, with no load in the loop. The expected output above lists no
; dependences, assumptions or re-written expressions.
| entry: |
| br label %loop |
| |
| loop: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] |
| %idx = urem i64 %iv, 8 |
| %gep = getelementptr inbounds i32, ptr %a, i64 %idx |
; Both stores use %gep; the second overwrites the value written by the first.
| store i32 %x, ptr %gep, align 4 |
| store i32 %y, ptr %gep, align 4 |
| %iv.next = add nuw nsw i64 %iv, 1 |
| %ec = icmp eq i64 %iv.next, 1024 |
| br i1 %ec, label %exit, label %loop |
| |
| exit: |
| ret void |
| } |
| |
| ; for (i = 0; i < 1024; i++) |
| ; A[i % 8] = A[(i + 1) % 8]; |
| define void @clamped_offset_load(ptr %a) { |
| ; CHECK-LABEL: 'clamped_offset_load' |
| ; CHECK-NEXT: loop: |
| ; CHECK-NEXT: Memory dependences are safe |
| ; CHECK-NEXT: Dependences: |
| ; CHECK-NEXT: Forward: |
| ; CHECK-NEXT: %lv = load i32, ptr %gep.load, align 4 -> |
| ; CHECK-NEXT: store i32 %lv, ptr %gep.store, align 4 |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: Run-time memory checks: |
| ; CHECK-NEXT: Grouped accesses: |
| ; CHECK-NEXT: Group GRP0: |
| ; CHECK-NEXT: (Low: %a High: (4100 + %a)) |
| ; CHECK-NEXT: Member: {(4 + %a),+,4}<nw><%loop> |
| ; CHECK-NEXT: Member: {%a,+,4}<nw><%loop> |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. |
| ; CHECK-NEXT: SCEV assumptions: |
| ; CHECK-NEXT: {1,+,1}<%loop> Added Flags: <nusw> |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: Expressions re-written: |
| ; CHECK-NEXT: [PSE] %gep.load = getelementptr inbounds i32, ptr %a, i64 %idx.load: |
| ; CHECK-NEXT: ((4 * (zext i3 {1,+,1}<%loop> to i64))<nuw><nsw> + %a)<nuw> |
| ; CHECK-NEXT: --> {(4 + %a),+,4}<nw><%loop> |
| ; CHECK-NEXT: [PSE] %gep.store = getelementptr inbounds i32, ptr %a, i64 %idx.store: |
| ; CHECK-NEXT: ((4 * (zext i3 {0,+,1}<%loop> to i64))<nuw><nsw> + %a)<nuw> |
| ; CHECK-NEXT: --> {%a,+,4}<nw><%loop> |
| ; |
; The load reads the slot at (%iv + 1) urem 8 and the store writes the slot
; at %iv urem 8, both off %a. Both addresses appear as re-written above
; (starting at %a + 4 and at %a respectively), while the assumption list
; names only the {1,+,1} recurrence.
| entry: |
| br label %loop |
| |
| loop: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] |
; Same operation and operands as %iv.next below; this copy feeds the load index.
| %iv.plus1 = add nuw nsw i64 %iv, 1 |
| %idx.store = urem i64 %iv, 8 |
| %idx.load = urem i64 %iv.plus1, 8 |
| %gep.load = getelementptr inbounds i32, ptr %a, i64 %idx.load |
| %lv = load i32, ptr %gep.load, align 4 |
| %gep.store = getelementptr inbounds i32, ptr %a, i64 %idx.store |
| store i32 %lv, ptr %gep.store, align 4 |
| %iv.next = add nuw nsw i64 %iv, 1 |
| %ec = icmp eq i64 %iv.next, 1024 |
| br i1 %ec, label %exit, label %loop |
| |
| exit: |
| ret void |
| } |
| |
| ; for (i = 0; i < 1024; i++) |
| ; A[i % 2] += 1; |
| define void @clamped_small_bound(ptr %a) { |
| ; CHECK-LABEL: 'clamped_small_bound' |
| ; CHECK-NEXT: loop: |
| ; CHECK-NEXT: Memory dependences are safe |
| ; CHECK-NEXT: Dependences: |
| ; CHECK-NEXT: Forward: |
| ; CHECK-NEXT: %lv = load i32, ptr %gep, align 4 -> |
| ; CHECK-NEXT: store i32 %add, ptr %gep, align 4 |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: Run-time memory checks: |
| ; CHECK-NEXT: Grouped accesses: |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. |
| ; CHECK-NEXT: SCEV assumptions: |
| ; CHECK-NEXT: {false,+,true}<%loop> Added Flags: <nusw> |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: Expressions re-written: |
| ; CHECK-NEXT: [PSE] %gep = getelementptr inbounds i32, ptr %a, i64 %idx: |
| ; CHECK-NEXT: ((4 * (zext i1 {false,+,true}<%loop> to i64))<nuw><nsw> + %a)<nuw> |
| ; CHECK-NEXT: --> {%a,+,-4}<%loop> |
| ; |
; Read-modify-write of the slot at %iv urem 2. In the expected output above
; the narrow recurrence is an i1, printed as {false,+,true}.
; NOTE(review): the re-written address is printed with step -4 and without
; the <nw> flag, unlike the other re-writes visible in this file. Presumably
; the i1 step is sign-extended; confirm this is the intended analysis output
; before relying on it.
| entry: |
| br label %loop |
| |
| loop: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] |
; Index alternates between 0 and 1.
| %idx = urem i64 %iv, 2 |
| %gep = getelementptr inbounds i32, ptr %a, i64 %idx |
| %lv = load i32, ptr %gep, align 4 |
| %add = add i32 %lv, 1 |
| store i32 %add, ptr %gep, align 4 |
| %iv.next = add nuw nsw i64 %iv, 1 |
| %ec = icmp eq i64 %iv.next, 1024 |
| br i1 %ec, label %exit, label %loop |
| |
| exit: |
| ret void |
| } |
| |
| ; for (i = 0; i < 1024; i++) |
| ; sum += A[i % 8]; |
| define i32 @clamped_read_only(ptr noalias %a) { |
| ; CHECK-LABEL: 'clamped_read_only' |
| ; CHECK-NEXT: loop: |
| ; CHECK-NEXT: Memory dependences are safe |
| ; CHECK-NEXT: Dependences: |
| ; CHECK-NEXT: Run-time memory checks: |
| ; CHECK-NEXT: Grouped accesses: |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. |
| ; CHECK-NEXT: SCEV assumptions: |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: Expressions re-written: |
| ; |
; Sums the i32 values loaded from the slot at %iv urem 8 and returns the
; total. The loop has no store and %a is marked noalias. The expected output
; above lists no dependences, assumptions or re-written expressions.
| entry: |
| br label %loop |
| |
| loop: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] |
; Running sum carried around the loop, starting at 0.
| %sum = phi i32 [ 0, %entry ], [ %sum.next, %loop ] |
| %idx = urem i64 %iv, 8 |
| %gep = getelementptr inbounds i32, ptr %a, i64 %idx |
| %lv = load i32, ptr %gep, align 4 |
| %sum.next = add i32 %sum, %lv |
| %iv.next = add nuw nsw i64 %iv, 1 |
| %ec = icmp eq i64 %iv.next, 1024 |
| br i1 %ec, label %exit, label %loop |
| |
| exit: |
| ret i32 %sum.next |
| } |
| |
| ; for (i = 0; i < 1024; i++) |
| ; A[i % 8] = A[i] + 1; |
| define void @clamped_and_linear_same_array(ptr %a) { |
| ; CHECK-LABEL: 'clamped_and_linear_same_array' |
| ; CHECK-NEXT: loop: |
| ; CHECK-NEXT: Memory dependences are safe |
| ; CHECK-NEXT: Dependences: |
| ; CHECK-NEXT: Forward: |
| ; CHECK-NEXT: %lv = load i32, ptr %gep.load, align 4 -> |
| ; CHECK-NEXT: store i32 %add, ptr %gep.store, align 4 |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: Run-time memory checks: |
| ; CHECK-NEXT: Grouped accesses: |
| ; CHECK-NEXT: Group GRP0: |
| ; CHECK-NEXT: (Low: %a High: (4096 + %a)) |
| ; CHECK-NEXT: Member: {%a,+,4}<nuw><%loop> |
| ; CHECK-NEXT: Member: {%a,+,4}<nuw><%loop> |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. |
| ; CHECK-NEXT: SCEV assumptions: |
| ; CHECK-NEXT: {0,+,1}<%loop> Added Flags: <nusw> |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: Expressions re-written: |
| ; CHECK-NEXT: [PSE] %gep.store = getelementptr inbounds i32, ptr %a, i64 %idx.mod: |
| ; CHECK-NEXT: ((4 * (zext i3 {0,+,1}<%loop> to i64))<nuw><nsw> + %a)<nuw> |
| ; CHECK-NEXT: --> {%a,+,4}<nuw><%loop> |
| ; |
; Loads from %a at the plain induction variable and stores back into %a at
; %iv urem 8. In the expected output above only the store address is
; re-written, and both group members are printed as the same expression
; {%a,+,4}<nuw>.
| entry: |
| br label %loop |
| |
| loop: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] |
| %idx.mod = urem i64 %iv, 8 |
; Load index is %iv itself; only the store index goes through the urem.
| %gep.load = getelementptr inbounds i32, ptr %a, i64 %iv |
| %lv = load i32, ptr %gep.load, align 4 |
| %add = add i32 %lv, 1 |
| %gep.store = getelementptr inbounds i32, ptr %a, i64 %idx.mod |
| store i32 %add, ptr %gep.store, align 4 |
| %iv.next = add nuw nsw i64 %iv, 1 |
| %ec = icmp eq i64 %iv.next, 1024 |
| br i1 %ec, label %exit, label %loop |
| |
| exit: |
| ret void |
| } |
| |
| ; for (i = 0; i < 1024; i++) |
| ; A[i % 1] = i; |
| define void @clamped_one(ptr %a) { |
| ; CHECK-LABEL: 'clamped_one' |
| ; CHECK-NEXT: loop: |
| ; CHECK-NEXT: Memory dependences are safe |
| ; CHECK-NEXT: Dependences: |
| ; CHECK-NEXT: Run-time memory checks: |
| ; CHECK-NEXT: Grouped accesses: |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. |
| ; CHECK-NEXT: SCEV assumptions: |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: Expressions re-written: |
| ; |
; Degenerate divisor: the index is %iv urem 1. The loop has a single store
; and no load; the expected output above lists no dependences, assumptions
; or re-written expressions.
| entry: |
| br label %loop |
| |
| loop: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] |
; Remainder by 1 is always 0, so every iteration addresses element 0.
| %idx = urem i64 %iv, 1 |
| %gep = getelementptr inbounds i32, ptr %a, i64 %idx |
| %val = trunc i64 %iv to i32 |
| store i32 %val, ptr %gep, align 4 |
| %iv.next = add nuw nsw i64 %iv, 1 |
| %ec = icmp eq i64 %iv.next, 1024 |
| br i1 %ec, label %exit, label %loop |
| |
| exit: |
| ret void |
| } |
| |
| ; for (i = 0; i < 1024; i++) |
| ; A[i % 4] = A[i % 8] + 1; |
| define void @different_clamped_bounds_same_array(ptr %a) { |
| ; CHECK-LABEL: 'different_clamped_bounds_same_array' |
| ; CHECK-NEXT: loop: |
| ; CHECK-NEXT: Memory dependences are safe |
| ; CHECK-NEXT: Dependences: |
| ; CHECK-NEXT: Forward: |
| ; CHECK-NEXT: %lv = load i32, ptr %gep.load, align 4 -> |
| ; CHECK-NEXT: store i32 %add, ptr %gep.store, align 4 |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: Run-time memory checks: |
| ; CHECK-NEXT: Grouped accesses: |
| ; CHECK-NEXT: Group GRP0: |
| ; CHECK-NEXT: (Low: %a High: (4096 + %a)) |
| ; CHECK-NEXT: Member: {%a,+,4}<nw><%loop> |
| ; CHECK-NEXT: Member: {%a,+,4}<nw><%loop> |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. |
| ; CHECK-NEXT: SCEV assumptions: |
| ; CHECK-NEXT: {0,+,1}<%loop> Added Flags: <nusw> |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: Expressions re-written: |
| ; CHECK-NEXT: [PSE] %gep.load = getelementptr inbounds i32, ptr %a, i64 %idx.load: |
| ; CHECK-NEXT: ((4 * (zext i3 {0,+,1}<%loop> to i64))<nuw><nsw> + %a)<nuw> |
| ; CHECK-NEXT: --> {%a,+,4}<nw><%loop> |
| ; CHECK-NEXT: [PSE] %gep.store = getelementptr inbounds i32, ptr %a, i64 %idx.store: |
| ; CHECK-NEXT: ((4 * (zext i2 {0,+,1}<%loop> to i64))<nuw><nsw> + %a)<nuw> |
| ; CHECK-NEXT: --> {%a,+,4}<nw><%loop> |
| ; |
; Load and store hit the same base pointer with different divisors: the load
; index is %iv urem 8 and the store index is %iv urem 4. In the expected
; output above they are zexts of an i3 and an i2 recurrence respectively,
; and both are re-written to the same expression {%a,+,4}<nw>.
| entry: |
| br label %loop |
| |
| loop: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] |
| %idx.load = urem i64 %iv, 8 |
| %idx.store = urem i64 %iv, 4 |
| %gep.load = getelementptr inbounds i32, ptr %a, i64 %idx.load |
| %lv = load i32, ptr %gep.load, align 4 |
| %add = add i32 %lv, 1 |
| %gep.store = getelementptr inbounds i32, ptr %a, i64 %idx.store |
| store i32 %add, ptr %gep.store, align 4 |
| %iv.next = add nuw nsw i64 %iv, 1 |
| %ec = icmp eq i64 %iv.next, 1024 |
| br i1 %ec, label %exit, label %loop |
| |
| exit: |
| ret void |
| } |
| |
| ; for (i = 0; i < 1024; i++) |
| ; A[(2*i) % 8] = i; |
| define void @clamped_non_unit_step(ptr %a) { |
| ; CHECK-LABEL: 'clamped_non_unit_step' |
| ; CHECK-NEXT: loop: |
| ; CHECK-NEXT: Memory dependences are safe |
| ; CHECK-NEXT: Dependences: |
| ; CHECK-NEXT: Run-time memory checks: |
| ; CHECK-NEXT: Grouped accesses: |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. |
| ; CHECK-NEXT: SCEV assumptions: |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: Expressions re-written: |
| ; |
; The value fed to the urem is the induction variable shifted left by one,
; so it advances by 2 per iteration rather than 1. The loop has a single
; store; the expected output above lists no assumptions or re-written
; expressions.
| entry: |
| br label %loop |
| |
| loop: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] |
; %iv2 = 2 * %iv, written as a shift.
| %iv2 = shl i64 %iv, 1 |
| %idx = urem i64 %iv2, 8 |
| %gep = getelementptr inbounds i32, ptr %a, i64 %idx |
| %val = trunc i64 %iv to i32 |
| store i32 %val, ptr %gep, align 4 |
| %iv.next = add nuw nsw i64 %iv, 1 |
| %ec = icmp eq i64 %iv.next, 1024 |
| br i1 %ec, label %exit, label %loop |
| |
| exit: |
| ret void |
| } |
| |
| ; for (i = 0; i < 1024; i++) |
| ; A[i % 8] = i; // i8 type (1 byte) |
| define void @clamped_i8_type(ptr %a) { |
| ; CHECK-LABEL: 'clamped_i8_type' |
| ; CHECK-NEXT: loop: |
| ; CHECK-NEXT: Memory dependences are safe |
| ; CHECK-NEXT: Dependences: |
| ; CHECK-NEXT: Run-time memory checks: |
| ; CHECK-NEXT: Grouped accesses: |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. |
| ; CHECK-NEXT: SCEV assumptions: |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: Expressions re-written: |
| ; |
; Byte-sized variant: the GEP element type and the stored value are both i8,
; with align 1. The loop has a single store and no load; the expected output
; above lists no assumptions or re-written expressions.
| entry: |
| br label %loop |
| |
| loop: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] |
| %idx = urem i64 %iv, 8 |
| %gep = getelementptr inbounds i8, ptr %a, i64 %idx |
; Stored value is the induction variable truncated to 8 bits.
| %val = trunc i64 %iv to i8 |
| store i8 %val, ptr %gep, align 1 |
| %iv.next = add nuw nsw i64 %iv, 1 |
| %ec = icmp eq i64 %iv.next, 1024 |
| br i1 %ec, label %exit, label %loop |
| |
| exit: |
| ret void |
| } |
| |
| ; for (i = 0; i < 1024; i++) |
| ; A[i % 4] += 1; // i64 type (8 bytes) |
| define void @clamped_i64_type(ptr %a) { |
| ; CHECK-LABEL: 'clamped_i64_type' |
| ; CHECK-NEXT: loop: |
| ; CHECK-NEXT: Memory dependences are safe |
| ; CHECK-NEXT: Dependences: |
| ; CHECK-NEXT: Forward: |
| ; CHECK-NEXT: %lv = load i64, ptr %gep, align 8 -> |
| ; CHECK-NEXT: store i64 %add, ptr %gep, align 8 |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: Run-time memory checks: |
| ; CHECK-NEXT: Grouped accesses: |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. |
| ; CHECK-NEXT: SCEV assumptions: |
| ; CHECK-NEXT: {0,+,1}<%loop> Added Flags: <nusw> |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: Expressions re-written: |
| ; CHECK-NEXT: [PSE] %gep = getelementptr inbounds i64, ptr %a, i64 %idx: |
| ; CHECK-NEXT: ((8 * (zext i2 {0,+,1}<%loop> to i64))<nuw><nsw> + %a)<nuw> |
| ; CHECK-NEXT: --> {%a,+,8}<nw><%loop> |
| ; |
; 8-byte variant of the read-modify-write pattern: i64 elements indexed by
; %iv urem 4. The expected output above shows the address scaled by 8 and
; re-written to {%a,+,8} under a <nusw> assumption on an i2 recurrence.
| entry: |
| br label %loop |
| |
| loop: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] |
| %idx = urem i64 %iv, 4 |
; Element type is i64, so consecutive indices are 8 bytes apart.
| %gep = getelementptr inbounds i64, ptr %a, i64 %idx |
| %lv = load i64, ptr %gep, align 8 |
| %add = add i64 %lv, 1 |
| store i64 %add, ptr %gep, align 8 |
| %iv.next = add nuw nsw i64 %iv, 1 |
| %ec = icmp eq i64 %iv.next, 1024 |
| br i1 %ec, label %exit, label %loop |
| |
| exit: |
| ret void |
| } |
| |
| ; for (i = 0; i < 1024; i++) |
| ; A[i % C] = B[i]; |
| define void @clamped_non_constant(ptr %a, ptr %b, i64 %c) { |
| ; CHECK-LABEL: 'clamped_non_constant' |
| ; CHECK-NEXT: loop: |
| ; CHECK-NEXT: Report: cannot identify array bounds |
| ; CHECK-NEXT: Dependences: |
| ; CHECK-NEXT: Run-time memory checks: |
| ; CHECK-NEXT: Grouped accesses: |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. |
| ; CHECK-NEXT: SCEV assumptions: |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: Expressions re-written: |
| ; |
; The urem divisor is the function argument %c rather than a constant. The
; expected output above reports "cannot identify array bounds" and lists no
; assumptions or re-written expressions.
| entry: |
| br label %loop |
| |
| loop: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] |
; Divisor is only known at run time.
| %idx = urem i64 %iv, %c |
| %gep.b = getelementptr inbounds i32, ptr %b, i64 %iv |
| %lv = load i32, ptr %gep.b, align 4 |
| %gep.a = getelementptr inbounds i32, ptr %a, i64 %idx |
| store i32 %lv, ptr %gep.a, align 4 |
| %iv.next = add nuw nsw i64 %iv, 1 |
; The trip count is still the constant 1024; only the divisor is variable.
| %ec = icmp eq i64 %iv.next, 1024 |
| br i1 %ec, label %exit, label %loop |
| |
| exit: |
| ret void |
| } |
| |
| ; for (i = 0; i < 1024; i++) |
| ; A[i % 16] = B[i % 16] + 1; |
| define void @clamped_different_arrays_same_bound(ptr %a, ptr %b) { |
| ; CHECK-LABEL: 'clamped_different_arrays_same_bound' |
| ; CHECK-NEXT: loop: |
| ; CHECK-NEXT: Memory dependences are safe with run-time checks |
| ; CHECK-NEXT: Dependences: |
| ; CHECK-NEXT: Run-time memory checks: |
| ; CHECK-NEXT: Check 0: |
| ; CHECK-NEXT: Comparing group GRP0: |
| ; CHECK-NEXT: %gep.a = getelementptr inbounds i32, ptr %a, i64 %idx |
| ; CHECK-NEXT: Against group GRP1: |
| ; CHECK-NEXT: %gep.b = getelementptr inbounds i32, ptr %b, i64 %idx |
| ; CHECK-NEXT: Grouped accesses: |
| ; CHECK-NEXT: Group GRP0: |
| ; CHECK-NEXT: (Low: %a High: (4096 + %a)) |
| ; CHECK-NEXT: Member: {%a,+,4}<nw><%loop> |
| ; CHECK-NEXT: Group GRP1: |
| ; CHECK-NEXT: (Low: %b High: (4096 + %b)) |
| ; CHECK-NEXT: Member: {%b,+,4}<nw><%loop> |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. |
| ; CHECK-NEXT: SCEV assumptions: |
| ; CHECK-NEXT: {0,+,1}<%loop> Added Flags: <nusw> |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: Expressions re-written: |
| ; CHECK-NEXT: [PSE] %gep.b = getelementptr inbounds i32, ptr %b, i64 %idx: |
| ; CHECK-NEXT: ((4 * (zext i4 {0,+,1}<%loop> to i64))<nuw><nsw> + %b)<nuw> |
| ; CHECK-NEXT: --> {%b,+,4}<nw><%loop> |
| ; CHECK-NEXT: [PSE] %gep.a = getelementptr inbounds i32, ptr %a, i64 %idx: |
| ; CHECK-NEXT: ((4 * (zext i4 {0,+,1}<%loop> to i64))<nuw><nsw> + %a)<nuw> |
| ; CHECK-NEXT: --> {%a,+,4}<nw><%loop> |
| ; |
; Load from %b and store to %a, both indexed by the same %iv urem 16 value.
; In the expected output above both addresses are re-written under one
; <nusw> assumption, and the two pointers are placed in separate groups
; compared by a single run-time check.
| entry: |
| br label %loop |
| |
| loop: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] |
; One shared index value feeds both GEPs below.
| %idx = urem i64 %iv, 16 |
| %gep.b = getelementptr inbounds i32, ptr %b, i64 %idx |
| %lv = load i32, ptr %gep.b, align 4 |
| %add = add i32 %lv, 1 |
| %gep.a = getelementptr inbounds i32, ptr %a, i64 %idx |
| store i32 %add, ptr %gep.a, align 4 |
| %iv.next = add nuw nsw i64 %iv, 1 |
| %ec = icmp eq i64 %iv.next, 1024 |
| br i1 %ec, label %exit, label %loop |
| |
| exit: |
| ret void |
| } |
| |
| ; Two stores with different modulo bounds to same array. |
| ; for (i = 0; i < 1024; i++) { |
| ; A[i % 4] = x; |
| ; A[i % 8] = y; |
| ; } |
| define void @two_stores_different_clamped(ptr %a, i32 %x, i32 %y) { |
| ; CHECK-LABEL: 'two_stores_different_clamped' |
| ; CHECK-NEXT: loop: |
| ; CHECK-NEXT: Memory dependences are safe |
| ; CHECK-NEXT: Dependences: |
| ; CHECK-NEXT: Forward: |
| ; CHECK-NEXT: store i32 %x, ptr %gep4, align 4 -> |
| ; CHECK-NEXT: store i32 %y, ptr %gep8, align 4 |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: Run-time memory checks: |
| ; CHECK-NEXT: Grouped accesses: |
| ; CHECK-NEXT: Group GRP0: |
| ; CHECK-NEXT: (Low: %a High: (4096 + %a)) |
| ; CHECK-NEXT: Member: {%a,+,4}<nw><%loop> |
| ; CHECK-NEXT: Member: {%a,+,4}<nw><%loop> |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. |
| ; CHECK-NEXT: SCEV assumptions: |
| ; CHECK-NEXT: {0,+,1}<%loop> Added Flags: <nusw> |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: Expressions re-written: |
| ; CHECK-NEXT: [PSE] %gep4 = getelementptr inbounds i32, ptr %a, i64 %idx4: |
| ; CHECK-NEXT: ((4 * (zext i2 {0,+,1}<%loop> to i64))<nuw><nsw> + %a)<nuw> |
| ; CHECK-NEXT: --> {%a,+,4}<nw><%loop> |
| ; CHECK-NEXT: [PSE] %gep8 = getelementptr inbounds i32, ptr %a, i64 %idx8: |
| ; CHECK-NEXT: ((4 * (zext i3 {0,+,1}<%loop> to i64))<nuw><nsw> + %a)<nuw> |
| ; CHECK-NEXT: --> {%a,+,4}<nw><%loop> |
| ; |
; Two stores into %a per iteration, indexed by %iv urem 4 and %iv urem 8.
; In the expected output above both addresses are re-written to the same
; expression {%a,+,4}<nw>, and a Forward store -> store dependence is
; reported.
| entry: |
| br label %loop |
| |
| loop: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] |
| %idx4 = urem i64 %iv, 4 |
| %idx8 = urem i64 %iv, 8 |
| %gep4 = getelementptr inbounds i32, ptr %a, i64 %idx4 |
| store i32 %x, ptr %gep4, align 4 |
| %gep8 = getelementptr inbounds i32, ptr %a, i64 %idx8 |
| store i32 %y, ptr %gep8, align 4 |
| %iv.next = add nuw nsw i64 %iv, 1 |
| %ec = icmp eq i64 %iv.next, 1024 |
| br i1 %ec, label %exit, label %loop |
| |
| exit: |
| ret void |
| } |
| |
| define void @srem_not_matched_as_clamped(ptr %a) { |
| ; CHECK-LABEL: 'srem_not_matched_as_clamped' |
| ; CHECK-NEXT: loop: |
| ; CHECK-NEXT: Memory dependences are safe |
| ; CHECK-NEXT: Dependences: |
| ; CHECK-NEXT: Run-time memory checks: |
| ; CHECK-NEXT: Grouped accesses: |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. |
| ; CHECK-NEXT: SCEV assumptions: |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: Expressions re-written: |
| ; |
; for (i = 0; i < 1024; i++)
;   A[i % 8] = i;   // index computed with a signed remainder (srem)
;
; Same shape as the store-only urem tests, but the index uses srem. The
; expected output above lists no SCEV assumptions and no re-written
; expressions.
| entry: |
| br label %loop |
| |
| loop: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] |
; Signed remainder, in contrast to the urem used by the other tests.
| %idx = srem i64 %iv, 8 |
| %gep = getelementptr inbounds i32, ptr %a, i64 %idx |
| %val = trunc i64 %iv to i32 |
| store i32 %val, ptr %gep, align 4 |
| %iv.next = add nuw nsw i64 %iv, 1 |
| %ec = icmp eq i64 %iv.next, 1024 |
| br i1 %ec, label %exit, label %loop |
| |
| exit: |
| ret void |
| } |
| |
| define void @clamped_multi_index_gep(ptr %a) { |
| ; CHECK-LABEL: 'clamped_multi_index_gep' |
| ; CHECK-NEXT: loop: |
| ; CHECK-NEXT: Memory dependences are safe |
| ; CHECK-NEXT: Dependences: |
| ; CHECK-NEXT: Run-time memory checks: |
| ; CHECK-NEXT: Grouped accesses: |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. |
| ; CHECK-NEXT: SCEV assumptions: |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: Expressions re-written: |
| ; |
; for (i = 0; i < 1024; i++)
;   A[0][i % 7] = i;   // A addressed as [16 x i32] via a two-index GEP
;
; The store address is formed by a GEP with two indices (0, %idx) over a
; [16 x i32] array type. The expected output above lists no SCEV assumptions
; and no re-written expressions.
; NOTE(review): the divisor here is 7 (not a power of two) and the loop has
; only a store, so several properties differ from the re-written cases at
; once; confirm which one this test is meant to isolate.
| entry: |
| br label %loop |
| |
| loop: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] |
| %idx = urem i64 %iv, 7 |
; Leading index 0 selects the array itself; %idx selects the element.
| %gep = getelementptr inbounds [16 x i32], ptr %a, i64 0, i64 %idx |
| %val = trunc i64 %iv to i32 |
| store i32 %val, ptr %gep, align 4 |
| %iv.next = add nuw nsw i64 %iv, 1 |
| %ec = icmp eq i64 %iv.next, 1024 |
| br i1 %ec, label %exit, label %loop |
| |
| exit: |
| ret void |
| } |
| |
| define void @clamped_mod_256_narrow_cutoff(ptr %a) { |
| ; CHECK-LABEL: 'clamped_mod_256_narrow_cutoff' |
| ; CHECK-NEXT: loop: |
| ; CHECK-NEXT: Memory dependences are safe |
| ; CHECK-NEXT: Dependences: |
| ; CHECK-NEXT: Forward: |
| ; CHECK-NEXT: %lv = load i32, ptr %gep, align 4 -> |
| ; CHECK-NEXT: store i32 %add, ptr %gep, align 4 |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: Run-time memory checks: |
| ; CHECK-NEXT: Grouped accesses: |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. |
| ; CHECK-NEXT: SCEV assumptions: |
| ; CHECK-NEXT: {0,+,1}<%loop> Added Flags: <nusw> |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: Expressions re-written: |
| ; CHECK-NEXT: [PSE] %gep = getelementptr inbounds i32, ptr %a, i64 %idx: |
| ; CHECK-NEXT: ((4 * (zext i8 {0,+,1}<%loop> to i64))<nuw><nsw> + %a)<nuw> |
| ; CHECK-NEXT: --> {%a,+,4}<nw><%loop> |
| ; |
; for (i = 0; i < 1024; i++)
;   A[i % 256] = A[i % 256] + 1;
;
; Read-modify-write with divisor 256. In the expected output above the
; narrow recurrence is an i8, and the address is re-written to {%a,+,4}
; under a <nusw> assumption.
| entry: |
| br label %loop |
| |
| loop: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] |
; Index wraps every 256 iterations; the loop runs 1024 iterations in total.
| %idx = urem i64 %iv, 256 |
| %gep = getelementptr inbounds i32, ptr %a, i64 %idx |
| %lv = load i32, ptr %gep, align 4 |
| %add = add i32 %lv, 1 |
| store i32 %add, ptr %gep, align 4 |
| %iv.next = add nuw nsw i64 %iv, 1 |
| %ec = icmp eq i64 %iv.next, 1024 |
| br i1 %ec, label %exit, label %loop |
| |
| exit: |
| ret void |
| } |
| |
| define void @clamped_with_const_offset(ptr %a) { |
| ; CHECK-LABEL: 'clamped_with_const_offset' |
| ; CHECK-NEXT: loop: |
| ; CHECK-NEXT: Memory dependences are safe |
| ; CHECK-NEXT: Dependences: |
| ; CHECK-NEXT: Forward: |
| ; CHECK-NEXT: %lv = load i32, ptr %gep.off, align 4 -> |
| ; CHECK-NEXT: store i32 %add, ptr %gep.off, align 4 |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: Run-time memory checks: |
| ; CHECK-NEXT: Grouped accesses: |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. |
| ; CHECK-NEXT: SCEV assumptions: |
| ; CHECK-NEXT: {0,+,1}<%loop> Added Flags: <nusw> |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: Expressions re-written: |
| ; CHECK-NEXT: [PSE] %gep.off = getelementptr inbounds i8, ptr %gep, i64 16: |
| ; CHECK-NEXT: (16 + (4 * (zext i2 {0,+,1}<%loop> to i64))<nuw><nsw> + %a)<nuw> |
| ; CHECK-NEXT: --> {(16 + %a),+,4}<nw><%loop> |
| ; |
; for (i = 0; i < 1024; i++)
;   A[(i % 4) + 4] += 1;   // expressed as a 16-byte offset on top of A[i % 4]
;
; The accessed pointer is a second GEP that adds a constant 16 bytes to the
; urem-indexed element address. The expected output above shows the
; re-written recurrence starting at (16 + %a) with step 4.
| entry: |
| br label %loop |
| |
| loop: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] |
| %idx = urem i64 %iv, 4 |
| %gep = getelementptr inbounds i32, ptr %a, i64 %idx |
; Byte-typed GEP: the offset 16 is in bytes, applied after the element index.
| %gep.off = getelementptr inbounds i8, ptr %gep, i64 16 |
| %lv = load i32, ptr %gep.off, align 4 |
| %add = add i32 %lv, 1 |
| store i32 %add, ptr %gep.off, align 4 |
| %iv.next = add nuw nsw i64 %iv, 1 |
| %ec = icmp eq i64 %iv.next, 1024 |
| br i1 %ec, label %exit, label %loop |
| |
| exit: |
| ret void |
| } |
| |
| define void @clamped_load_clamped_store_same(ptr %a) { |
| ; CHECK-LABEL: 'clamped_load_clamped_store_same' |
| ; CHECK-NEXT: loop: |
| ; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop |
| ; CHECK-NEXT: Backward loop carried data dependence. |
| ; CHECK-NEXT: Dependences: |
| ; CHECK-NEXT: Backward: |
| ; CHECK-NEXT: %lv = load i32, ptr %gep.l, align 4 -> |
| ; CHECK-NEXT: store i32 %add, ptr %gep.s, align 4 |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: Run-time memory checks: |
| ; CHECK-NEXT: Grouped accesses: |
| ; CHECK-NEXT: Group GRP0: |
| ; CHECK-NEXT: (Low: %a High: (4100 + %a)) |
| ; CHECK-NEXT: Member: {%a,+,4}<nw><%loop> |
| ; CHECK-NEXT: Member: {(4 + %a),+,4}<nw><%loop> |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. |
| ; CHECK-NEXT: SCEV assumptions: |
| ; CHECK-NEXT: {1,+,1}<%loop> Added Flags: <nusw> |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: Expressions re-written: |
| ; CHECK-NEXT: [PSE] %gep.l = getelementptr inbounds i32, ptr %a, i64 %idx.l: |
| ; CHECK-NEXT: ((4 * (zext i2 {0,+,1}<%loop> to i64))<nuw><nsw> + %a)<nuw> |
| ; CHECK-NEXT: --> {%a,+,4}<nw><%loop> |
| ; CHECK-NEXT: [PSE] %gep.s = getelementptr inbounds i32, ptr %a, i64 %idx.s: |
| ; CHECK-NEXT: ((4 * (zext i2 {1,+,1}<%loop> to i64))<nuw><nsw> + %a)<nuw> |
| ; CHECK-NEXT: --> {(4 + %a),+,4}<nw><%loop> |
| ; |
; for (i = 0; i < 1024; i++)
;   A[(i + 1) % 4] = A[i % 4] + 1;
;
; The load reads the slot at %iv urem 4 and the store writes the slot at
; (%iv + 1) urem 4, one element ahead of the load. The expected output above
; re-writes both addresses and reports a Backward loop-carried dependence,
; so the loop is flagged as unsafe.
| entry: |
| br label %loop |
| |
| loop: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] |
| %idx.l = urem i64 %iv, 4 |
; Unlike %iv.next below, this add carries no nuw/nsw flags.
| %iv.p1 = add i64 %iv, 1 |
| %idx.s = urem i64 %iv.p1, 4 |
| %gep.l = getelementptr inbounds i32, ptr %a, i64 %idx.l |
| %gep.s = getelementptr inbounds i32, ptr %a, i64 %idx.s |
| %lv = load i32, ptr %gep.l, align 4 |
| %add = add i32 %lv, 1 |
| store i32 %add, ptr %gep.s, align 4 |
| %iv.next = add nuw nsw i64 %iv, 1 |
| %ec = icmp eq i64 %iv.next, 1024 |
| br i1 %ec, label %exit, label %loop |
| |
| exit: |
| ret void |
| } |
| |
| define void @clamped_i24_store_size(ptr %a, ptr %b) { |
| ; CHECK-LABEL: 'clamped_i24_store_size' |
| ; CHECK-NEXT: loop: |
| ; CHECK-NEXT: Memory dependences are safe with run-time checks |
| ; CHECK-NEXT: Dependences: |
| ; CHECK-NEXT: Forward: |
| ; CHECK-NEXT: %lv = load i24, ptr %gep.a, align 4 -> |
| ; CHECK-NEXT: store i24 %add, ptr %gep.a, align 4 |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: Run-time memory checks: |
| ; CHECK-NEXT: Check 0: |
| ; CHECK-NEXT: Comparing group GRP0: |
| ; CHECK-NEXT: %gep.b = getelementptr inbounds i24, ptr %b, i64 %iv |
| ; CHECK-NEXT: Against group GRP1: |
| ; CHECK-NEXT: %gep.a = getelementptr inbounds i24, ptr %a, i64 %idx |
| ; CHECK-NEXT: %gep.a = getelementptr inbounds i24, ptr %a, i64 %idx |
| ; CHECK-NEXT: Grouped accesses: |
| ; CHECK-NEXT: Group GRP0: |
| ; CHECK-NEXT: (Low: %b High: (4095 + %b)) |
| ; CHECK-NEXT: Member: {%b,+,4}<nuw><%loop> |
| ; CHECK-NEXT: Group GRP1: |
| ; CHECK-NEXT: (Low: %a High: (4095 + %a)) |
| ; CHECK-NEXT: Member: {%a,+,4}<nw><%loop> |
| ; CHECK-NEXT: Member: {%a,+,4}<nw><%loop> |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. |
| ; CHECK-NEXT: SCEV assumptions: |
| ; CHECK-NEXT: {0,+,1}<%loop> Added Flags: <nusw> |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: Expressions re-written: |
| ; CHECK-NEXT: [PSE] %gep.a = getelementptr inbounds i24, ptr %a, i64 %idx: |
| ; CHECK-NEXT: ((4 * (zext i2 {0,+,1}<%loop> to i64))<nuw><nsw> + %a)<nuw> |
| ; CHECK-NEXT: --> {%a,+,4}<nw><%loop> |
| ; |
; for (i = 0; i < 1024; i++) {
;   A[i % 4] += 1;      // i24 elements
;   B[i] = A[i % 4];    // stores the freshly incremented value
; }
;
; Uses i24 accesses with align 4. In the expected output above the GEP
; stride is printed as 4 while the group upper bounds are base + 4095, and
; the %a and %b groups are separated by one run-time check.
| entry: |
| br label %loop |
| |
| loop: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] |
| %idx = urem i64 %iv, 4 |
| %gep.a = getelementptr inbounds i24, ptr %a, i64 %idx |
| %lv = load i24, ptr %gep.a, align 4 |
| %add = add i24 %lv, 1 |
| store i24 %add, ptr %gep.a, align 4 |
; Second store: the same incremented value goes to %b at the plain %iv.
| %gep.b = getelementptr inbounds i24, ptr %b, i64 %iv |
| store i24 %add, ptr %gep.b, align 4 |
| %iv.next = add nuw nsw i64 %iv, 1 |
| %ec = icmp eq i64 %iv.next, 1024 |
| br i1 %ec, label %exit, label %loop |
| |
| exit: |
| ret void |
| } |
| |
| ; Clamped scalable-vector store to A plus a linear i32 copy from B into A: |
| ; for (i = 0; i < 1024; i++) { |
| ; ((<vscale x 4 x i32> *)A)[i % 4] = zeroinitializer; |
| ; ((i32 *)A)[i] = ((i32 *)B)[i]; |
| ; } |
| ; Note that %gep.b2 indexes %a (not %b), so both stores write through %a. |
| ; The expected output below reports an IndirectUnsafe dependence between |
| ; those two stores. |
| define void @clamped_scalable_with_dep(ptr %a, ptr %b) { |
| ; CHECK-LABEL: 'clamped_scalable_with_dep' |
| ; CHECK-NEXT: loop: |
| ; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop |
| ; CHECK-NEXT: Unsafe indirect dependence. |
| ; CHECK-NEXT: Dependences: |
| ; CHECK-NEXT: IndirectUnsafe: |
| ; CHECK-NEXT: store <vscale x 4 x i32> zeroinitializer, ptr %gep.a, align 16 -> |
| ; CHECK-NEXT: store i32 %lv, ptr %gep.b2, align 4 |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: Run-time memory checks: |
| ; CHECK-NEXT: Check 0: |
| ; CHECK-NEXT: Comparing group GRP0: |
| ; CHECK-NEXT: %gep.b2 = getelementptr inbounds i32, ptr %a, i64 %iv |
| ; CHECK-NEXT: Against group GRP2: |
| ; CHECK-NEXT: %gep.b = getelementptr inbounds i32, ptr %b, i64 %iv |
| ; CHECK-NEXT: Check 1: |
| ; CHECK-NEXT: Comparing group GRP1: |
| ; CHECK-NEXT: %gep.a = getelementptr inbounds <vscale x 4 x i32>, ptr %a, i64 %idx |
| ; CHECK-NEXT: Against group GRP2: |
| ; CHECK-NEXT: %gep.b = getelementptr inbounds i32, ptr %b, i64 %iv |
| ; CHECK-NEXT: Grouped accesses: |
| ; CHECK-NEXT: Group GRP0: |
| ; CHECK-NEXT: (Low: %a High: (4096 + %a)) |
| ; CHECK-NEXT: Member: {%a,+,4}<nuw><%loop> |
| ; CHECK-NEXT: Group GRP1: |
| ; CHECK-NEXT: (Low: (((16368 * vscale) + %a) umin %a) High: ((16 * vscale) + (((16368 * vscale) + %a) umax %a))) |
| ; CHECK-NEXT: Member: {%a,+,(16 * vscale)}<%loop> |
| ; CHECK-NEXT: Group GRP2: |
| ; CHECK-NEXT: (Low: %b High: (4096 + %b)) |
| ; CHECK-NEXT: Member: {%b,+,4}<nuw><%loop> |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. |
| ; CHECK-NEXT: SCEV assumptions: |
| ; CHECK-NEXT: {0,+,1}<%loop> Added Flags: <nusw> |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: Expressions re-written: |
| ; CHECK-NEXT: [PSE] %gep.a = getelementptr inbounds <vscale x 4 x i32>, ptr %a, i64 %idx: |
| ; CHECK-NEXT: ((16 * vscale * (zext i2 {0,+,1}<%loop> to i64)) + %a) |
| ; CHECK-NEXT: --> {%a,+,(16 * vscale)}<%loop> |
| ; |
| entry: |
| br label %loop |
| |
| loop: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] |
| %idx = urem i64 %iv, 4 |
| %gep.a = getelementptr inbounds <vscale x 4 x i32>, ptr %a, i64 %idx |
| store <vscale x 4 x i32> zeroinitializer, ptr %gep.a, align 16 |
| %gep.b = getelementptr inbounds i32, ptr %b, i64 %iv |
| %lv = load i32, ptr %gep.b, align 4 |
| %gep.b2 = getelementptr inbounds i32, ptr %a, i64 %iv |
| store i32 %lv, ptr %gep.b2, align 4 |
| %iv.next = add nuw nsw i64 %iv, 1 |
| %ec = icmp eq i64 %iv.next, 1024 |
| br i1 %ec, label %exit, label %loop |
| |
| exit: |
| ret void |
| } |
| |
| ; Clamped index scaled by a constant wider than 64 bits, in address space 1 |
| ; (128-bit pointers per the p1:128:128 entry in the target datalayout): |
| ; for (i128 i = 0; i < 1024; i++) |
| ; *(i64 *)((i8 *)A + (i % 4) * 36893488147419103232) += 1; |
| ; The byte-offset scale 36893488147419103232 is 2^65. The expected output |
| ; below reports an IndirectUnsafe dependence between the load and the store |
| ; through %gep. |
| define void @clamped_mul_huge_scale_as1(ptr addrspace(1) %a) { |
| ; CHECK-LABEL: 'clamped_mul_huge_scale_as1' |
| ; CHECK-NEXT: loop: |
| ; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop |
| ; CHECK-NEXT: Unsafe indirect dependence. |
| ; CHECK-NEXT: Dependences: |
| ; CHECK-NEXT: IndirectUnsafe: |
| ; CHECK-NEXT: %ld = load i64, ptr addrspace(1) %gep, align 8 -> |
| ; CHECK-NEXT: store i64 %add, ptr addrspace(1) %gep, align 8 |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: Run-time memory checks: |
| ; CHECK-NEXT: Grouped accesses: |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. |
| ; CHECK-NEXT: SCEV assumptions: |
| ; CHECK-NEXT: {0,+,1}<%loop> Added Flags: <nusw> |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: Expressions re-written: |
| ; CHECK-NEXT: [PSE] %gep = getelementptr inbounds i8, ptr addrspace(1) %a, i128 %off: |
| ; CHECK-NEXT: ((36893488147419103232 * (zext i2 {0,+,1}<%loop> to i128))<nuw><nsw> + %a)<nuw> |
| ; CHECK-NEXT: --> {%a,+,36893488147419103232}<nw><%loop> |
| ; |
| entry: |
| br label %loop |
| loop: |
| %iv = phi i128 [ 0, %entry ], [ %iv.next, %loop ] |
| %idx = urem i128 %iv, 4 |
| %off = mul i128 %idx, 36893488147419103232 |
| %gep = getelementptr inbounds i8, ptr addrspace(1) %a, i128 %off |
| %ld = load i64, ptr addrspace(1) %gep, align 8 |
| %add = add i64 %ld, 1 |
| store i64 %add, ptr addrspace(1) %gep, align 8 |
| %iv.next = add nuw nsw i128 %iv, 1 |
| %ec = icmp eq i128 %iv.next, 1024 |
| br i1 %ec, label %exit, label %loop |
| exit: |
| ret void |
| } |