| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py |
| ; RUN: opt < %s -basic-aa -slp-vectorizer -S | FileCheck %s |
| |
| ; Module-level target settings. The triple selects AArch64 Linux, which matches |
| ; the AArch64 assembly listings shown in the comments of this test. |
| target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" |
| target triple = "aarch64--linux-gnu" |
| |
| ; These examples correspond to input code like: |
| ; |
| ; void t(long * __restrict a, long * __restrict b) { |
| ; a[0] *= b[0]; |
| ; a[1] *= b[1]; |
| ; } |
| ; |
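| ; If we SLP vectorise this, we end up with something like the code below. |
| ; Because there is no 64-bit element vector multiply (mul.2d), each lane is |
| ; moved to a general-purpose register, multiplied there, and moved back: |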
| ; |
| ; ldr q0, [x1] |
| ; ldr q1, [x0] |
| ; fmov x8, d0 |
| ; mov x10, v0.d[1] |
| ; fmov x9, d1 |
| ; mov x11, v1.d[1] |
| ; mul x8, x9, x8 |
| ; mul x9, x11, x10 |
| ; fmov d0, x8 |
| ; mov v0.d[1], x9 |
| ; str q0, [x0] |
| ; ret |
| ; |
| ; If we don't SLP vectorise and keep the code scalar, we get this instead: |
| ; |
| ; ldp x8, x9, [x1] |
| ; ldp x10, x11, [x0] |
| ; mul x9, x11, x9 |
| ; mul x8, x10, x8 |
| ; stp x8, x9, [x0] |
| ; ret |
| ; |
| ; @mul: element-wise 64-bit multiply of two adjacent elements, |
| ; a[i] = a[i] * b[i] for i = 0 and i = 1. |
| ; The FileCheck assertions below contain only scalar i64 loads, multiplies and |
| ; stores, i.e. the expected output of the pass is the unvectorized input. |
| define void @mul(i64* noalias nocapture %a, i64* noalias nocapture readonly %b) { |
| ; CHECK-LABEL: @mul( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: [[TMP0:%.*]] = load i64, i64* [[B:%.*]], align 8 |
| ; CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* [[A:%.*]], align 8 |
| ; CHECK-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP1]], [[TMP0]] |
| ; CHECK-NEXT: store i64 [[MUL]], i64* [[A]], align 8 |
| ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, i64* [[B]], i64 1 |
| ; CHECK-NEXT: [[TMP2:%.*]] = load i64, i64* [[ARRAYIDX2]], align 8 |
| ; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 1 |
| ; CHECK-NEXT: [[TMP3:%.*]] = load i64, i64* [[ARRAYIDX3]], align 8 |
| ; CHECK-NEXT: [[MUL4:%.*]] = mul nsw i64 [[TMP3]], [[TMP2]] |
| ; CHECK-NEXT: store i64 [[MUL4]], i64* [[ARRAYIDX3]], align 8 |
| ; CHECK-NEXT: ret void |
| ; |
| entry: |
| ; Lane 0: a[0] = a[0] * b[0]. |
| %0 = load i64, i64* %b, align 8 |
| %1 = load i64, i64* %a, align 8 |
| %mul = mul nsw i64 %1, %0 |
| store i64 %mul, i64* %a, align 8 |
| ; Lane 1: a[1] = a[1] * b[1], addressed one i64 element past each base pointer. |
| %arrayidx2 = getelementptr inbounds i64, i64* %b, i64 1 |
| %2 = load i64, i64* %arrayidx2, align 8 |
| %arrayidx3 = getelementptr inbounds i64, i64* %a, i64 1 |
| %3 = load i64, i64* %arrayidx3, align 8 |
| %mul4 = mul nsw i64 %3, %2 |
| store i64 %mul4, i64* %arrayidx3, align 8 |
| ret void |
| } |
| |
| ; Similar example, but now a multiply-accumulate: |
| ; |
| ; void x (long * __restrict a, long * __restrict b) { |
| ; a[0] *= b[0]; |
| ; a[1] *= b[1]; |
| ; a[0] += b[0]; |
| ; a[1] += b[1]; |
| ; } |
| ; |
| ; @mac: for i = 0 and i = 1, computes a[i] = a[i] * b[i] + b[i]. |
| ; Each product stays in an SSA value and only the final sum is stored, so there |
| ; is one store per element. The FileCheck assertions below contain only scalar |
| ; i64 operations, i.e. the expected output of the pass is the unvectorized input. |
| define void @mac(i64* noalias nocapture %a, i64* noalias nocapture readonly %b) { |
| ; CHECK-LABEL: @mac( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: [[TMP0:%.*]] = load i64, i64* [[B:%.*]], align 8 |
| ; CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* [[A:%.*]], align 8 |
| ; CHECK-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP1]], [[TMP0]] |
| ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, i64* [[B]], i64 1 |
| ; CHECK-NEXT: [[TMP2:%.*]] = load i64, i64* [[ARRAYIDX2]], align 8 |
| ; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 1 |
| ; CHECK-NEXT: [[TMP3:%.*]] = load i64, i64* [[ARRAYIDX3]], align 8 |
| ; CHECK-NEXT: [[MUL4:%.*]] = mul nsw i64 [[TMP3]], [[TMP2]] |
| ; CHECK-NEXT: [[ADD:%.*]] = add nsw i64 [[MUL]], [[TMP0]] |
| ; CHECK-NEXT: store i64 [[ADD]], i64* [[A]], align 8 |
| ; CHECK-NEXT: [[ADD9:%.*]] = add nsw i64 [[MUL4]], [[TMP2]] |
| ; CHECK-NEXT: store i64 [[ADD9]], i64* [[ARRAYIDX3]], align 8 |
| ; CHECK-NEXT: ret void |
| ; |
| entry: |
| ; Multiply step, lane 0: %mul = a[0] * b[0]. |
| %0 = load i64, i64* %b, align 8 |
| %1 = load i64, i64* %a, align 8 |
| %mul = mul nsw i64 %1, %0 |
| ; Multiply step, lane 1: %mul4 = a[1] * b[1]. |
| %arrayidx2 = getelementptr inbounds i64, i64* %b, i64 1 |
| %2 = load i64, i64* %arrayidx2, align 8 |
| %arrayidx3 = getelementptr inbounds i64, i64* %a, i64 1 |
| %3 = load i64, i64* %arrayidx3, align 8 |
| %mul4 = mul nsw i64 %3, %2 |
| ; Accumulate step: add the already-loaded b[i] values (%0, %2) to the products |
| ; and store the sums to a[0] and a[1]. |
| %add = add nsw i64 %mul, %0 |
| store i64 %add, i64* %a, align 8 |
| %add9 = add nsw i64 %mul4, %2 |
| store i64 %add9, i64* %arrayidx3, align 8 |
| ret void |
| } |