| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py |
| ; RUN: opt -passes=slp-vectorizer -S < %s | FileCheck %s |
| |
| ; Module target configuration: x86-64 Linux. The data layout string lists |
| ; address spaces 1 and 2 as non-integral (ni:1, ni:2) and declares 32-bit |
| ; pointers for address space 2 (p2:32:8:8:32). |
| target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128-ni:1-p2:32:8:8:32-ni:2" |
| target triple = "x86_64-unknown-linux-gnu" |
| |
| ; SLP-vectorizer input: eight independent scalar load/fmul/fadd/store chains |
| ; over two addrspace(1) pointers. |
| ;   * %0 is read at byte offsets 8, 12, 24 and 28 (each loaded value feeds |
| ;     two multiplies) and written at the eight consecutive float slots at |
| ;     byte offsets 8..36. |
| ;   * %1 is read exactly once at each of the eight float slots at byte |
| ;     offsets 8..36, in an order that is permuted relative to the stores. |
| ;   * Every load is issued before the first store. |
| ; The autogenerated assertions in the function expect: a masked <6 x float> |
| ; load from %0+8 with lanes 2 and 3 disabled (the unread offsets 16 and 20), |
| ; a plain <8 x float> load from %1+8, one vector fmul and fadd, a reordering |
| ; shuffle, and a single <8 x float> store to %0+8. |
| define void @"foo"(ptr addrspace(1) %0, ptr addrspace(1) %1) #0 { |
| ; CHECK-LABEL: @foo( |
| ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP0:%.*]], i64 8 |
| ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP1:%.*]], i64 8 |
| ; CHECK-NEXT: [[TMP5:%.*]] = call <6 x float> @llvm.masked.load.v6f32.p1(ptr addrspace(1) [[TMP3]], i32 4, <6 x i1> <i1 true, i1 true, i1 false, i1 false, i1 true, i1 true>, <6 x float> poison) |
| ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <6 x float> [[TMP5]], <6 x float> poison, <4 x i32> <i32 0, i32 1, i32 5, i32 4> |
| ; CHECK-NEXT: [[TMP9:%.*]] = load <8 x float>, ptr addrspace(1) [[TMP4]], align 4 |
| ; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <6 x float> [[TMP5]], <6 x float> poison, <8 x i32> <i32 0, i32 4, i32 0, i32 4, i32 5, i32 1, i32 5, i32 1> |
| ; CHECK-NEXT: [[TMP13:%.*]] = fmul <8 x float> [[TMP12]], [[TMP9]] |
| ; CHECK-NEXT: [[TMP14:%.*]] = fadd <8 x float> [[TMP13]], zeroinitializer |
| ; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <8 x float> [[TMP14]], <8 x float> poison, <8 x i32> <i32 0, i32 5, i32 2, i32 7, i32 4, i32 1, i32 6, i32 3> |
| ; CHECK-NEXT: store <8 x float> [[TMP15]], ptr addrspace(1) [[TMP3]], align 4 |
| ; CHECK-NEXT: ret void |
| ; |
| ; Chain for store slot %0+8: (%0+8) * (%1+8) + 0.0 |
| %3 = getelementptr inbounds i8, ptr addrspace(1) %0, i64 8 |
| %4 = load float, ptr addrspace(1) %3 , align 4 |
| %5 = getelementptr inbounds i8, ptr addrspace(1) %1, i64 8 |
| %6 = load float, ptr addrspace(1) %5 , align 4 |
| %7 = fmul float %4, %6 |
| %8 = fadd float %7, 0.000000e+00 |
| ; Chain for store slot %0+12: (%0+12) * (%1+28) + 0.0 |
| %9 = getelementptr inbounds i8, ptr addrspace(1) %0, i64 12 |
| %10 = load float, ptr addrspace(1) %9 , align 4 |
| %11 = getelementptr inbounds i8, ptr addrspace(1) %1, i64 28 |
| %12 = load float, ptr addrspace(1) %11 , align 4 |
| %13 = fmul float %10, %12 |
| %14 = fadd float %13, 0.000000e+00 |
| ; Chain for store slot %0+16: reuses the value loaded from %0+8, times (%1+16) |
| %15 = getelementptr inbounds i8, ptr addrspace(1) %1, i64 16 |
| %16 = load float, ptr addrspace(1) %15 , align 4 |
| %17 = fmul float %4, %16 |
| %18 = fadd float %17, 0.000000e+00 |
| ; Chain for store slot %0+20: reuses the value loaded from %0+12, times (%1+36) |
| %19 = getelementptr inbounds i8, ptr addrspace(1) %1, i64 36 |
| %20 = load float, ptr addrspace(1) %19 , align 4 |
| %21 = fmul float %10, %20 |
| %22 = fadd float %21, 0.000000e+00 |
| ; Chain for store slot %0+24: (%0+28) * (%1+24) + 0.0 -- note the %0 operand |
| ; comes from offset 28, not from the slot being written. |
| %23 = getelementptr inbounds i8, ptr addrspace(1) %0, i64 28 |
| %24 = load float, ptr addrspace(1) %23 , align 4 |
| %25 = getelementptr inbounds i8, ptr addrspace(1) %1, i64 24 |
| %26 = load float, ptr addrspace(1) %25 , align 4 |
| %27 = fmul float %24, %26 |
| %28 = fadd float %27, 0.000000e+00 |
| ; Chain for store slot %0+28: (%0+24) * (%1+12) + 0.0 -- the %0 operand comes |
| ; from offset 24, i.e. swapped with the previous chain. |
| %29 = getelementptr inbounds i8, ptr addrspace(1) %0, i64 24 |
| %30 = load float, ptr addrspace(1) %29 , align 4 |
| %31 = getelementptr inbounds i8, ptr addrspace(1) %1, i64 12 |
| %32 = load float, ptr addrspace(1) %31 , align 4 |
| %33 = fmul float %30, %32 |
| %34 = fadd float %33, 0.000000e+00 |
| ; Chain for store slot %0+32: reuses the value loaded from %0+28, times (%1+32) |
| %35 = getelementptr inbounds i8, ptr addrspace(1) %1, i64 32 |
| %36 = load float, ptr addrspace(1) %35 , align 4 |
| %37 = fmul float %24, %36 |
| %38 = fadd float %37, 0.000000e+00 |
| ; Chain for store slot %0+36: reuses the value loaded from %0+24, times (%1+20) |
| %39 = getelementptr inbounds i8, ptr addrspace(1) %1, i64 20 |
| %40 = load float, ptr addrspace(1) %39 , align 4 |
| %41 = fmul float %30, %40 |
| %42 = fadd float %41, 0.000000e+00 |
| ; Stores: the eight results go to consecutive float slots %0+8 .. %0+36. |
| ; Slots 8, 12, 24 and 28 reuse the GEPs created for the loads (%3, %9, %29, |
| ; %23); slots 16, 20, 32 and 36 get fresh GEPs because %0 is never read there. |
| store float %8, ptr addrspace(1) %3 , align 4 |
| store float %14, ptr addrspace(1) %9 , align 4 |
| %43 = getelementptr inbounds i8, ptr addrspace(1) %0, i64 16 |
| store float %18, ptr addrspace(1) %43 , align 4 |
| %44 = getelementptr inbounds i8, ptr addrspace(1) %0, i64 20 |
| store float %22, ptr addrspace(1) %44 , align 4 |
| store float %28, ptr addrspace(1) %29 , align 4 |
| store float %34, ptr addrspace(1) %23 , align 4 |
| %45 = getelementptr inbounds i8, ptr addrspace(1) %0, i64 32 |
| store float %38, ptr addrspace(1) %45 , align 4 |
| %46 = getelementptr inbounds i8, ptr addrspace(1) %0, i64 36 |
| store float %42, ptr addrspace(1) %46 , align 4 |
| ret void |
| } |
| |
| ; Attribute group #0, referenced by @foo: sets the "target-cpu" string |
| ; attribute to skylake. |
| attributes #0 = { "target-cpu"="skylake" } |
| |