| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 |
| ; RUN: opt -passes=loop-vectorize -force-vector-width=4 -S %s 2>&1 | FileCheck %s |
| |
| target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" |
| target triple = "x86_64-grtev4-linux-gnu" |
| |
| ; Loop-vectorizer input with a constant trip count of 4, run with a forced |
| ; vector width of 4. Each iteration loads one float from %foo (addressed via a |
| ; { float, float } GEP whose index is the low bit of the induction variable) |
| ; and writes six consecutive floats -- the loaded value followed by five |
| ; zeros -- at byte offset 24 * %iv in %bar, so the six stores cover every byte |
| ; of the 24-byte stride. |
| ; The autogenerated assertions below expect a vector body with no back-edge |
| ; that performs four scalar loads and one interleaved <24 x float> store. |
| define void @copy_bitcast_fusion(ptr noalias %foo, ptr noalias %bar) { |
| ; CHECK-LABEL: define void @copy_bitcast_fusion( |
| ; CHECK-SAME: ptr noalias [[FOO:%.*]], ptr noalias [[BAR:%.*]]) { |
| ; CHECK-NEXT: br label %[[VECTOR_PH:.*]] |
| ; CHECK: [[VECTOR_PH]]: |
| ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] |
| ; CHECK: [[VECTOR_BODY]]: |
| ; CHECK-NEXT: [[TMP2:%.*]] = select i1 true, i64 1, i64 0 |
| ; CHECK-NEXT: [[TMP3:%.*]] = select i1 false, i64 1, i64 0 |
| ; CHECK-NEXT: [[TMP4:%.*]] = select i1 true, i64 1, i64 0 |
| ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr { float, float }, ptr [[FOO]], i64 [[TMP2]] |
| ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr { float, float }, ptr [[FOO]], i64 [[TMP3]] |
| ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr { float, float }, ptr [[FOO]], i64 [[TMP4]] |
| ; CHECK-NEXT: [[TMP9:%.*]] = load float, ptr [[FOO]], align 4 |
| ; CHECK-NEXT: [[TMP10:%.*]] = load float, ptr [[TMP6]], align 4 |
| ; CHECK-NEXT: [[TMP11:%.*]] = load float, ptr [[TMP7]], align 4 |
| ; CHECK-NEXT: [[TMP12:%.*]] = load float, ptr [[TMP8]], align 4 |
| ; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x float> poison, float [[TMP9]], i32 0 |
| ; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x float> [[TMP13]], float [[TMP10]], i32 1 |
| ; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x float> [[TMP14]], float [[TMP11]], i32 2 |
| ; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x float> [[TMP15]], float [[TMP12]], i32 3 |
| ; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <4 x float> [[TMP16]], <4 x float> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> |
| ; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <8 x float> [[TMP17]], <8 x float> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> |
| ; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <16 x float> [[TMP18]], <16 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef>, <24 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23> |
| ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <24 x float> [[TMP19]], <24 x float> poison, <24 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 3, i32 7, i32 11, i32 15, i32 19, i32 23> |
| ; CHECK-NEXT: store <24 x float> [[INTERLEAVED_VEC]], ptr [[BAR]], align 4 |
| ; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] |
| ; CHECK: [[MIDDLE_BLOCK]]: |
| ; CHECK-NEXT: br label %[[EXIT:.*]] |
| ; CHECK: [[EXIT]]: |
| ; CHECK-NEXT: ret void |
| ; |
| br label %body |
| |
| body: |
| ; %iv starts at 0 and is advanced by %ptr3, which (despite its name) is the |
| ; incremented induction variable, not a pointer. |
| %iv = phi i64 [ 0, %0 ], [ %ptr3, %body ] |
| ; The low bit of %iv selects index 0 or 1 for the { float, float } GEP on %foo. |
| %iv.trunc = trunc i64 %iv to i1 |
| %iv.trunc2 = select i1 %iv.trunc, i64 1, i64 0 |
| %unpack.ptr = getelementptr { float, float }, ptr %foo, i64 %iv.trunc2 |
| %unpack = load float, ptr %unpack.ptr, align 4 |
| ; Each iteration writes a 24-byte slot in %bar starting at byte offset 24 * %iv. |
| %idx3 = mul i64 %iv, 24 |
| %bar.ptr = getelementptr i8, ptr %bar, i64 %idx3 |
| ; Offset 0 receives the loaded value; offsets 4, 8, 12, 16 and 20 receive 0.0. |
| store float %unpack, ptr %bar.ptr, align 4 |
| %repack4 = getelementptr i8, ptr %bar.ptr, i64 4 |
| store float 0.000000e+00, ptr %repack4, align 4 |
| %ptr1 = getelementptr i8, ptr %bar.ptr, i64 8 |
| store float 0.000000e+00, ptr %ptr1, align 4 |
| %repack4.1 = getelementptr i8, ptr %bar.ptr, i64 12 |
| store float 0.000000e+00, ptr %repack4.1, align 4 |
| %ptr2 = getelementptr i8, ptr %bar.ptr, i64 16 |
| store float 0.000000e+00, ptr %ptr2, align 4 |
| %repack4.2 = getelementptr i8, ptr %bar.ptr, i64 20 |
| store float 0.000000e+00, ptr %repack4.2, align 4 |
| ; Leave the loop once the incremented counter reaches 4 (four iterations). |
| %ptr3 = add i64 %iv, 1 |
| %exitcond.not = icmp eq i64 %ptr3, 4 |
| br i1 %exitcond.not, label %exit, label %body |
| |
| exit: |
| ret void |
| } |