| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 |
| ; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -mattr=+avx < %s | FileCheck %s |
| |
| ; @test ORs together the results of 24 scalar llvm.ctpop.i32 calls: 14 of them |
| ; take the constant 0 and 10 take values loaded from constant addresses |
| ; (null + 12..32 bytes) in addrspace(1). Every load is immediately followed by |
| ; a store of the loaded value back to the same address; offsets 28, 24, 20 and |
| ; 16 are each loaded and stored twice. |
| ; The assertions below expect the loads and stores to stay scalar and in their |
| ; original order, while the ctpop/or chain becomes a single <24 x i32> ctpop |
| ; feeding llvm.vector.reduce.or, with the loaded values inserted into lanes 6 |
| ; and 8-16 of an otherwise all-zero constant vector. |
| ; NOTE(review): the bug report or change that motivated this reduced test is |
| ; not recorded in this file - add a reference if known. |
| define i32 @test() { |
| ; CHECK-LABEL: define i32 @test( |
| ; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { |
| ; CHECK-NEXT: [[BB:.*:]] |
| ; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) null, i64 32), align 4 |
| ; CHECK-NEXT: store i32 [[LOAD]], ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) null, i64 32), align 4 |
| ; CHECK-NEXT: [[LOAD9:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) null, i64 28), align 4 |
| ; CHECK-NEXT: store i32 [[LOAD9]], ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) null, i64 28), align 4 |
| ; CHECK-NEXT: [[LOAD14:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) null, i64 28), align 4 |
| ; CHECK-NEXT: store i32 [[LOAD14]], ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) null, i64 28), align 4 |
| ; CHECK-NEXT: [[LOAD15:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) null, i64 24), align 4 |
| ; CHECK-NEXT: store i32 [[LOAD15]], ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) null, i64 24), align 4 |
| ; CHECK-NEXT: [[LOAD20:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) null, i64 24), align 4 |
| ; CHECK-NEXT: store i32 [[LOAD20]], ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) null, i64 24), align 4 |
| ; CHECK-NEXT: [[LOAD21:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) null, i64 20), align 4 |
| ; CHECK-NEXT: store i32 [[LOAD21]], ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) null, i64 20), align 4 |
| ; CHECK-NEXT: [[LOAD26:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) null, i64 20), align 4 |
| ; CHECK-NEXT: store i32 [[LOAD26]], ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) null, i64 20), align 4 |
| ; CHECK-NEXT: [[LOAD27:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) null, i64 16), align 4 |
| ; CHECK-NEXT: store i32 [[LOAD27]], ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) null, i64 16), align 4 |
| ; CHECK-NEXT: [[LOAD32:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) null, i64 16), align 4 |
| ; CHECK-NEXT: store i32 [[LOAD32]], ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) null, i64 16), align 4 |
| ; CHECK-NEXT: [[LOAD33:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) null, i64 12), align 4 |
| ; CHECK-NEXT: store i32 [[LOAD33]], ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) null, i64 12), align 4 |
| ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <24 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 poison, i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>, i32 [[LOAD]], i32 6 |
| ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <24 x i32> [[TMP0]], i32 [[LOAD9]], i32 8 |
| ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <24 x i32> [[TMP1]], i32 [[LOAD14]], i32 9 |
| ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <24 x i32> [[TMP2]], i32 [[LOAD15]], i32 10 |
| ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <24 x i32> [[TMP3]], i32 [[LOAD20]], i32 11 |
| ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <24 x i32> [[TMP4]], i32 [[LOAD21]], i32 12 |
| ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <24 x i32> [[TMP5]], i32 [[LOAD26]], i32 13 |
| ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <24 x i32> [[TMP6]], i32 [[LOAD27]], i32 14 |
| ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <24 x i32> [[TMP7]], i32 [[LOAD32]], i32 15 |
| ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <24 x i32> [[TMP8]], i32 [[LOAD33]], i32 16 |
| ; CHECK-NEXT: [[TMP10:%.*]] = call <24 x i32> @llvm.ctpop.v24i32(<24 x i32> [[TMP9]]) |
| ; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vector.reduce.or.v24i32(<24 x i32> [[TMP10]]) |
| ; CHECK-NEXT: ret i32 [[TMP11]] |
| ; |
| bb: |
| %call = call i32 @llvm.ctpop.i32(i32 0) |
| %call1 = call i32 @llvm.ctpop.i32(i32 0) |
| %load = load i32, ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) null, i64 32), align 4 |
| store i32 %load, ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) null, i64 32), align 4 |
| %or = or i32 %call, %call1 |
| %call2 = call i32 @llvm.ctpop.i32(i32 0) |
| %or3 = or i32 %or, %call2 |
| %call4 = call i32 @llvm.ctpop.i32(i32 0) |
| %load9 = load i32, ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) null, i64 28), align 4 |
| store i32 %load9, ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) null, i64 28), align 4 |
| %or5 = or i32 %or3, %call4 |
| %call6 = call i32 @llvm.ctpop.i32(i32 0) |
| %or7 = or i32 %or5, %call6 |
| %call8 = call i32 @llvm.ctpop.i32(i32 0) |
| %load14 = load i32, ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) null, i64 28), align 4 |
| store i32 %load14, ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) null, i64 28), align 4 |
| %load15 = load i32, ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) null, i64 24), align 4 |
| store i32 %load15, ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) null, i64 24), align 4 |
| %or10 = or i32 %or7, %call8 |
| ; First ctpop of a loaded value. It is the seventh operand of the or chain, |
| ; which matches lane 6 of the expected <24 x i32> operand vector. |
| %call11 = call i32 @llvm.ctpop.i32(i32 %load) |
| %or12 = or i32 %or10, %call11 |
| %call13 = call i32 @llvm.ctpop.i32(i32 0) |
| %load20 = load i32, ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) null, i64 24), align 4 |
| store i32 %load20, ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) null, i64 24), align 4 |
| %load21 = load i32, ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) null, i64 20), align 4 |
| store i32 %load21, ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) null, i64 20), align 4 |
| %or16 = or i32 %or12, %call13 |
| %call17 = call i32 @llvm.ctpop.i32(i32 %load9) |
| %or18 = or i32 %or16, %call17 |
| %call19 = call i32 @llvm.ctpop.i32(i32 %load14) |
| %load26 = load i32, ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) null, i64 20), align 4 |
| store i32 %load26, ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) null, i64 20), align 4 |
| %load27 = load i32, ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) null, i64 16), align 4 |
| store i32 %load27, ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) null, i64 16), align 4 |
| %or22 = or i32 %or18, %call19 |
| %call23 = call i32 @llvm.ctpop.i32(i32 %load15) |
| %or24 = or i32 %or22, %call23 |
| %call25 = call i32 @llvm.ctpop.i32(i32 %load20) |
| %load32 = load i32, ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) null, i64 16), align 4 |
| store i32 %load32, ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) null, i64 16), align 4 |
| %load33 = load i32, ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) null, i64 12), align 4 |
| store i32 %load33, ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) null, i64 12), align 4 |
| %or28 = or i32 %or24, %call25 |
| %call29 = call i32 @llvm.ctpop.i32(i32 %load21) |
| %or30 = or i32 %or28, %call29 |
| %call31 = call i32 @llvm.ctpop.i32(i32 %load26) |
| %or34 = or i32 %or30, %call31 |
| %call35 = call i32 @llvm.ctpop.i32(i32 %load27) |
| %or36 = or i32 %or34, %call35 |
| %call37 = call i32 @llvm.ctpop.i32(i32 %load32) |
| %or38 = or i32 %or36, %call37 |
| %call39 = call i32 @llvm.ctpop.i32(i32 %load33) |
| %or40 = or i32 %or38, %call39 |
| ; The remaining seven operands are all ctpop of the constant 0; they correspond |
| ; to lanes 17-23 of the expected operand vector. |
| %call41 = call i32 @llvm.ctpop.i32(i32 0) |
| %or42 = or i32 %or40, %call41 |
| %call43 = call i32 @llvm.ctpop.i32(i32 0) |
| %or44 = or i32 %or42, %call43 |
| %call45 = call i32 @llvm.ctpop.i32(i32 0) |
| %or46 = or i32 %or44, %call45 |
| %call47 = call i32 @llvm.ctpop.i32(i32 0) |
| %or48 = or i32 %or46, %call47 |
| %call49 = call i32 @llvm.ctpop.i32(i32 0) |
| %or50 = or i32 %or48, %call49 |
| %call51 = call i32 @llvm.ctpop.i32(i32 0) |
| %or52 = or i32 %or50, %call51 |
| %call53 = call i32 @llvm.ctpop.i32(i32 0) |
| %or54 = or i32 %or52, %call53 |
| ret i32 %or54 |
| } |
| |
| ; Scalar i32 population-count intrinsic; the only callee used by @test. |
| declare i32 @llvm.ctpop.i32(i32) |