| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py |
| ; RUN: opt -passes=slp-vectorizer -slp-threshold=-999 -S -mtriple=x86_64-unknown-linux-gnu -mcpu=skylake < %s | FileCheck %s |
| |
| ; External callee declared readonly and nounwind, but without willreturn. |
| ; @allocas_speculation calls it between its two allocas. |
| declare i64 @may_inf_loop_ro() nounwind readonly |
| |
| ; Base case without allocas or stacksave: two adjacent pointers are loaded from |
| ; %a (with a store of null to %a between the two loads), each is advanced by one |
| ; byte, and the results are stored to two adjacent slots of %b. The expected |
| ; output is a single <2 x ptr> load, a vector getelementptr and a single |
| ; <2 x ptr> store, with the null store kept after the vector load. %c is unused. |
| define void @basecase(ptr %a, ptr %b, ptr %c) { |
| ; CHECK-LABEL: @basecase( |
| ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x ptr>, ptr [[A:%.*]], align 8 |
| ; CHECK-NEXT: store ptr null, ptr [[A]], align 8 |
| ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, <2 x ptr> [[TMP1]], <2 x i32> splat (i32 1) |
| ; CHECK-NEXT: store <2 x ptr> [[TMP2]], ptr [[B:%.*]], align 8 |
| ; CHECK-NEXT: ret void |
| ; |
| |
| %v1 = load ptr, ptr %a |
| store ptr zeroinitializer, ptr %a |
| %a2 = getelementptr ptr, ptr %a, i32 1 |
| %v2 = load ptr, ptr %a2 |
| |
| %add1 = getelementptr i8, ptr %v1, i32 1 |
| %add2 = getelementptr i8, ptr %v2, i32 1 |
| |
| store ptr %add1, ptr %b |
| %b2 = getelementptr ptr, ptr %b, i32 1 |
| store ptr %add2, ptr %b2 |
| ret void |
| } |
| |
| ; Using two allocas and a buildvector: %v1 + 1 is stored both to %a and to the |
| ; first slot of %b, and %v2 + 1 is stored to the second slot of %b. The expected |
| ; output emits both allocas first, builds a <2 x ptr> from them with |
| ; insertelement, applies a vector getelementptr for the <2 x ptr> store to %b, |
| ; and keeps a scalar getelementptr on %v1 for the store to %a. %c is unused. |
| define void @allocas(ptr %a, ptr %b, ptr %c) { |
| ; CHECK-LABEL: @allocas( |
| ; CHECK-NEXT: [[V1:%.*]] = alloca i8, align 1 |
| ; CHECK-NEXT: [[V2:%.*]] = alloca i8, align 1 |
| ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x ptr> poison, ptr [[V1]], i32 0 |
| ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x ptr> [[TMP1]], ptr [[V2]], i32 1 |
| ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, <2 x ptr> [[TMP2]], <2 x i32> splat (i32 1) |
| ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[V1]], i32 1 |
| ; CHECK-NEXT: store ptr [[TMP4]], ptr [[A:%.*]], align 8 |
| ; CHECK-NEXT: store <2 x ptr> [[TMP3]], ptr [[B:%.*]], align 8 |
| ; CHECK-NEXT: ret void |
| ; |
| |
| %v1 = alloca i8 |
| %add1 = getelementptr i8, ptr %v1, i32 1 |
| store ptr %add1, ptr %a |
| %v2 = alloca i8 |
| |
| %add2 = getelementptr i8, ptr %v2, i32 1 |
| |
| store ptr %add1, ptr %b |
| %b2 = getelementptr ptr, ptr %b, i32 1 |
| store ptr %add2, ptr %b2 |
| ret void |
| } |
| |
| ; Allocas cannot be speculated above a potentially non-returning call. |
| ; Same shape as @allocas, but with a call to @may_inf_loop_ro between the store |
| ; to %a and the second alloca. The expected output keeps the scalar instructions |
| ; in their original order, with the second alloca below the call. |
| define void @allocas_speculation(ptr %a, ptr %b, ptr %c) { |
| ; CHECK-LABEL: @allocas_speculation( |
| ; CHECK-NEXT: [[V1:%.*]] = alloca i8, align 1 |
| ; CHECK-NEXT: [[ADD1:%.*]] = getelementptr i8, ptr [[V1]], i32 1 |
| ; CHECK-NEXT: store ptr [[ADD1]], ptr [[A:%.*]], align 8 |
| ; CHECK-NEXT: [[TMP1:%.*]] = call i64 @may_inf_loop_ro() |
| ; CHECK-NEXT: [[V2:%.*]] = alloca i8, align 1 |
| ; CHECK-NEXT: [[ADD2:%.*]] = getelementptr i8, ptr [[V2]], i32 1 |
| ; CHECK-NEXT: store ptr [[ADD1]], ptr [[B:%.*]], align 8 |
| ; CHECK-NEXT: [[B2:%.*]] = getelementptr ptr, ptr [[B]], i32 1 |
| ; CHECK-NEXT: store ptr [[ADD2]], ptr [[B2]], align 8 |
| ; CHECK-NEXT: ret void |
| ; |
| |
| %v1 = alloca i8 |
| %add1 = getelementptr i8, ptr %v1, i32 1 |
| store ptr %add1, ptr %a |
| call i64 @may_inf_loop_ro() |
| %v2 = alloca i8 |
| |
| %add2 = getelementptr i8, ptr %v2, i32 1 |
| |
| store ptr %add1, ptr %b |
| %b2 = getelementptr ptr, ptr %b, i32 1 |
| store ptr %add2, ptr %b2 |
| ret void |
| } |
| |
| ; We must be careful not to lift the inalloca alloca above the stacksave here. |
| ; We used to miscompile this example before explicit dependency handling for |
| ; stacksave was added. |
| ; The input stores %v1 + 1 to %a before the stacksave, then calls @use (readnone |
| ; at the call site) on the inalloca %v2 between the stacksave and the |
| ; stackrestore. The expected output is unvectorized and keeps the input order. |
| define void @stacksave(ptr %a, ptr %b, ptr %c) { |
| ; CHECK-LABEL: @stacksave( |
| ; CHECK-NEXT: [[V1:%.*]] = alloca i8, align 1 |
| ; CHECK-NEXT: [[ADD1:%.*]] = getelementptr i8, ptr [[V1]], i32 1 |
| ; CHECK-NEXT: store ptr [[ADD1]], ptr [[A:%.*]], align 8 |
| ; CHECK-NEXT: [[STACK:%.*]] = call ptr @llvm.stacksave.p0() |
| ; CHECK-NEXT: [[V2:%.*]] = alloca inalloca i8, align 1 |
| ; CHECK-NEXT: call void @use(ptr inalloca(i8) [[V2]]) #[[ATTR4:[0-9]+]] |
| ; CHECK-NEXT: call void @llvm.stackrestore.p0(ptr [[STACK]]) |
| ; CHECK-NEXT: [[ADD2:%.*]] = getelementptr i8, ptr [[V2]], i32 1 |
| ; CHECK-NEXT: store ptr [[ADD1]], ptr [[B:%.*]], align 8 |
| ; CHECK-NEXT: [[B2:%.*]] = getelementptr ptr, ptr [[B]], i32 1 |
| ; CHECK-NEXT: store ptr [[ADD2]], ptr [[B2]], align 8 |
| ; CHECK-NEXT: ret void |
| ; |
| |
| %v1 = alloca i8 |
| %add1 = getelementptr i8, ptr %v1, i32 1 |
| store ptr %add1, ptr %a |
| |
| %stack = call ptr @llvm.stacksave() |
| %v2 = alloca inalloca i8 |
| call void @use(ptr inalloca(i8) %v2) readnone |
| call void @llvm.stackrestore(ptr %stack) |
| |
| %add2 = getelementptr i8, ptr %v2, i32 1 |
| |
| store ptr %add1, ptr %b |
| %b2 = getelementptr ptr, ptr %b, i32 1 |
| store ptr %add2, ptr %b2 |
| ret void |
| } |
| |
| ; Variant of @stacksave in which the store of %v1 + 1 to %a sits after the |
| ; stacksave and the call to @use is readonly at the call site. The expected |
| ; output vectorizes the stores to %b: the inalloca %v2 stays below the |
| ; stacksave, the buildvector and vector getelementptr are emitted right after |
| ; it, and the <2 x ptr> store stays below the stackrestore. |
| define void @stacksave2(ptr %a, ptr %b, ptr %c) { |
| ; CHECK-LABEL: @stacksave2( |
| ; CHECK-NEXT: [[V1:%.*]] = alloca i8, align 1 |
| ; CHECK-NEXT: [[STACK:%.*]] = call ptr @llvm.stacksave.p0() |
| ; CHECK-NEXT: [[V2:%.*]] = alloca inalloca i8, align 1 |
| ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x ptr> poison, ptr [[V1]], i32 0 |
| ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x ptr> [[TMP1]], ptr [[V2]], i32 1 |
| ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, <2 x ptr> [[TMP2]], <2 x i32> splat (i32 1) |
| ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[V1]], i32 1 |
| ; CHECK-NEXT: store ptr [[TMP4]], ptr [[A:%.*]], align 8 |
| ; CHECK-NEXT: call void @use(ptr inalloca(i8) [[V2]]) #[[ATTR5:[0-9]+]] |
| ; CHECK-NEXT: call void @llvm.stackrestore.p0(ptr [[STACK]]) |
| ; CHECK-NEXT: store <2 x ptr> [[TMP3]], ptr [[B:%.*]], align 8 |
| ; CHECK-NEXT: ret void |
| ; |
| |
| %v1 = alloca i8 |
| %add1 = getelementptr i8, ptr %v1, i32 1 |
| |
| %stack = call ptr @llvm.stacksave() |
| store ptr %add1, ptr %a |
| %v2 = alloca inalloca i8 |
| call void @use(ptr inalloca(i8) %v2) readonly |
| call void @llvm.stackrestore(ptr %stack) |
| |
| %add2 = getelementptr i8, ptr %v2, i32 1 |
| |
| store ptr %add1, ptr %b |
| %b2 = getelementptr ptr, ptr %b, i32 1 |
| store ptr %add2, ptr %b2 |
| ret void |
| } |
| |
| ; Variant in which both allocas follow the stacksave (only %v2 is inalloca) and |
| ; both getelementptrs follow the stackrestore. The expected output leaves the |
| ; stacksave, the allocas, the @use call and the stackrestore in place and emits |
| ; the buildvector, vector getelementptr and <2 x ptr> store after the |
| ; stackrestore. |
| define void @stacksave3(ptr %a, ptr %b, ptr %c) { |
| ; CHECK-LABEL: @stacksave3( |
| ; CHECK-NEXT: [[STACK:%.*]] = call ptr @llvm.stacksave.p0() |
| ; CHECK-NEXT: [[V1:%.*]] = alloca i8, align 1 |
| ; CHECK-NEXT: [[V2:%.*]] = alloca inalloca i8, align 1 |
| ; CHECK-NEXT: call void @use(ptr inalloca(i8) [[V2]]) #[[ATTR4]] |
| ; CHECK-NEXT: call void @llvm.stackrestore.p0(ptr [[STACK]]) |
| ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x ptr> poison, ptr [[V1]], i32 0 |
| ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x ptr> [[TMP1]], ptr [[V2]], i32 1 |
| ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, <2 x ptr> [[TMP2]], <2 x i32> splat (i32 1) |
| ; CHECK-NEXT: store <2 x ptr> [[TMP3]], ptr [[B:%.*]], align 8 |
| ; CHECK-NEXT: ret void |
| ; |
| |
| %stack = call ptr @llvm.stacksave() |
| %v1 = alloca i8 |
| |
| %v2 = alloca inalloca i8 |
| call void @use(ptr inalloca(i8) %v2) readnone |
| call void @llvm.stackrestore(ptr %stack) |
| |
| %add1 = getelementptr i8, ptr %v1, i32 1 |
| %add2 = getelementptr i8, ptr %v2, i32 1 |
| |
| store ptr %add1, ptr %b |
| %b2 = getelementptr ptr, ptr %b, i32 1 |
| store ptr %add2, ptr %b2 |
| ret void |
| } |
| |
| ; Here we have an alloca which needs to stay under the stacksave, but is not |
| ; directly part of the vectorization tree. Instead, the stacksave is |
| ; encountered during dependency scanning via the memory chain. |
| ; The expected output places the <2 x ptr> load and vector getelementptr above |
| ; the stacksave and the <2 x ptr> store below the stackrestore, with the |
| ; inalloca %x and its use left between the stacksave and the stackrestore. |
| define void @stacksave4(ptr %a, ptr %b, ptr %c) { |
| ; CHECK-LABEL: @stacksave4( |
| ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x ptr>, ptr [[A:%.*]], align 8 |
| ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, <2 x ptr> [[TMP1]], <2 x i32> splat (i32 1) |
| ; CHECK-NEXT: [[STACK:%.*]] = call ptr @llvm.stacksave.p0() |
| ; CHECK-NEXT: [[X:%.*]] = alloca inalloca i8, align 1 |
| ; CHECK-NEXT: call void @use(ptr inalloca(i8) [[X]]) #[[ATTR4]] |
| ; CHECK-NEXT: call void @llvm.stackrestore.p0(ptr [[STACK]]) |
| ; CHECK-NEXT: store <2 x ptr> [[TMP2]], ptr [[B:%.*]], align 8 |
| ; CHECK-NEXT: ret void |
| ; |
| |
| %v1 = load ptr, ptr %a |
| %a2 = getelementptr ptr, ptr %a, i32 1 |
| %v2 = load ptr, ptr %a2 |
| |
| %add1 = getelementptr i8, ptr %v1, i32 1 |
| %add2 = getelementptr i8, ptr %v2, i32 1 |
| |
| %stack = call ptr @llvm.stacksave() |
| %x = alloca inalloca i8 |
| call void @use(ptr inalloca(i8) %x) readnone |
| call void @llvm.stackrestore(ptr %stack) |
| |
| store ptr %add1, ptr %b |
| %b2 = getelementptr ptr, ptr %b, i32 1 |
| store ptr %add2, ptr %b2 |
| ret void |
| } |
| |
| ; Two adjacent pointer loads from %a and their one-byte getelementptrs precede a |
| ; stacksave / inalloca / @use / stackrestore sequence; the two stores to %b |
| ; follow it. The expected output vectorizes the loads and getelementptrs above |
| ; the stacksave and the store below the stackrestore. |
| ; Review note - as visible here, this body and its expected output are |
| ; identical to @stacksave4; confirm whether a different case was intended. |
| define void @stacksave5(ptr %a, ptr %b, ptr %c) { |
| ; CHECK-LABEL: @stacksave5( |
| ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x ptr>, ptr [[A:%.*]], align 8 |
| ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, <2 x ptr> [[TMP1]], <2 x i32> splat (i32 1) |
| ; CHECK-NEXT: [[STACK:%.*]] = call ptr @llvm.stacksave.p0() |
| ; CHECK-NEXT: [[X:%.*]] = alloca inalloca i8, align 1 |
| ; CHECK-NEXT: call void @use(ptr inalloca(i8) [[X]]) #[[ATTR4]] |
| ; CHECK-NEXT: call void @llvm.stackrestore.p0(ptr [[STACK]]) |
| ; CHECK-NEXT: store <2 x ptr> [[TMP2]], ptr [[B:%.*]], align 8 |
| ; CHECK-NEXT: ret void |
| ; |
| |
| %v1 = load ptr, ptr %a |
| %a2 = getelementptr ptr, ptr %a, i32 1 |
| %v2 = load ptr, ptr %a2 |
| |
| %add1 = getelementptr i8, ptr %v1, i32 1 |
| %add2 = getelementptr i8, ptr %v2, i32 1 |
| |
| %stack = call ptr @llvm.stacksave() |
| %x = alloca inalloca i8 |
| call void @use(ptr inalloca(i8) %x) readnone |
| call void @llvm.stackrestore(ptr %stack) |
| |
| store ptr %add1, ptr %b |
| %b2 = getelementptr ptr, ptr %b, i32 1 |
| store ptr %add2, ptr %b2 |
| ret void |
| } |
| |
| ; Reordering the second alloca above the stackrestore while |
| ; leaving the write to it below would introduce a write-after-free |
| ; bug. |
| ; The expected output keeps the second alloca and the store to it below the |
| ; stackrestore, then emits the buildvector, vector getelementptr and |
| ; <2 x ptr> store to %b after them. |
| define void @stackrestore1(ptr %a, ptr %b, ptr %c) { |
| ; CHECK-LABEL: @stackrestore1( |
| ; CHECK-NEXT: [[STACK:%.*]] = call ptr @llvm.stacksave.p0() |
| ; CHECK-NEXT: [[V1:%.*]] = alloca i8, align 1 |
| ; CHECK-NEXT: store i8 0, ptr [[V1]], align 1 |
| ; CHECK-NEXT: call void @llvm.stackrestore.p0(ptr [[STACK]]) |
| ; CHECK-NEXT: [[V2:%.*]] = alloca i8, align 1 |
| ; CHECK-NEXT: store i8 0, ptr [[V2]], align 1 |
| ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x ptr> poison, ptr [[V1]], i32 0 |
| ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x ptr> [[TMP1]], ptr [[V2]], i32 1 |
| ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, <2 x ptr> [[TMP2]], <2 x i32> splat (i32 1) |
| ; CHECK-NEXT: store <2 x ptr> [[TMP3]], ptr [[B:%.*]], align 8 |
| ; CHECK-NEXT: ret void |
| ; |
| |
| %stack = call ptr @llvm.stacksave() |
| %v1 = alloca i8 |
| store i8 0, ptr %v1 |
| call void @llvm.stackrestore(ptr %stack) |
| %v2 = alloca i8 |
| store i8 0, ptr %v2 |
| |
| %add1 = getelementptr i8, ptr %v1, i32 1 |
| %add2 = getelementptr i8, ptr %v2, i32 1 |
| |
| store ptr %add1, ptr %b |
| %b2 = getelementptr ptr, ptr %b, i32 1 |
| store ptr %add2, ptr %b2 |
| ret void |
| } |
| |
| ; Declarations shared by the stacksave/stackrestore tests in this file: @use |
| ; receives its pointer argument as inalloca(i8); the two intrinsics save and |
| ; restore the stack pointer value passed between them. |
| declare void @use(ptr inalloca(i8)) |
| declare ptr @llvm.stacksave() |
| declare void @llvm.stackrestore(ptr) |
| |
| ; The next set of functions (@ham and @spam) are reduced from previous |
| ; regressions. @wibble and @quux are external callees used by @ham. |
| |
| declare ptr @wibble(ptr) |
| declare void @quux(ptr inalloca(i32)) |
| |
| ; Five allocas and three calls to @wibble are followed by a stacksave / |
| ; inalloca / @quux / stackrestore sequence and then eight pointer stores into |
| ; slots 0-7 of %var12: slots 0-4 receive %var4 and slots 5-7 receive %var5. |
| ; The expected output leaves everything up to the stackrestore in its original |
| ; order and emits two <4 x ptr> stores after it: a splat of %var4 to slots 0-3 |
| ; and <%var4, %var5, %var5, %var5> to slots 4-7. |
| define void @ham() #1 { |
| ; CHECK-LABEL: @ham( |
| ; CHECK-NEXT: [[VAR2:%.*]] = alloca i8, align 1 |
| ; CHECK-NEXT: [[VAR3:%.*]] = alloca i8, align 1 |
| ; CHECK-NEXT: [[VAR4:%.*]] = alloca i8, align 1 |
| ; CHECK-NEXT: [[VAR5:%.*]] = alloca i8, align 1 |
| ; CHECK-NEXT: [[VAR12:%.*]] = alloca [12 x ptr], align 8 |
| ; CHECK-NEXT: [[VAR15:%.*]] = call ptr @wibble(ptr [[VAR2]]) |
| ; CHECK-NEXT: [[VAR16:%.*]] = call ptr @wibble(ptr [[VAR3]]) |
| ; CHECK-NEXT: [[VAR17:%.*]] = call ptr @wibble(ptr [[VAR4]]) |
| ; CHECK-NEXT: [[VAR23:%.*]] = call ptr @llvm.stacksave.p0() |
| ; CHECK-NEXT: [[VAR24:%.*]] = alloca inalloca i32, align 4 |
| ; CHECK-NEXT: call void @quux(ptr inalloca(i32) [[VAR24]]) |
| ; CHECK-NEXT: call void @llvm.stackrestore.p0(ptr [[VAR23]]) |
| ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x ptr> poison, ptr [[VAR4]], i32 0 |
| ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x ptr> [[TMP1]], <4 x ptr> poison, <4 x i32> zeroinitializer |
| ; CHECK-NEXT: store <4 x ptr> [[TMP2]], ptr [[VAR12]], align 8 |
| ; CHECK-NEXT: [[VAR36:%.*]] = getelementptr inbounds [12 x ptr], ptr [[VAR12]], i32 0, i32 4 |
| ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x ptr> [[TMP1]], ptr [[VAR5]], i32 1 |
| ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x ptr> [[TMP3]], <4 x ptr> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1> |
| ; CHECK-NEXT: store <4 x ptr> [[TMP4]], ptr [[VAR36]], align 8 |
| ; CHECK-NEXT: ret void |
| ; |
| %var2 = alloca i8 |
| %var3 = alloca i8 |
| %var4 = alloca i8 |
| %var5 = alloca i8 |
| %var12 = alloca [12 x ptr] |
| %var15 = call ptr @wibble(ptr %var2) |
| %var16 = call ptr @wibble(ptr %var3) |
| %var17 = call ptr @wibble(ptr %var4) |
| %var23 = call ptr @llvm.stacksave() |
| %var24 = alloca inalloca i32 |
| call void @quux(ptr inalloca(i32) %var24) |
| call void @llvm.stackrestore(ptr %var23) |
| store ptr %var4, ptr %var12 |
| %var33 = getelementptr inbounds [12 x ptr], ptr %var12, i32 0, i32 1 |
| store ptr %var4, ptr %var33 |
| %var34 = getelementptr inbounds [12 x ptr], ptr %var12, i32 0, i32 2 |
| store ptr %var4, ptr %var34 |
| %var35 = getelementptr inbounds [12 x ptr], ptr %var12, i32 0, i32 3 |
| store ptr %var4, ptr %var35 |
| %var36 = getelementptr inbounds [12 x ptr], ptr %var12, i32 0, i32 4 |
| store ptr %var4, ptr %var36 |
| %var37 = getelementptr inbounds [12 x ptr], ptr %var12, i32 0, i32 5 |
| store ptr %var5, ptr %var37 |
| %var38 = getelementptr inbounds [12 x ptr], ptr %var12, i32 0, i32 6 |
| store ptr %var5, ptr %var38 |
| %var39 = getelementptr inbounds [12 x ptr], ptr %var12, i32 0, i32 7 |
| store ptr %var5, ptr %var39 |
| ret void |
| } |
| |
| ; Four pointer stores into slots 4-7 of %var12, with no calls or stacksave: |
| ; %var4 goes to slot 4 and %var5 to slots 5-7. The expected output is a single |
| ; <4 x ptr> store of <%var4, %var5, %var5, %var5>, built with insertelement and |
| ; shufflevector. |
| define void @spam() #1 { |
| ; CHECK-LABEL: @spam( |
| ; CHECK-NEXT: [[VAR4:%.*]] = alloca i8, align 1 |
| ; CHECK-NEXT: [[VAR5:%.*]] = alloca i8, align 1 |
| ; CHECK-NEXT: [[VAR12:%.*]] = alloca [12 x ptr], align 8 |
| ; CHECK-NEXT: [[VAR36:%.*]] = getelementptr inbounds [12 x ptr], ptr [[VAR12]], i32 0, i32 4 |
| ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x ptr> poison, ptr [[VAR4]], i32 0 |
| ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x ptr> [[TMP1]], ptr [[VAR5]], i32 1 |
| ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x ptr> [[TMP2]], <4 x ptr> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1> |
| ; CHECK-NEXT: store <4 x ptr> [[TMP3]], ptr [[VAR36]], align 8 |
| ; CHECK-NEXT: ret void |
| ; |
| %var4 = alloca i8 |
| %var5 = alloca i8 |
| %var12 = alloca [12 x ptr] |
| %var36 = getelementptr inbounds [12 x ptr], ptr %var12, i32 0, i32 4 |
| store ptr %var4, ptr %var36 |
| %var37 = getelementptr inbounds [12 x ptr], ptr %var12, i32 0, i32 5 |
| store ptr %var5, ptr %var37 |
| %var38 = getelementptr inbounds [12 x ptr], ptr %var12, i32 0, i32 6 |
| store ptr %var5, ptr %var38 |
| %var39 = getelementptr inbounds [12 x ptr], ptr %var12, i32 0, i32 7 |
| store ptr %var5, ptr %var39 |
| ret void |
| } |
| |
| ; Attribute groups. #1 is attached to @ham and @spam; no reference to #0 is |
| ; visible in this file. |
| attributes #0 = { nofree nosync nounwind willreturn } |
| attributes #1 = { "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+sse3,+x87" } |