| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py |
| ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes=slp-vectorizer -mattr=+sse2 -S | FileCheck %s --check-prefix=SSE |
| ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes=slp-vectorizer -mattr=+avx -S | FileCheck %s --check-prefix=AVX |
| ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes=slp-vectorizer -mattr=+avx2 -S | FileCheck %s --check-prefix=AVX |
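; The +avx and +avx2 runs share the AVX prefix, so one set of AVX check lines covers both.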
| |
| %class.1 = type { %class.2 } |
| %class.2 = type { %"class.3" } |
| %"class.3" = type { %"struct.1", i64 } |
| %"struct.1" = type { [8 x i64] } |
| |
| ; Function Attrs: uwtable |
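; SLP vectorization of two and/store pairs fed by reloads through the nested class types above.
; The SSE checks expect the code to stay scalar; the AVX/AVX2 checks expect the pairs to be
; combined into a single <2 x i64> and + store.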
| define void @_ZN1C10SwitchModeEv(ptr %p, i64 %c) { |
| ; SSE-LABEL: @_ZN1C10SwitchModeEv( |
| ; SSE-NEXT: for.body.lr.ph.i: |
| ; SSE-NEXT: [[BAR5:%.*]] = or i64 [[C:%.*]], 1 |
| ; SSE-NEXT: store i64 [[BAR5]], ptr [[FOO_2:%.*]], align 8 |
| ; SSE-NEXT: [[FOO_4:%.*]] = load i64, ptr [[FOO_2]], align 8 |
| ; SSE-NEXT: [[FOO_3:%.*]] = getelementptr inbounds [[CLASS_1:%.*]], ptr [[FOO_2]], i64 0, i32 0, i32 0, i32 0, i32 0, i64 1 |
| ; SSE-NEXT: [[FOO_5:%.*]] = load i64, ptr [[FOO_3]], align 8 |
| ; SSE-NEXT: [[BAR6:%.*]] = load i64, ptr [[FOO_2]], align 8 |
| ; SSE-NEXT: [[AND_1:%.*]] = and i64 [[BAR5]], [[FOO_4]] |
| ; SSE-NEXT: [[AND_2:%.*]] = and i64 [[BAR6]], [[FOO_5]] |
| ; SSE-NEXT: store i64 [[AND_1]], ptr [[FOO_2]], align 8 |
| ; SSE-NEXT: [[BAR4:%.*]] = getelementptr inbounds [[CLASS_2:%.*]], ptr [[FOO_2]], i64 0, i32 0, i32 0, i32 0, i64 1 |
| ; SSE-NEXT: store i64 [[AND_2]], ptr [[BAR4]], align 8 |
| ; SSE-NEXT: ret void |
| ; |
| ; AVX-LABEL: @_ZN1C10SwitchModeEv( |
| ; AVX-NEXT: for.body.lr.ph.i: |
| ; AVX-NEXT: [[OR_1:%.*]] = or i64 [[C:%.*]], 1 |
| ; AVX-NEXT: store i64 [[OR_1]], ptr [[P:%.*]], align 8 |
| ; AVX-NEXT: [[BAR5:%.*]] = load i64, ptr [[P]], align 8 |
| ; AVX-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr [[P]], align 8 |
| ; AVX-NEXT: [[TMP1:%.*]] = insertelement <2 x i64> poison, i64 [[OR_1]], i32 0 |
| ; AVX-NEXT: [[TMP2:%.*]] = insertelement <2 x i64> [[TMP1]], i64 [[BAR5]], i32 1 |
| ; AVX-NEXT: [[TMP3:%.*]] = and <2 x i64> [[TMP2]], [[TMP0]] |
| ; AVX-NEXT: store <2 x i64> [[TMP3]], ptr [[P]], align 8 |
| ; AVX-NEXT: ret void |
| ; |
| for.body.lr.ph.i: |
| %or.1 = or i64 %c, 1 |
| store i64 %or.1, ptr %p, align 8 |
| %foo.3 = load i64, ptr %p, align 8 |
| %foo.2 = getelementptr inbounds %class.1, ptr %p, i64 0, i32 0, i32 0, i32 0, i32 0, i64 1 |
| %foo.4 = load i64, ptr %foo.2, align 8 |
| %bar5 = load i64, ptr %p, align 8 |
| %and.2 = and i64 %or.1, %foo.3 |
| %and.1 = and i64 %bar5, %foo.4 |
| store i64 %and.2, ptr %p, align 8 |
| %bar4 = getelementptr inbounds %class.2, ptr %p, i64 0, i32 0, i32 0, i32 0, i64 1 |
| store i64 %and.1, ptr %bar4, align 8 |
| ret void |
| } |
| |
| ; Function Attrs: norecurse nounwind uwtable |
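; Presumably the reduced test case for PR35497: shl/and and lshr chains feeding nuw/nsw adds
; that are stored back through the same base pointer. The SSE checks expect everything to remain
; scalar; the AVX checks expect the chains to be partially rewritten into <2 x i64> operations.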
| define void @pr35497(ptr %p, i64 %c) { |
| ; SSE-LABEL: @pr35497( |
| ; SSE-NEXT: entry: |
| ; SSE-NEXT: [[TMP0:%.*]] = load i64, ptr [[P:%.*]], align 1 |
| ; SSE-NEXT: [[AND:%.*]] = shl i64 [[TMP0]], 2 |
| ; SSE-NEXT: [[SHL:%.*]] = and i64 [[AND]], 20 |
| ; SSE-NEXT: [[ADD:%.*]] = add i64 [[C:%.*]], [[C]] |
| ; SSE-NEXT: store i64 [[ADD]], ptr [[P]], align 1 |
| ; SSE-NEXT: [[ARRAYIDX2_1:%.*]] = getelementptr inbounds [0 x i64], ptr [[P]], i64 0, i64 5 |
| ; SSE-NEXT: [[AND_1:%.*]] = shl i64 [[C]], 2 |
| ; SSE-NEXT: [[SHL_1:%.*]] = and i64 [[AND_1]], 20 |
| ; SSE-NEXT: [[SHR_1:%.*]] = lshr i64 [[C]], 6 |
| ; SSE-NEXT: [[ADD_1:%.*]] = add nuw nsw i64 [[SHL]], [[SHR_1]] |
| ; SSE-NEXT: [[ARRAYIDX2_2:%.*]] = getelementptr inbounds [0 x i64], ptr [[P]], i64 0, i64 4 |
| ; SSE-NEXT: [[SHR_2:%.*]] = lshr i64 [[C]], 6 |
| ; SSE-NEXT: [[ADD_2:%.*]] = add nuw nsw i64 [[SHL_1]], [[SHR_2]] |
| ; SSE-NEXT: [[AND_4:%.*]] = shl i64 [[ADD]], 2 |
| ; SSE-NEXT: [[SHL_4:%.*]] = and i64 [[AND_4]], 20 |
| ; SSE-NEXT: [[ARRAYIDX2_5:%.*]] = getelementptr inbounds [0 x i64], ptr [[P]], i64 0, i64 1 |
| ; SSE-NEXT: store i64 [[ADD_1]], ptr [[ARRAYIDX2_5]], align 1 |
| ; SSE-NEXT: [[AND_5:%.*]] = shl nuw nsw i64 [[ADD_1]], 2 |
| ; SSE-NEXT: [[SHL_5:%.*]] = and i64 [[AND_5]], 20 |
| ; SSE-NEXT: [[SHR_5:%.*]] = lshr i64 [[ADD_1]], 6 |
| ; SSE-NEXT: [[ADD_5:%.*]] = add nuw nsw i64 [[SHL_4]], [[SHR_5]] |
| ; SSE-NEXT: store i64 [[ADD_5]], ptr [[ARRAYIDX2_1]], align 1 |
| ; SSE-NEXT: store i64 [[ADD_2]], ptr [[P]], align 1 |
| ; SSE-NEXT: [[SHR_6:%.*]] = lshr i64 [[ADD_2]], 6 |
| ; SSE-NEXT: [[ADD_6:%.*]] = add nuw nsw i64 [[SHL_5]], [[SHR_6]] |
| ; SSE-NEXT: store i64 [[ADD_6]], ptr [[ARRAYIDX2_2]], align 1 |
| ; SSE-NEXT: ret void |
| ; |
| ; AVX-LABEL: @pr35497( |
| ; AVX-NEXT: entry: |
| ; AVX-NEXT: [[TMP0:%.*]] = load i64, ptr [[P:%.*]], align 1 |
| ; AVX-NEXT: [[TMP1:%.*]] = insertelement <2 x i64> poison, i64 [[C:%.*]], i32 0 |
| ; AVX-NEXT: [[TMP2:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> poison, <2 x i32> zeroinitializer |
| ; AVX-NEXT: [[TMP3:%.*]] = lshr <2 x i64> [[TMP2]], splat (i64 6) |
| ; AVX-NEXT: [[ARRAYIDX2_2:%.*]] = getelementptr inbounds [0 x i64], ptr [[P]], i64 0, i64 4 |
| ; AVX-NEXT: [[ARRAYIDX2_5:%.*]] = getelementptr inbounds [0 x i64], ptr [[P]], i64 0, i64 1 |
| ; AVX-NEXT: [[TMP4:%.*]] = insertelement <2 x i64> [[TMP2]], i64 [[TMP0]], i32 1 |
| ; AVX-NEXT: [[TMP5:%.*]] = shl <2 x i64> [[TMP4]], splat (i64 2) |
| ; AVX-NEXT: [[TMP6:%.*]] = and <2 x i64> [[TMP5]], splat (i64 20) |
| ; AVX-NEXT: [[TMP7:%.*]] = shufflevector <2 x i64> [[TMP6]], <2 x i64> [[TMP4]], <2 x i32> <i32 1, i32 2> |
| ; AVX-NEXT: [[TMP8:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> [[TMP7]], <2 x i32> <i32 1, i32 3> |
| ; AVX-NEXT: [[TMP9:%.*]] = add <2 x i64> [[TMP7]], [[TMP8]] |
| ; AVX-NEXT: [[TMP10:%.*]] = extractelement <2 x i64> [[TMP9]], i32 1 |
| ; AVX-NEXT: store i64 [[TMP10]], ptr [[P]], align 1 |
| ; AVX-NEXT: [[TMP11:%.*]] = add nuw nsw <2 x i64> [[TMP6]], [[TMP3]] |
| ; AVX-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP9]], i32 0 |
| ; AVX-NEXT: store i64 [[TMP12]], ptr [[ARRAYIDX2_5]], align 1 |
| ; AVX-NEXT: [[TMP13:%.*]] = shl <2 x i64> [[TMP9]], splat (i64 2) |
| ; AVX-NEXT: [[TMP14:%.*]] = and <2 x i64> [[TMP13]], splat (i64 20) |
| ; AVX-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0 |
| ; AVX-NEXT: store i64 [[TMP15]], ptr [[P]], align 1 |
| ; AVX-NEXT: [[TMP16:%.*]] = lshr <2 x i64> [[TMP11]], splat (i64 6) |
| ; AVX-NEXT: [[TMP17:%.*]] = add nuw nsw <2 x i64> [[TMP14]], [[TMP16]] |
| ; AVX-NEXT: store <2 x i64> [[TMP17]], ptr [[ARRAYIDX2_2]], align 1 |
| ; AVX-NEXT: ret void |
| ; |
| entry: |
| %0 = load i64, ptr %p, align 1 |
| %and = shl i64 %0, 2 |
| %shl = and i64 %and, 20 |
| %add = add i64 %c, %c |
| store i64 %add, ptr %p, align 1 |
| %arrayidx2.1 = getelementptr inbounds [0 x i64], ptr %p, i64 0, i64 5 |
| %and.1 = shl i64 %c, 2 |
| %shl.1 = and i64 %and.1, 20 |
| %shr.1 = lshr i64 %c, 6 |
| %add.1 = add nuw nsw i64 %shl, %shr.1 |
| %arrayidx2.2 = getelementptr inbounds [0 x i64], ptr %p, i64 0, i64 4 |
| %shr.2 = lshr i64 %c, 6 |
| %add.2 = add nuw nsw i64 %shl.1, %shr.2 |
| %and.4 = shl i64 %add, 2 |
| %shl.4 = and i64 %and.4, 20 |
| %arrayidx2.5 = getelementptr inbounds [0 x i64], ptr %p, i64 0, i64 1 |
| store i64 %add.1, ptr %arrayidx2.5, align 1 |
| %and.5 = shl nuw nsw i64 %add.1, 2 |
| %shl.5 = and i64 %and.5, 20 |
| %shr.5 = lshr i64 %add.1, 6 |
| %add.5 = add nuw nsw i64 %shl.4, %shr.5 |
| store i64 %add.5, ptr %arrayidx2.1, align 1 |
| store i64 %add.2, ptr %p, align 1 |
| %shr.6 = lshr i64 %add.2, 6 |
| %add.6 = add nuw nsw i64 %shl.5, %shr.6 |
| store i64 %add.6, ptr %arrayidx2.2, align 1 |
| ret void |
| } |