| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py |
| ; RUN: opt < %s -passes=slp-vectorizer -slp-threshold=-200 -mtriple=x86_64-unknown-linux -mcpu=core-avx2 -S | FileCheck %s |
| |
| ; In order to create a commutative copyable, we must create a TreeEntry that splits basic blocks. |
| ; This test hits the commutative logic, but the reorder that occurs does not affect the final output. |
| ; |
| ; Eight i32 values are loaded from %arr1 (v0-v7) and eight from %arr2 (t0-t7). Each lane stores |
| ; v + y back to %arr2, where y1 and y2 are computed in the entry block and the remaining y values |
| ; are computed in bb2, so the y operands span two basic blocks. The expected output ends in a |
| ; single <8 x i32> store. The i32 arguments %a0-%a3 are not used by this function. |
| define void @test_commutative(ptr %arr1, ptr %arr2, i32 %a0, i32 %a1, i32 %a2, i32 %a3) { |
| ; CHECK-LABEL: @test_commutative( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: [[GEP2_1:%.*]] = getelementptr i32, ptr [[ARR2:%.*]], i32 1 |
| ; CHECK-NEXT: [[GEP2_2:%.*]] = getelementptr i32, ptr [[ARR2]], i32 2 |
| ; CHECK-NEXT: [[GEP2_3:%.*]] = getelementptr i32, ptr [[ARR2]], i32 3 |
| ; CHECK-NEXT: [[ARR1:%.*]] = getelementptr i32, ptr [[ARR2]], i32 4 |
| ; CHECK-NEXT: [[T0:%.*]] = load i32, ptr [[ARR2]], align 4 |
| ; CHECK-NEXT: [[T1:%.*]] = load i32, ptr [[GEP2_1]], align 4 |
| ; CHECK-NEXT: [[T2:%.*]] = load i32, ptr [[GEP2_2]], align 4 |
| ; CHECK-NEXT: [[T3:%.*]] = load i32, ptr [[GEP2_3]], align 4 |
| ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr [[ARR3:%.*]], align 4 |
| ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[ARR1]], align 4 |
| ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> poison, <2 x i32> <i32 1, i32 2> |
| ; CHECK-NEXT: [[TMP3:%.*]] = add nsw <2 x i32> [[TMP2]], [[TMP2]] |
| ; CHECK-NEXT: br label [[BB2:%.*]] |
| ; CHECK: bb2: |
| ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> poison, i32 [[T0]], i32 0 |
| ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> |
| ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> [[TMP5]], <8 x i32> <i32 0, i32 8, i32 9, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> |
| ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[T3]], i32 3 |
| ; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison> |
| ; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <8 x i32> [[TMP7]], <8 x i32> [[TMP8]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11> |
| ; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> <i32 poison, i32 0, i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>, <8 x i32> <i32 0, i32 9, i32 10, i32 3, i32 4, i32 5, i32 6, i32 7> |
| ; CHECK-NEXT: [[TMP11:%.*]] = add nsw <8 x i32> [[TMP9]], [[TMP10]] |
| ; CHECK-NEXT: [[TMP12:%.*]] = add nsw <8 x i32> [[TMP1]], [[TMP11]] |
| ; CHECK-NEXT: store <8 x i32> [[TMP12]], ptr [[ARR2]], align 4 |
| ; CHECK-NEXT: ret void |
| ; |
| entry: |
| ; Addresses of elements 1-7 of both arrays (element 0 is the base pointer itself). |
| %gep1.1 = getelementptr i32, ptr %arr1, i32 1 |
| %gep1.2 = getelementptr i32, ptr %arr1, i32 2 |
| %gep1.3 = getelementptr i32, ptr %arr1, i32 3 |
| %gep1.4 = getelementptr i32, ptr %arr1, i32 4 |
| %gep1.5 = getelementptr i32, ptr %arr1, i32 5 |
| %gep1.6 = getelementptr i32, ptr %arr1, i32 6 |
| %gep1.7 = getelementptr i32, ptr %arr1, i32 7 |
| %gep2.1 = getelementptr i32, ptr %arr2, i32 1 |
| %gep2.2 = getelementptr i32, ptr %arr2, i32 2 |
| %gep2.3 = getelementptr i32, ptr %arr2, i32 3 |
| %gep2.4 = getelementptr i32, ptr %arr2, i32 4 |
| %gep2.5 = getelementptr i32, ptr %arr2, i32 5 |
| %gep2.6 = getelementptr i32, ptr %arr2, i32 6 |
| %gep2.7 = getelementptr i32, ptr %arr2, i32 7 |
| ; v0-v7: the eight consecutive elements of %arr1. |
| %v0 = load i32, ptr %arr1 |
| %v1 = load i32, ptr %gep1.1 |
| %v2 = load i32, ptr %gep1.2 |
| %v3 = load i32, ptr %gep1.3 |
| %v4 = load i32, ptr %gep1.4 |
| %v5 = load i32, ptr %gep1.5 |
| %v6 = load i32, ptr %gep1.6 |
| %v7 = load i32, ptr %gep1.7 |
| ; t0-t7: the eight consecutive elements of %arr2 (t1 and t2 have no users below). |
| %t0 = load i32, ptr %arr2 |
| %t1 = load i32, ptr %gep2.1 |
| %t2 = load i32, ptr %gep2.2 |
| %t3 = load i32, ptr %gep2.3 |
| %t4 = load i32, ptr %gep2.4 |
| %t5 = load i32, ptr %gep2.5 |
| %t6 = load i32, ptr %gep2.6 |
| %t7 = load i32, ptr %gep2.7 |
| ; Lanes 1 and 2: y = v + v, computed before the branch, i.e. in a different block than the other y values. |
| %y1 = add nsw i32 %v1, %v1 |
| %y2 = add nsw i32 %v2, %v2 |
| br label %bb2 |
| bb2: |
| ; Lanes 0 and 3-7: y = t + v, computed after the branch. |
| %y0 = add nsw i32 %t0, %v0 |
| %y3 = add nsw i32 %t3, %v3 |
| %y4 = add nsw i32 %t4, %v4 |
| %y5 = add nsw i32 %t5, %v5 |
| %y6 = add nsw i32 %t6, %v6 |
| %y7 = add nsw i32 %t7, %v7 |
| ; Every lane: res = v + y. |
| %res0 = add nsw i32 %v0, %y0 |
| %res1 = add nsw i32 %v1, %y1 |
| %res2 = add nsw i32 %v2, %y2 |
| %res3 = add nsw i32 %v3, %y3 |
| %res4 = add nsw i32 %v4, %y4 |
| %res5 = add nsw i32 %v5, %y5 |
| %res6 = add nsw i32 %v6, %y6 |
| %res7 = add nsw i32 %v7, %y7 |
| ; Results go to the eight consecutive elements of %arr2. |
| store i32 %res0, ptr %arr2 |
| store i32 %res1, ptr %gep2.1 |
| store i32 %res2, ptr %gep2.2 |
| store i32 %res3, ptr %gep2.3 |
| store i32 %res4, ptr %gep2.4 |
| store i32 %res5, ptr %gep2.5 |
| store i32 %res6, ptr %gep2.6 |
| store i32 %res7, ptr %gep2.7 |
| ret void |
| } |
| |
| ; Four lanes with mixed opcodes: lanes 0, 1 and 3 compute `add nsw v, y`, lane 2 computes |
| ; `udiv v, y`, where each y is `add nsw a, <constant>`. |
| ; The expected output is a single <4 x i32> store fed by one vector udiv followed by one vector |
| ; add: the divisor is 0 + 1 in lanes 0, 1 and 3, and the addend is 0 + 0 in lane 2. |
| define void @test_add_udiv(ptr %arr1, ptr %arr2, i32 %a0, i32 %a1, i32 %a2, i32 %a3) { |
| ; CHECK-LABEL: @test_add_udiv( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[ARR1:%.*]], align 4 |
| ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> <i32 0, i32 0, i32 poison, i32 0>, i32 [[A2:%.*]], i32 2 |
| ; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[TMP1]], <i32 1, i32 1, i32 42, i32 1> |
| ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> <i32 poison, i32 poison, i32 0, i32 poison>, i32 [[A0:%.*]], i32 0 |
| ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[A1:%.*]], i32 1 |
| ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[A3:%.*]], i32 3 |
| ; CHECK-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> <i32 1146, i32 146, i32 0, i32 0>, [[TMP5]] |
| ; CHECK-NEXT: [[TMP7:%.*]] = udiv <4 x i32> [[TMP0]], [[TMP2]] |
| ; CHECK-NEXT: [[TMP8:%.*]] = add nsw <4 x i32> [[TMP7]], [[TMP6]] |
| ; CHECK-NEXT: store <4 x i32> [[TMP8]], ptr [[ARR2:%.*]], align 4 |
| ; CHECK-NEXT: ret void |
| ; |
| entry: |
| %gep1.1 = getelementptr i32, ptr %arr1, i32 1 |
| %gep1.2 = getelementptr i32, ptr %arr1, i32 2 |
| %gep1.3 = getelementptr i32, ptr %arr1, i32 3 |
| %gep2.1 = getelementptr i32, ptr %arr2, i32 1 |
| %gep2.2 = getelementptr i32, ptr %arr2, i32 2 |
| %gep2.3 = getelementptr i32, ptr %arr2, i32 3 |
| ; v0-v3: four consecutive elements of %arr1. |
| %v0 = load i32, ptr %arr1 |
| %v1 = load i32, ptr %gep1.1 |
| %v2 = load i32, ptr %gep1.2 |
| %v3 = load i32, ptr %gep1.3 |
| ; y0-y3: each scalar argument plus a per-lane constant (lane 3 adds 0). |
| %y0 = add nsw i32 %a0, 1146 |
| %y1 = add nsw i32 %a1, 146 |
| %y2 = add nsw i32 %a2, 42 |
| %y3 = add nsw i32 %a3, 0 |
| ; Lane 2 is the odd one out: udiv instead of add, with v as the dividend. |
| %res0 = add nsw i32 %v0, %y0 |
| %res1 = add nsw i32 %v1, %y1 |
| %res2 = udiv i32 %v2, %y2 |
| %res3 = add nsw i32 %v3, %y3 |
| ; Results go to four consecutive elements of %arr2. |
| store i32 %res0, ptr %arr2 |
| store i32 %res1, ptr %gep2.1 |
| store i32 %res2, ptr %gep2.2 |
| store i32 %res3, ptr %gep2.3 |
| ret void |
| } |
| |
| ; Four lanes with mixed opcodes: lanes 0, 1 and 3 are `add nsw`, lane 2 is `udiv v, y`. |
| ; Only lane 1's add has its operands swapped (`add nsw y1, v1`); lanes 0 and 3 keep v first. |
| ; The expected output is one vector udiv followed by one vector add and a single <4 x i32> store. |
| define void @test_add_udiv_reorder_add(ptr %arr1, ptr %arr2, i32 %a0, i32 %a1, i32 %a2, i32 %a3) { |
| ; CHECK-LABEL: @test_add_udiv_reorder_add( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[ARR1:%.*]], align 4 |
| ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> <i32 0, i32 0, i32 poison, i32 0>, i32 [[A2:%.*]], i32 2 |
| ; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[TMP1]], <i32 1, i32 1, i32 42, i32 1> |
| ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> <i32 poison, i32 poison, i32 0, i32 poison>, i32 [[A0:%.*]], i32 0 |
| ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[A1:%.*]], i32 1 |
| ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[V3:%.*]], i32 3 |
| ; CHECK-NEXT: [[TMP8:%.*]] = add nsw <4 x i32> <i32 1146, i32 146, i32 0, i32 0>, [[TMP6]] |
| ; CHECK-NEXT: [[TMP7:%.*]] = udiv <4 x i32> [[TMP0]], [[TMP2]] |
| ; CHECK-NEXT: [[TMP10:%.*]] = add nsw <4 x i32> [[TMP7]], [[TMP8]] |
| ; CHECK-NEXT: store <4 x i32> [[TMP10]], ptr [[ARR2:%.*]], align 4 |
| ; CHECK-NEXT: ret void |
| ; |
| entry: |
| %gep1.1 = getelementptr i32, ptr %arr1, i32 1 |
| %gep1.2 = getelementptr i32, ptr %arr1, i32 2 |
| %gep1.3 = getelementptr i32, ptr %arr1, i32 3 |
| %gep2.1 = getelementptr i32, ptr %arr2, i32 1 |
| %gep2.2 = getelementptr i32, ptr %arr2, i32 2 |
| %gep2.3 = getelementptr i32, ptr %arr2, i32 3 |
| ; v0-v3: four consecutive elements of %arr1. |
| %v0 = load i32, ptr %arr1 |
| %v1 = load i32, ptr %gep1.1 |
| %v2 = load i32, ptr %gep1.2 |
| %v3 = load i32, ptr %gep1.3 |
| ; y0-y3: each scalar argument plus a per-lane constant (lane 3 adds 0). |
| %y0 = add nsw i32 %a0, 1146 |
| %y1 = add nsw i32 %a1, 146 |
| %y2 = add nsw i32 %a2, 42 |
| %y3 = add nsw i32 %a3, 0 |
| ; %res1 takes (y, v) while %res0 and %res3 take (v, y); lane 2 is a udiv. |
| %res0 = add nsw i32 %v0, %y0 |
| %res1 = add nsw i32 %y1, %v1 |
| %res2 = udiv i32 %v2, %y2 |
| %res3 = add nsw i32 %v3, %y3 |
| ; Results go to four consecutive elements of %arr2. |
| store i32 %res0, ptr %arr2 |
| store i32 %res1, ptr %gep2.1 |
| store i32 %res2, ptr %gep2.2 |
| store i32 %res3, ptr %gep2.3 |
| ret void |
| } |
| |
| ; Four lanes with mixed opcodes: lanes 0, 1 and 3 are `add nsw`, lane 2 is `udiv v, y`. |
| ; All three adds are written with the y operand first (`add nsw y, v`). |
| ; The expected output is one vector udiv followed by one vector add and a single <4 x i32> store. |
| define void @test_add_udiv_commuted(ptr %arr1, ptr %arr2, i32 %a0, i32 %a1, i32 %a2, i32 %a3) { |
| ; CHECK-LABEL: @test_add_udiv_commuted( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[ARR1:%.*]], align 4 |
| ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> <i32 0, i32 0, i32 poison, i32 0>, i32 [[A2:%.*]], i32 2 |
| ; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i32> [[TMP4]], <i32 1, i32 1, i32 42, i32 1> |
| ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> <i32 poison, i32 poison, i32 0, i32 poison>, i32 [[A0:%.*]], i32 0 |
| ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[A1:%.*]], i32 1 |
| ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[A3:%.*]], i32 3 |
| ; CHECK-NEXT: [[TMP8:%.*]] = add nsw <4 x i32> <i32 1146, i32 146, i32 0, i32 0>, [[TMP3]] |
| ; CHECK-NEXT: [[TMP5:%.*]] = udiv <4 x i32> [[TMP0]], [[TMP6]] |
| ; CHECK-NEXT: [[TMP9:%.*]] = add nsw <4 x i32> [[TMP5]], [[TMP8]] |
| ; CHECK-NEXT: store <4 x i32> [[TMP9]], ptr [[ARR2:%.*]], align 4 |
| ; CHECK-NEXT: ret void |
| ; |
| entry: |
| %gep1.1 = getelementptr i32, ptr %arr1, i32 1 |
| %gep1.2 = getelementptr i32, ptr %arr1, i32 2 |
| %gep1.3 = getelementptr i32, ptr %arr1, i32 3 |
| %gep2.1 = getelementptr i32, ptr %arr2, i32 1 |
| %gep2.2 = getelementptr i32, ptr %arr2, i32 2 |
| %gep2.3 = getelementptr i32, ptr %arr2, i32 3 |
| ; v0-v3: four consecutive elements of %arr1. |
| %v0 = load i32, ptr %arr1 |
| %v1 = load i32, ptr %gep1.1 |
| %v2 = load i32, ptr %gep1.2 |
| %v3 = load i32, ptr %gep1.3 |
| ; y0-y3: each scalar argument plus a per-lane constant (lane 3 adds 0). |
| %y0 = add nsw i32 %a0, 1146 |
| %y1 = add nsw i32 %a1, 146 |
| %y2 = add nsw i32 %a2, 42 |
| %y3 = add nsw i32 %a3, 0 |
| ; Adds take (y, v); the udiv in lane 2 still has v as the dividend. |
| %res0 = add nsw i32 %y0, %v0 |
| %res1 = add nsw i32 %y1, %v1 |
| %res2 = udiv i32 %v2, %y2 |
| %res3 = add nsw i32 %y3, %v3 |
| ; Results go to four consecutive elements of %arr2. |
| store i32 %res0, ptr %arr2 |
| store i32 %res1, ptr %gep2.1 |
| store i32 %res2, ptr %gep2.2 |
| store i32 %res3, ptr %gep2.3 |
| ret void |
| } |
| |
| ; Four lanes with mixed opcodes: lanes 0, 1 and 3 compute `add nsw v, y`, lane 2 computes |
| ; `mul v, y`. Lane 2's y is a plain `add` (no nsw), unlike the other three lanes. |
| ; The expected output computes a full vector add and a full vector mul on the same operands |
| ; and blends them with a shufflevector that takes lane 2 from the mul. |
| define void @test_add_mul(ptr %arr1, ptr %arr2, i32 %a0, i32 %a1, i32 %a2, i32 %a3) { |
| ; CHECK-LABEL: @test_add_mul( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[A0:%.*]], i32 0 |
| ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[A1:%.*]], i32 1 |
| ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[A2:%.*]], i32 2 |
| ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[A3:%.*]], i32 3 |
| ; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[TMP3]], <i32 1146, i32 146, i32 42, i32 0> |
| ; CHECK-NEXT: [[TMP6:%.*]] = load <4 x i32>, ptr [[ARR1:%.*]], align 4 |
| ; CHECK-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> [[TMP6]], [[TMP4]] |
| ; CHECK-NEXT: [[TMP9:%.*]] = mul <4 x i32> [[TMP6]], [[TMP4]] |
| ; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP7]], <4 x i32> [[TMP9]], <4 x i32> <i32 0, i32 1, i32 6, i32 3> |
| ; CHECK-NEXT: store <4 x i32> [[TMP8]], ptr [[ARR2:%.*]], align 4 |
| ; CHECK-NEXT: ret void |
| ; |
| entry: |
| %gep1.1 = getelementptr i32, ptr %arr1, i32 1 |
| %gep1.2 = getelementptr i32, ptr %arr1, i32 2 |
| %gep1.3 = getelementptr i32, ptr %arr1, i32 3 |
| %gep2.1 = getelementptr i32, ptr %arr2, i32 1 |
| %gep2.2 = getelementptr i32, ptr %arr2, i32 2 |
| %gep2.3 = getelementptr i32, ptr %arr2, i32 3 |
| ; v0-v3: four consecutive elements of %arr1. |
| %v0 = load i32, ptr %arr1 |
| %v1 = load i32, ptr %gep1.1 |
| %v2 = load i32, ptr %gep1.2 |
| %v3 = load i32, ptr %gep1.3 |
| ; y0-y3: each scalar argument plus a per-lane constant; %y2 alone lacks the nsw flag. |
| %y0 = add nsw i32 %a0, 1146 |
| %y1 = add nsw i32 %a1, 146 |
| %y2 = add i32 %a2, 42 |
| %y3 = add nsw i32 %a3, 0 |
| ; Lane 2 is the odd one out: mul instead of add. |
| %res0 = add nsw i32 %v0, %y0 |
| %res1 = add nsw i32 %v1, %y1 |
| %res2 = mul i32 %v2, %y2 |
| %res3 = add nsw i32 %v3, %y3 |
| ; Results go to four consecutive elements of %arr2. |
| store i32 %res0, ptr %arr2 |
| store i32 %res1, ptr %gep2.1 |
| store i32 %res2, ptr %gep2.2 |
| store i32 %res3, ptr %gep2.3 |
| ret void |
| } |
| |
| ; Four lanes with mixed opcodes: lanes 0, 1 and 3 are `add nsw`, lane 2 is `mul v, y`. |
| ; Operands are commuted relative to the usual (v, y) / (a, constant) order: the three result |
| ; adds take y first, and lane 2's y is written with the constant first (`add 42, a2`). |
| ; The expected output computes a full vector add and a full vector mul on the same operands |
| ; and blends them with a shufflevector that takes lane 2 from the mul. |
| define void @test_add_mul_commuted(ptr %arr1, ptr %arr2, i32 %a0, i32 %a1, i32 %a2, i32 %a3) { |
| ; CHECK-LABEL: @test_add_mul_commuted( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[A0:%.*]], i32 0 |
| ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[A1:%.*]], i32 1 |
| ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[A2:%.*]], i32 2 |
| ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[A3:%.*]], i32 3 |
| ; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i32> <i32 1146, i32 146, i32 42, i32 0>, [[TMP3]] |
| ; CHECK-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr [[ARR1:%.*]], align 4 |
| ; CHECK-NEXT: [[TMP9:%.*]] = add nsw <4 x i32> [[TMP5]], [[TMP8]] |
| ; CHECK-NEXT: [[TMP7:%.*]] = mul <4 x i32> [[TMP5]], [[TMP8]] |
| ; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP9]], <4 x i32> [[TMP7]], <4 x i32> <i32 0, i32 1, i32 6, i32 3> |
| ; CHECK-NEXT: store <4 x i32> [[TMP10]], ptr [[ARR2:%.*]], align 4 |
| ; CHECK-NEXT: ret void |
| ; |
| entry: |
| %gep1.1 = getelementptr i32, ptr %arr1, i32 1 |
| %gep1.2 = getelementptr i32, ptr %arr1, i32 2 |
| %gep1.3 = getelementptr i32, ptr %arr1, i32 3 |
| %gep2.1 = getelementptr i32, ptr %arr2, i32 1 |
| %gep2.2 = getelementptr i32, ptr %arr2, i32 2 |
| %gep2.3 = getelementptr i32, ptr %arr2, i32 3 |
| ; v0-v3: four consecutive elements of %arr1. |
| %v0 = load i32, ptr %arr1 |
| %v1 = load i32, ptr %gep1.1 |
| %v2 = load i32, ptr %gep1.2 |
| %v3 = load i32, ptr %gep1.3 |
| ; y0-y3: argument plus constant; %y2 has the constant as its first operand and no nsw flag. |
| %y0 = add nsw i32 %a0, 1146 |
| %y1 = add nsw i32 %a1, 146 |
| %y2 = add i32 42, %a2 |
| %y3 = add nsw i32 %a3, 0 |
| ; Adds take (y, v); the mul in lane 2 takes (v, y). |
| %res0 = add nsw i32 %y0, %v0 |
| %res1 = add nsw i32 %y1, %v1 |
| %res2 = mul i32 %v2, %y2 |
| %res3 = add nsw i32 %y3, %v3 |
| ; Results go to four consecutive elements of %arr2. |
| store i32 %res0, ptr %arr2 |
| store i32 %res1, ptr %gep2.1 |
| store i32 %res2, ptr %gep2.2 |
| store i32 %res3, ptr %gep2.3 |
| ret void |
| } |
| |
| ; Four lanes with mixed opcodes: lanes 0, 1 and 3 compute `add nsw v, y`, lane 2 computes |
| ; `udiv v, y`. Lane 2's divisor is produced by a `sub` (a2 - 42) rather than an add. |
| ; The expected output builds the divisor vector with a vector sub whose other lanes are 1 - 0, |
| ; then uses one vector udiv followed by one vector add (lane 2 adds 0 + 0) and a single store. |
| define void @test_add_udiv_sub(ptr %arr1, ptr %arr2, i32 %a0, i32 %a1, i32 %a2, i32 %a3) { |
| ; CHECK-LABEL: @test_add_udiv_sub( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[ARR1:%.*]], align 4 |
| ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> <i32 1, i32 1, i32 poison, i32 1>, i32 [[A2:%.*]], i32 2 |
| ; CHECK-NEXT: [[TMP2:%.*]] = sub <4 x i32> [[TMP1]], <i32 0, i32 0, i32 42, i32 0> |
| ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> <i32 poison, i32 poison, i32 0, i32 poison>, i32 [[A0:%.*]], i32 0 |
| ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[A1:%.*]], i32 1 |
| ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[A3:%.*]], i32 3 |
| ; CHECK-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> <i32 1146, i32 146, i32 0, i32 0>, [[TMP5]] |
| ; CHECK-NEXT: [[TMP7:%.*]] = udiv <4 x i32> [[TMP0]], [[TMP2]] |
| ; CHECK-NEXT: [[TMP8:%.*]] = add nsw <4 x i32> [[TMP7]], [[TMP6]] |
| ; CHECK-NEXT: store <4 x i32> [[TMP8]], ptr [[ARR2:%.*]], align 4 |
| ; CHECK-NEXT: ret void |
| ; |
| entry: |
| %gep1.1 = getelementptr i32, ptr %arr1, i32 1 |
| %gep1.2 = getelementptr i32, ptr %arr1, i32 2 |
| %gep1.3 = getelementptr i32, ptr %arr1, i32 3 |
| %gep2.1 = getelementptr i32, ptr %arr2, i32 1 |
| %gep2.2 = getelementptr i32, ptr %arr2, i32 2 |
| %gep2.3 = getelementptr i32, ptr %arr2, i32 3 |
| ; v0-v3: four consecutive elements of %arr1. |
| %v0 = load i32, ptr %arr1 |
| %v1 = load i32, ptr %gep1.1 |
| %v2 = load i32, ptr %gep1.2 |
| %v3 = load i32, ptr %gep1.3 |
| ; y0, y1 and y3 are adds; %y2 is a sub without the nsw flag. |
| %y0 = add nsw i32 %a0, 1146 |
| %y1 = add nsw i32 %a1, 146 |
| %y2 = sub i32 %a2, 42 |
| %y3 = add nsw i32 %a3, 0 |
| ; Lane 2 is the odd one out: udiv instead of add, with v as the dividend. |
| %res0 = add nsw i32 %v0, %y0 |
| %res1 = add nsw i32 %v1, %y1 |
| %res2 = udiv i32 %v2, %y2 |
| %res3 = add nsw i32 %v3, %y3 |
| ; Results go to four consecutive elements of %arr2. |
| store i32 %res0, ptr %arr2 |
| store i32 %res1, ptr %gep2.1 |
| store i32 %res2, ptr %gep2.2 |
| store i32 %res3, ptr %gep2.3 |
| ret void |
| } |
| |
| ; Four lanes with mixed opcodes: lanes 0, 1 and 3 are `add nsw`, lane 2 is `udiv v, y` with a |
| ; divisor produced by a `sub` (a2 - 42). All three result adds are written with y first. |
| ; The expected output builds the divisor vector with a vector sub whose other lanes are 1 - 0, |
| ; then uses one vector udiv followed by one vector add (lane 2 adds 0 + 0) and a single store. |
| define void @test_add_udiv_sub_commuted(ptr %arr1, ptr %arr2, i32 %a0, i32 %a1, i32 %a2, i32 %a3) { |
| ; CHECK-LABEL: @test_add_udiv_sub_commuted( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[ARR1:%.*]], align 4 |
| ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> <i32 1, i32 1, i32 poison, i32 1>, i32 [[A2:%.*]], i32 2 |
| ; CHECK-NEXT: [[TMP6:%.*]] = sub <4 x i32> [[TMP4]], <i32 0, i32 0, i32 42, i32 0> |
| ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> <i32 poison, i32 poison, i32 0, i32 poison>, i32 [[A0:%.*]], i32 0 |
| ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[A1:%.*]], i32 1 |
| ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[A3:%.*]], i32 3 |
| ; CHECK-NEXT: [[TMP8:%.*]] = add nsw <4 x i32> <i32 1146, i32 146, i32 0, i32 0>, [[TMP3]] |
| ; CHECK-NEXT: [[TMP5:%.*]] = udiv <4 x i32> [[TMP0]], [[TMP6]] |
| ; CHECK-NEXT: [[TMP9:%.*]] = add nsw <4 x i32> [[TMP5]], [[TMP8]] |
| ; CHECK-NEXT: store <4 x i32> [[TMP9]], ptr [[ARR2:%.*]], align 4 |
| ; CHECK-NEXT: ret void |
| ; |
| entry: |
| %gep1.1 = getelementptr i32, ptr %arr1, i32 1 |
| %gep1.2 = getelementptr i32, ptr %arr1, i32 2 |
| %gep1.3 = getelementptr i32, ptr %arr1, i32 3 |
| %gep2.1 = getelementptr i32, ptr %arr2, i32 1 |
| %gep2.2 = getelementptr i32, ptr %arr2, i32 2 |
| %gep2.3 = getelementptr i32, ptr %arr2, i32 3 |
| ; v0-v3: four consecutive elements of %arr1. |
| %v0 = load i32, ptr %arr1 |
| %v1 = load i32, ptr %gep1.1 |
| %v2 = load i32, ptr %gep1.2 |
| %v3 = load i32, ptr %gep1.3 |
| ; y0, y1 and y3 are adds; %y2 is a sub without the nsw flag. |
| %y0 = add nsw i32 %a0, 1146 |
| %y1 = add nsw i32 %a1, 146 |
| %y2 = sub i32 %a2, 42 |
| %y3 = add nsw i32 %a3, 0 |
| ; Adds take (y, v); the udiv in lane 2 still has v as the dividend. |
| %res0 = add nsw i32 %y0, %v0 |
| %res1 = add nsw i32 %y1, %v1 |
| %res2 = udiv i32 %v2, %y2 |
| %res3 = add nsw i32 %y3, %v3 |
| ; Results go to four consecutive elements of %arr2. |
| store i32 %res0, ptr %arr2 |
| store i32 %res1, ptr %gep2.1 |
| store i32 %res2, ptr %gep2.2 |
| store i32 %res3, ptr %gep2.3 |
| ret void |
| } |
| |
| ; Four consecutive stores where lanes 0 and 2 hold negated loads (`sub nsw 0, v`) and lanes 1 |
| ; and 3 hold the loaded values unchanged. |
| ; The expected output keeps both subs (and all four loads) scalar, assembles the stored value |
| ; with four insertelements and emits a single <4 x i32> store. |
| ; NOTE(review): judging by the function name, the point appears to be that the plain loads in |
| ; lanes 1 and 3 are not modeled as copyable `sub` lanes with swapped operands - confirm against |
| ; the change that introduced this test. |
| define void @test_sub_zero_copyable_not_swapped(ptr %z) { |
| ; CHECK-LABEL: @test_sub_zero_copyable_not_swapped( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: [[GEP_SRC_FAR0:%.*]] = getelementptr inbounds i32, ptr [[Z:%.*]], i64 0 |
| ; CHECK-NEXT: [[GEP_SRC_NEAR0:%.*]] = getelementptr inbounds i32, ptr [[Z]], i64 1 |
| ; CHECK-NEXT: [[GEP_SRC_FAR1:%.*]] = getelementptr inbounds i32, ptr [[Z]], i64 2 |
| ; CHECK-NEXT: [[GEP_SRC_NEAR1:%.*]] = getelementptr inbounds i32, ptr [[Z]], i64 3 |
| ; CHECK-NEXT: [[GEP_DST0:%.*]] = getelementptr inbounds i32, ptr [[Z]], i64 64 |
| ; CHECK-NEXT: [[V0:%.*]] = load i32, ptr [[GEP_SRC_FAR0]], align 4 |
| ; CHECK-NEXT: [[V1:%.*]] = load i32, ptr [[GEP_SRC_NEAR0]], align 4 |
| ; CHECK-NEXT: [[V2:%.*]] = load i32, ptr [[GEP_SRC_FAR1]], align 4 |
| ; CHECK-NEXT: [[V3:%.*]] = load i32, ptr [[GEP_SRC_NEAR1]], align 4 |
| ; CHECK-NEXT: [[NEG0:%.*]] = sub nsw i32 0, [[V0]] |
| ; CHECK-NEXT: [[NEG2:%.*]] = sub nsw i32 0, [[V2]] |
| ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[NEG0]], i32 0 |
| ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> [[TMP0]], i32 [[V1]], i32 1 |
| ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[NEG2]], i32 2 |
| ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[V3]], i32 3 |
| ; CHECK-NEXT: store <4 x i32> [[TMP3]], ptr [[GEP_DST0]], align 4 |
| ; CHECK-NEXT: ret void |
| ; |
| entry: |
| ; Sources are elements 0-3 of %z; destinations are elements 64-67 of %z. |
| %gep.src.far0 = getelementptr inbounds i32, ptr %z, i64 0 |
| %gep.src.near0 = getelementptr inbounds i32, ptr %z, i64 1 |
| %gep.src.far1 = getelementptr inbounds i32, ptr %z, i64 2 |
| %gep.src.near1 = getelementptr inbounds i32, ptr %z, i64 3 |
| %gep.dst0 = getelementptr inbounds i32, ptr %z, i64 64 |
| %gep.dst1 = getelementptr inbounds i32, ptr %z, i64 65 |
| %gep.dst2 = getelementptr inbounds i32, ptr %z, i64 66 |
| %gep.dst3 = getelementptr inbounds i32, ptr %z, i64 67 |
| %v0 = load i32, ptr %gep.src.far0, align 4 |
| %v1 = load i32, ptr %gep.src.near0, align 4 |
| %v2 = load i32, ptr %gep.src.far1, align 4 |
| %v3 = load i32, ptr %gep.src.near1, align 4 |
| ; Only the even lanes are negated; the zero is the first (left) operand of each sub. |
| %neg0 = sub nsw i32 0, %v0 |
| %neg2 = sub nsw i32 0, %v2 |
| ; Stored lanes alternate between a negated value and a raw load. |
| store i32 %neg0, ptr %gep.dst0, align 4 |
| store i32 %v1, ptr %gep.dst1, align 4 |
| store i32 %neg2, ptr %gep.dst2, align 4 |
| store i32 %v3, ptr %gep.dst3, align 4 |
| ret void |
| } |