| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py |
| ; RUN: opt < %s -instcombine -S | FileCheck %s |
| |
; Floating-point add reductions used by the tests below. Each takes a scalar
; float operand followed by the vector to be summed.
; NOTE(review): the expected output in this file refers to these intrinsics as
; @llvm.vector.reduce.fadd.v4f32 / .v8f32 (without the extra ".f32"), so the
; names declared here are presumably remangled when the file is read - confirm.
| declare float @llvm.vector.reduce.fadd.f32.v4f32(float, <4 x float>) |
| declare float @llvm.vector.reduce.fadd.f32.v8f32(float, <8 x float>) |
; External function with no body here; the extra-use tests call it to give a
; reduction result a second user.
| declare void @use_f32(float) |
| |
; Integer add reductions; these take only the vector operand.
| declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) |
| declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>) |
; Integer counterpart of @use_f32.
| declare void @use_i32(i32) |
| |
; Positive test: both operands of the fsub are fadd reductions over the same
; <4 x float> type, each reduction has a single use, and the fsub carries
; 'reassoc nsz'. The expected output subtracts the two vectors first, runs one
; reduction on the difference starting from %a0, and then subtracts %a1.
| define float @diff_of_sums_v4f32(float %a0, <4 x float> %v0, float %a1, <4 x float> %v1) { |
| ; CHECK-LABEL: @diff_of_sums_v4f32( |
| ; CHECK-NEXT: [[TMP1:%.*]] = fsub reassoc nsz <4 x float> [[V0:%.*]], [[V1:%.*]] |
| ; CHECK-NEXT: [[TMP2:%.*]] = call reassoc nsz float @llvm.vector.reduce.fadd.v4f32(float [[A0:%.*]], <4 x float> [[TMP1]]) |
| ; CHECK-NEXT: [[R:%.*]] = fsub reassoc nsz float [[TMP2]], [[A1:%.*]] |
| ; CHECK-NEXT: ret float [[R]] |
| ; |
| %r0 = call float @llvm.vector.reduce.fadd.f32.v4f32(float %a0, <4 x float> %v0) |
| %r1 = call float @llvm.vector.reduce.fadd.f32.v4f32(float %a1, <4 x float> %v1) |
; The fast-math flags appear only on this fsub; the two reduction calls above
; carry none. The expected output puts the same flags on every new instruction.
| %r = fsub reassoc nsz float %r0, %r1 |
| ret float %r |
| } |
| |
| ; negative test - fsub must allow reassociation |
| |
; Same shape as @diff_of_sums_v4f32, but here the reductions are 'fast' while
; the fsub has only 'ninf nnan nsz' (no 'reassoc'). The expected output keeps
; both reduction calls and the scalar fsub as they are.
| define float @diff_of_sums_v4f32_fmf(float %a0, <4 x float> %v0, float %a1, <4 x float> %v1) { |
| ; CHECK-LABEL: @diff_of_sums_v4f32_fmf( |
| ; CHECK-NEXT: [[R0:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float [[A0:%.*]], <4 x float> [[V0:%.*]]) |
| ; CHECK-NEXT: [[R1:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float [[A1:%.*]], <4 x float> [[V1:%.*]]) |
| ; CHECK-NEXT: [[R:%.*]] = fsub nnan ninf nsz float [[R0]], [[R1]] |
| ; CHECK-NEXT: ret float [[R]] |
| ; |
| %r0 = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float %a0, <4 x float> %v0) |
| %r1 = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float %a1, <4 x float> %v1) |
; No 'reassoc' on this instruction. The expected output lists the same three
; flags in the order 'nnan ninf nsz'.
| %r = fsub ninf nnan nsz float %r0, %r1 |
| ret float %r |
| } |
| |
| ; negative test - extra uses could create extra instructions |
| |
; All instructions are 'fast' and both reductions are over <4 x float>, but the
; first reduction result (%r0) is also passed to @use_f32. The expected output
; leaves every instruction in place.
| define float @diff_of_sums_extra_use1(float %a0, <4 x float> %v0, float %a1, <4 x float> %v1) { |
| ; CHECK-LABEL: @diff_of_sums_extra_use1( |
| ; CHECK-NEXT: [[R0:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float [[A0:%.*]], <4 x float> [[V0:%.*]]) |
| ; CHECK-NEXT: call void @use_f32(float [[R0]]) |
| ; CHECK-NEXT: [[R1:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float [[A1:%.*]], <4 x float> [[V1:%.*]]) |
| ; CHECK-NEXT: [[R:%.*]] = fsub fast float [[R0]], [[R1]] |
| ; CHECK-NEXT: ret float [[R]] |
| ; |
| %r0 = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float %a0, <4 x float> %v0) |
; Second user of %r0, in addition to the fsub below.
| call void @use_f32(float %r0) |
| %r1 = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float %a1, <4 x float> %v1) |
| %r = fsub fast float %r0, %r1 |
| ret float %r |
| } |
| |
| ; negative test - extra uses could create extra instructions |
| |
; Mirror of @diff_of_sums_extra_use1: here it is the second reduction result
; (%r1, the subtrahend) that is also passed to @use_f32. The expected output
; leaves every instruction in place.
| define float @diff_of_sums_extra_use2(float %a0, <4 x float> %v0, float %a1, <4 x float> %v1) { |
| ; CHECK-LABEL: @diff_of_sums_extra_use2( |
| ; CHECK-NEXT: [[R0:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float [[A0:%.*]], <4 x float> [[V0:%.*]]) |
| ; CHECK-NEXT: [[R1:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float [[A1:%.*]], <4 x float> [[V1:%.*]]) |
| ; CHECK-NEXT: call void @use_f32(float [[R1]]) |
| ; CHECK-NEXT: [[R:%.*]] = fsub fast float [[R0]], [[R1]] |
| ; CHECK-NEXT: ret float [[R]] |
| ; |
| %r0 = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float %a0, <4 x float> %v0) |
| %r1 = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float %a1, <4 x float> %v1) |
; Second user of %r1, in addition to the fsub below.
| call void @use_f32(float %r1) |
| %r = fsub fast float %r0, %r1 |
| ret float %r |
| } |
| |
| ; negative test - can't reassociate different vector types |
| |
; All instructions are 'fast' and each reduction has a single use, but the two
; reductions operate on different vector types: %v0 is <4 x float> and %v1 is
; <8 x float>. The expected output leaves every instruction in place.
| define float @diff_of_sums_type_mismatch(float %a0, <4 x float> %v0, float %a1, <8 x float> %v1) { |
| ; CHECK-LABEL: @diff_of_sums_type_mismatch( |
| ; CHECK-NEXT: [[R0:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float [[A0:%.*]], <4 x float> [[V0:%.*]]) |
| ; CHECK-NEXT: [[R1:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float [[A1:%.*]], <8 x float> [[V1:%.*]]) |
| ; CHECK-NEXT: [[R:%.*]] = fsub fast float [[R0]], [[R1]] |
| ; CHECK-NEXT: ret float [[R]] |
| ; |
| %r0 = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float %a0, <4 x float> %v0) |
; Note the v8f32 variant here versus v4f32 on the line above.
| %r1 = call fast float @llvm.vector.reduce.fadd.f32.v8f32(float %a1, <8 x float> %v1) |
| %r = fsub fast float %r0, %r1 |
| ret float %r |
| } |
| |
; Positive integer test: a scalar sub of two add reductions over the same
; <4 x i32> type, each with a single use. The expected output subtracts the
; vectors first and returns the result of one reduction over the difference.
| define i32 @diff_of_sums_v4i32(<4 x i32> %v0, <4 x i32> %v1) { |
| ; CHECK-LABEL: @diff_of_sums_v4i32( |
| ; CHECK-NEXT: [[TMP1:%.*]] = sub <4 x i32> [[V0:%.*]], [[V1:%.*]] |
| ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP1]]) |
| ; CHECK-NEXT: ret i32 [[TMP2]] |
| ; |
| %r0 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %v0) |
| %r1 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %v1) |
; Plain 'sub' with no flags; unlike the floating-point tests, no flags are
; involved anywhere in this function.
| %r = sub i32 %r0, %r1 |
| ret i32 %r |
| } |
| |
| ; negative test - extra uses could create extra instructions |
| |
; Same pattern as @diff_of_sums_v4i32, except the first reduction result (%r0)
; is also passed to @use_i32. The expected output leaves every instruction in
; place.
| define i32 @diff_of_sums_v4i32_extra_use1(<4 x i32> %v0, <4 x i32> %v1) { |
| ; CHECK-LABEL: @diff_of_sums_v4i32_extra_use1( |
| ; CHECK-NEXT: [[R0:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[V0:%.*]]) |
| ; CHECK-NEXT: call void @use_i32(i32 [[R0]]) |
| ; CHECK-NEXT: [[R1:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[V1:%.*]]) |
| ; CHECK-NEXT: [[R:%.*]] = sub i32 [[R0]], [[R1]] |
| ; CHECK-NEXT: ret i32 [[R]] |
| ; |
| %r0 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %v0) |
; Second user of %r0, in addition to the sub below.
| call void @use_i32(i32 %r0) |
| %r1 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %v1) |
| %r = sub i32 %r0, %r1 |
| ret i32 %r |
| } |
| |
| ; negative test - extra uses could create extra instructions |
| |
; Same pattern as @diff_of_sums_v4i32, except the second reduction result (%r1,
; the subtrahend) is also passed to @use_i32. The expected output leaves every
; instruction in place.
| define i32 @diff_of_sums_v4i32_extra_use2(<4 x i32> %v0, <4 x i32> %v1) { |
| ; CHECK-LABEL: @diff_of_sums_v4i32_extra_use2( |
| ; CHECK-NEXT: [[R0:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[V0:%.*]]) |
| ; CHECK-NEXT: [[R1:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[V1:%.*]]) |
| ; CHECK-NEXT: call void @use_i32(i32 [[R1]]) |
| ; CHECK-NEXT: [[R:%.*]] = sub i32 [[R0]], [[R1]] |
| ; CHECK-NEXT: ret i32 [[R]] |
| ; |
| %r0 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %v0) |
| %r1 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %v1) |
; Second user of %r1, in addition to the sub below.
| call void @use_i32(i32 %r1) |
| %r = sub i32 %r0, %r1 |
| ret i32 %r |
| } |
| |
| ; negative test - can't reassociate different vector types |
| |
; Each reduction has a single use, but the two reductions operate on different
; vector types: %v0 is <8 x i32> and %v1 is <4 x i32>. The expected output
; leaves every instruction in place.
| define i32 @diff_of_sums_type_mismatch2(<8 x i32> %v0, <4 x i32> %v1) { |
| ; CHECK-LABEL: @diff_of_sums_type_mismatch2( |
| ; CHECK-NEXT: [[R0:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[V0:%.*]]) |
| ; CHECK-NEXT: [[R1:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[V1:%.*]]) |
| ; CHECK-NEXT: [[R:%.*]] = sub i32 [[R0]], [[R1]] |
| ; CHECK-NEXT: ret i32 [[R]] |
| ; |
; Here the wider (v8i32) reduction is the minuend; in the floating-point
; @diff_of_sums_type_mismatch test the wider vector is the subtrahend.
| %r0 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %v0) |
| %r1 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %v1) |
| %r = sub i32 %r0, %r1 |
| ret i32 %r |
| } |