; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64 < %s | FileCheck %s

; nsz faddv over a <2 x float> whose lane 0 has been overwritten with +0.0.
; The checks expect no add at all: lane 1 is simply moved into s0.
define float @test_v2f32_element_0_zero(<2 x float> %vec) {
; CHECK-LABEL: test_v2f32_element_0_zero:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    mov s0, v0.s[1]
; CHECK-NEXT:    ret
entry:
  %with_zero = insertelement <2 x float> %vec, float 0.0, i64 0
  %sum = call nsz float @llvm.aarch64.neon.faddv.f32.v2f32(<2 x float> %with_zero)
  ret float %sum
}

; nsz faddv over a <4 x float> whose lane 3 has been overwritten with +0.0.
; The checks expect lanes 0 and 1 to be summed with faddp and lane 2 to be
; added with a scalar fadd; lane 3 is never read.
define float @test_v4f32_element_3_zero(<4 x float> %vec) {
; CHECK-LABEL: test_v4f32_element_3_zero:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov s1, v0.s[2]
; CHECK-NEXT:    faddp s0, v0.2s
; CHECK-NEXT:    fadd s0, s0, s1
; CHECK-NEXT:    ret
entry:
  %with_zero = insertelement <4 x float> %vec, float 0.0, i64 3
  %sum = call nsz float @llvm.aarch64.neon.faddv.f32.v4f32(<4 x float> %with_zero)
  ret float %sum
}

; nsz faddv over a <4 x float> whose lanes 0 and 2 have been overwritten with
; +0.0. The checks expect a single scalar fadd of lanes 1 and 3.
define float @test_v4f32_elements_0_2_zero(<4 x float> %vec) {
; CHECK-LABEL: test_v4f32_elements_0_2_zero:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov s1, v0.s[3]
; CHECK-NEXT:    mov s0, v0.s[1]
; CHECK-NEXT:    fadd s0, s0, s1
; CHECK-NEXT:    ret
entry:
  %zero1 = insertelement <4 x float> %vec, float 0.0, i64 0
  %zero2 = insertelement <4 x float> %zero1, float 0.0, i64 2
  %sum = call nsz float @llvm.aarch64.neon.faddv.f32.v4f32(<4 x float> %zero2)
  ret float %sum
}

; faddv over a <4 x float> in which every lane has been overwritten with +0.0.
; The checks expect the result to be a zero materialized directly in d0, with
; no add instructions.
; NOTE(review): unlike the other positive faddv tests in this file, the call
; below carries no nsz flag - confirm that this is intentional.
define float @test_v4f32_all_zero(<4 x float> %vec) {
; CHECK-LABEL: test_v4f32_all_zero:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    movi d0, #0000000000000000
; CHECK-NEXT:    ret
entry:
  %zero1 = insertelement <4 x float> %vec, float 0.0, i64 0
  %zero2 = insertelement <4 x float> %zero1, float 0.0, i64 1
  %zero3 = insertelement <4 x float> %zero2, float 0.0, i64 2
  %zero4 = insertelement <4 x float> %zero3, float 0.0, i64 3
  %sum = call float @llvm.aarch64.neon.faddv.f32.v4f32(<4 x float> %zero4)
  ret float %sum
}

; nsz faddv over a <2 x double> whose lane 0 has been overwritten with +0.0.
; The checks expect no add at all: lane 1 is simply moved into d0.
define double @test_v2f64_element_0_zero(<2 x double> %vec) {
; CHECK-LABEL: test_v2f64_element_0_zero:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov d0, v0.d[1]
; CHECK-NEXT:    ret
entry:
  %with_zero = insertelement <2 x double> %vec, double 0.0, i64 0
  %sum = call nsz double @llvm.aarch64.neon.faddv.f64.v2f64(<2 x double> %with_zero)
  ret double %sum
}

; Negative test: faddv with lane 3 set to +0.0 but WITHOUT nsz on the call.
; The checks expect the zero to be inserted into lane 3 and the full pairwise
; reduction (two faddp instructions) to be emitted.
define float @negative_test(<4 x float> %vec) {
; CHECK-LABEL: negative_test:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi d1, #0000000000000000
; CHECK-NEXT:    mov v0.s[3], v1.s[0]
; CHECK-NEXT:    faddp v0.4s, v0.4s, v0.4s
; CHECK-NEXT:    faddp s0, v0.2s
; CHECK-NEXT:    ret
  %with_zero = insertelement <4 x float> %vec, float 0.0, i64 3
  %sum = call float @llvm.aarch64.neon.faddv.f32.v4f32(<4 x float> %with_zero)
  ret float %sum
}

; reassoc+nsz llvm.vector.reduce.fadd (start value -0.0) over a <2 x float>
; whose lane 0 has been overwritten with +0.0. The checks expect no add at
; all: lane 1 is simply moved into s0.
define float @test_reduce_v2f32_element_0_zero(<2 x float> %vec) {
; CHECK-LABEL: test_reduce_v2f32_element_0_zero:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    mov s0, v0.s[1]
; CHECK-NEXT:    ret
entry:
  %with_zero = insertelement <2 x float> %vec, float 0.0, i64 0
  %sum = call reassoc nsz float @llvm.vector.reduce.fadd.v2f32(float -0.0, <2 x float> %with_zero)
  ret float %sum
}

; reassoc+nsz llvm.vector.reduce.fadd (start value -0.0) over a <4 x float>
; whose lane 3 has been overwritten with +0.0. The checks expect lanes 0 and 1
; to be summed with faddp and lane 2 to be added with a scalar fadd.
define float @test_reduce_v4f32_element_3_zero(<4 x float> %vec) {
; CHECK-LABEL: test_reduce_v4f32_element_3_zero:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov s1, v0.s[2]
; CHECK-NEXT:    faddp s0, v0.2s
; CHECK-NEXT:    fadd s0, s0, s1
; CHECK-NEXT:    ret
entry:
  %with_zero = insertelement <4 x float> %vec, float 0.0, i64 3
  %sum = call reassoc nsz float @llvm.vector.reduce.fadd.v4f32(float -0.0, <4 x float> %with_zero)
  ret float %sum
}

; reassoc+nsz llvm.vector.reduce.fadd (start value -0.0) over a <4 x float>
; whose lanes 0 and 2 have been overwritten with +0.0. The checks expect a
; single scalar fadd of lanes 1 and 3.
define float @test_reduce_v4f32_elements_0_2_zero(<4 x float> %vec) {
; CHECK-LABEL: test_reduce_v4f32_elements_0_2_zero:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov s1, v0.s[3]
; CHECK-NEXT:    mov s0, v0.s[1]
; CHECK-NEXT:    fadd s0, s0, s1
; CHECK-NEXT:    ret
entry:
  %zero1 = insertelement <4 x float> %vec, float 0.0, i64 0
  %zero2 = insertelement <4 x float> %zero1, float 0.0, i64 2
  %sum = call reassoc nsz float @llvm.vector.reduce.fadd.v4f32(float -0.0, <4 x float> %zero2)
  ret float %sum
}

; reassoc+nsz llvm.vector.reduce.fadd (start value -0.0) over a <4 x float>
; in which every lane has been overwritten with +0.0. The checks expect the
; result to be a zero materialized directly in d0, with no add instructions.
define float @test_reduce_v4f32_all_zero(<4 x float> %vec) {
; CHECK-LABEL: test_reduce_v4f32_all_zero:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    movi d0, #0000000000000000
; CHECK-NEXT:    ret
entry:
  %zero1 = insertelement <4 x float> %vec, float 0.0, i64 0
  %zero2 = insertelement <4 x float> %zero1, float 0.0, i64 1
  %zero3 = insertelement <4 x float> %zero2, float 0.0, i64 2
  %zero4 = insertelement <4 x float> %zero3, float 0.0, i64 3
  %sum = call reassoc nsz float @llvm.vector.reduce.fadd.v4f32(float -0.0, <4 x float> %zero4)
  ret float %sum
}

; reassoc+nsz llvm.vector.reduce.fadd (start value -0.0) over a <2 x double>
; whose lane 0 has been overwritten with +0.0. The checks expect no add at
; all: lane 1 is simply moved into d0.
define double @test_reduce_v2f64_element_0_zero(<2 x double> %vec) {
; CHECK-LABEL: test_reduce_v2f64_element_0_zero:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov d0, v0.d[1]
; CHECK-NEXT:    ret
entry:
  %with_zero = insertelement <2 x double> %vec, double 0.0, i64 0
  %sum = call reassoc nsz double @llvm.vector.reduce.fadd.v2f64(double -0.0, <2 x double> %with_zero)
  ret double %sum
}

; Negative test: llvm.vector.reduce.fadd with reassoc but WITHOUT nsz, lane 3
; set to +0.0. The checks expect the zero to be inserted into lane 3 and the
; full pairwise reduction (two faddp instructions) to be emitted.
define float @negative_test_reduce(<4 x float> %vec) {
; CHECK-LABEL: negative_test_reduce:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi d1, #0000000000000000
; CHECK-NEXT:    mov v0.s[3], v1.s[0]
; CHECK-NEXT:    faddp v0.4s, v0.4s, v0.4s
; CHECK-NEXT:    faddp s0, v0.2s
; CHECK-NEXT:    ret
  %with_zero = insertelement <4 x float> %vec, float 0.0, i64 3
  %sum = call reassoc float @llvm.vector.reduce.fadd.v4f32(float -0.0, <4 x float> %with_zero)
  ret float %sum
}

; llvm.vector.reduce.fadd with nsz only (no reassoc), start value -0.0, over a
; <2 x float> whose lane 0 has been overwritten with +0.0. The checks expect
; no add at all: lane 1 is simply moved into s0.
define float @test_seq_reduce_v2f32_element_0_zero(<2 x float> %vec) {
; CHECK-LABEL: test_seq_reduce_v2f32_element_0_zero:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    mov s0, v0.s[1]
; CHECK-NEXT:    ret
entry:
  %with_zero = insertelement <2 x float> %vec, float 0.0, i64 0
  %sum = call nsz float @llvm.vector.reduce.fadd.v2f32(float -0.0, <2 x float> %with_zero)
  ret float %sum
}

; llvm.vector.reduce.fadd with nsz only (no reassoc), start value -0.0, over a
; <4 x float> whose lane 3 has been overwritten with +0.0. The checks expect
; lanes 0 and 1 to be summed with faddp, then lane 2 added with a scalar fadd.
define float @test_seq_reduce_v4f32_element_3_zero(<4 x float> %vec) {
; CHECK-LABEL: test_seq_reduce_v4f32_element_3_zero:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov s1, v0.s[2]
; CHECK-NEXT:    faddp s0, v0.2s
; CHECK-NEXT:    fadd s0, s0, s1
; CHECK-NEXT:    ret
entry:
  %with_zero = insertelement <4 x float> %vec, float 0.0, i64 3
  %sum = call nsz float @llvm.vector.reduce.fadd.v4f32(float -0.0, <4 x float> %with_zero)
  ret float %sum
}

; llvm.vector.reduce.fadd with nsz only (no reassoc), start value -0.0, over a
; <4 x float> whose lanes 0 and 2 have been overwritten with +0.0. The checks
; expect a single scalar fadd of lanes 1 and 3.
define float @test_seq_reduce_v4f32_elements_0_2_zero(<4 x float> %vec) {
; CHECK-LABEL: test_seq_reduce_v4f32_elements_0_2_zero:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov s1, v0.s[3]
; CHECK-NEXT:    mov s0, v0.s[1]
; CHECK-NEXT:    fadd s0, s0, s1
; CHECK-NEXT:    ret
entry:
  %zero1 = insertelement <4 x float> %vec, float 0.0, i64 0
  %zero2 = insertelement <4 x float> %zero1, float 0.0, i64 2
  %sum = call nsz float @llvm.vector.reduce.fadd.v4f32(float -0.0, <4 x float> %zero2)
  ret float %sum
}

; llvm.vector.reduce.fadd with nsz only (no reassoc) and a caller-supplied
; %start, over a <4 x float> in which every lane has been overwritten with
; +0.0. The checks expect the function body to be a bare ret, i.e. no add is
; emitted and s0 is returned as it arrived.
define float @test_seq_reduce_v4f32_all_zero(float %start, <4 x float> %vec) {
; CHECK-LABEL: test_seq_reduce_v4f32_all_zero:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ret
entry:
  %zero1 = insertelement <4 x float> %vec, float 0.0, i64 0
  %zero2 = insertelement <4 x float> %zero1, float 0.0, i64 1
  %zero3 = insertelement <4 x float> %zero2, float 0.0, i64 2
  %zero4 = insertelement <4 x float> %zero3, float 0.0, i64 3
  %sum = call nsz float @llvm.vector.reduce.fadd.v4f32(float %start, <4 x float> %zero4)
  ret float %sum
}

; llvm.vector.reduce.fadd with nsz only (no reassoc) and a caller-supplied
; %start, over a <4 x float> whose lane 3 has been overwritten with +0.0.
; The checks expect three scalar fadds that accumulate lanes 0, 1 and 2 onto
; s0 in that order, with no add for lane 3.
define float @test_seq_reduce_with_start(float %start, <4 x float> %vec) {
; CHECK-LABEL: test_seq_reduce_with_start:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fadd s0, s0, s1
; CHECK-NEXT:    mov s2, v1.s[1]
; CHECK-NEXT:    mov s1, v1.s[2]
; CHECK-NEXT:    fadd s0, s0, s2
; CHECK-NEXT:    fadd s0, s0, s1
; CHECK-NEXT:    ret
entry:
  %with_zero = insertelement <4 x float> %vec, float 0.0, i64 3
  %sum = call nsz float @llvm.vector.reduce.fadd.v4f32(float %start, <4 x float> %with_zero)
  ret float %sum
}

; llvm.vector.reduce.fadd with nsz only (no reassoc), start value -0.0, over a
; <2 x double> whose lane 0 has been overwritten with +0.0. The checks expect
; no add at all: lane 1 is simply moved into d0.
define double @test_seq_reduce_v2f64_element_0_zero(<2 x double> %vec) {
; CHECK-LABEL: test_seq_reduce_v2f64_element_0_zero:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov d0, v0.d[1]
; CHECK-NEXT:    ret
entry:
  %with_zero = insertelement <2 x double> %vec, double 0.0, i64 0
  %sum = call nsz double @llvm.vector.reduce.fadd.v2f64(double -0.0, <2 x double> %with_zero)
  ret double %sum
}

; Negative test: llvm.vector.reduce.fadd with no fast-math flags, start value
; -0.0, lane 3 set to +0.0. The checks expect the add of the zero lane to be
; kept: a zero is materialized in d1 and added last with a scalar fadd.
define float @negative_test_seq_reduce(<4 x float> %vec) {
; CHECK-LABEL: negative_test_seq_reduce:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov s2, v0.s[2]
; CHECK-NEXT:    faddp s0, v0.2s
; CHECK-NEXT:    movi d1, #0000000000000000
; CHECK-NEXT:    fadd s0, s0, s2
; CHECK-NEXT:    fadd s0, s0, s1
; CHECK-NEXT:    ret
  %with_zero = insertelement <4 x float> %vec, float 0.0, i64 3
  %sum = call float @llvm.vector.reduce.fadd.v4f32(float -0.0, <4 x float> %with_zero)
  ret float %sum
}

; Negative test: sequential all-zero without nsz should not return Start
; directly because fadd(-0.0, +0.0) = +0.0, not -0.0.
; The checks expect exactly one scalar fadd of s0 with a materialized zero.
define float @negative_test_seq_reduce_all_zero_no_nsz(float %start, <4 x float> %vec) {
; CHECK-LABEL: negative_test_seq_reduce_all_zero_no_nsz:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    movi d1, #0000000000000000
; CHECK-NEXT:    fadd s0, s0, s1
; CHECK-NEXT:    ret
entry:
  %zero1 = insertelement <4 x float> %vec, float 0.0, i64 0
  %zero2 = insertelement <4 x float> %zero1, float 0.0, i64 1
  %zero3 = insertelement <4 x float> %zero2, float 0.0, i64 2
  %zero4 = insertelement <4 x float> %zero3, float 0.0, i64 3
  %sum = call float @llvm.vector.reduce.fadd.v4f32(float %start, <4 x float> %zero4)
  ret float %sum
}

; Test with -0.0 elements (negative zero). With nsz, these should be
; treated as zero elements.
; Here lane 3 is set to -0.0 ahead of an nsz faddv; the checks expect the same
; code as for a +0.0 lane: faddp of lanes 0 and 1, then a scalar fadd of lane 2.
define float @test_v4f32_neg_zero_element(<4 x float> %vec) {
; CHECK-LABEL: test_v4f32_neg_zero_element:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov s1, v0.s[2]
; CHECK-NEXT:    faddp s0, v0.2s
; CHECK-NEXT:    fadd s0, s0, s1
; CHECK-NEXT:    ret
entry:
  %with_neg_zero = insertelement <4 x float> %vec, float -0.0, i64 3
  %sum = call nsz float @llvm.aarch64.neon.faddv.f32.v4f32(<4 x float> %with_neg_zero)
  ret float %sum
}

; reassoc+nsz llvm.vector.reduce.fadd (start value -0.0) over a <4 x float>
; whose lane 3 has been overwritten with -0.0. The checks expect lane 3 to be
; skipped: faddp of lanes 0 and 1, then a scalar fadd of lane 2.
define float @test_reduce_v4f32_neg_zero_element(<4 x float> %vec) {
; CHECK-LABEL: test_reduce_v4f32_neg_zero_element:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov s1, v0.s[2]
; CHECK-NEXT:    faddp s0, v0.2s
; CHECK-NEXT:    fadd s0, s0, s1
; CHECK-NEXT:    ret
entry:
  %with_neg_zero = insertelement <4 x float> %vec, float -0.0, i64 3
  %sum = call reassoc nsz float @llvm.vector.reduce.fadd.v4f32(float -0.0, <4 x float> %with_neg_zero)
  ret float %sum
}

; AArch64 NEON across-lanes floating-point add intrinsics used by the tests above.
declare float @llvm.aarch64.neon.faddv.f32.v2f32(<2 x float>)
declare float @llvm.aarch64.neon.faddv.f32.v4f32(<4 x float>)
declare double @llvm.aarch64.neon.faddv.f64.v2f64(<2 x double>)

; Generic fadd reduction intrinsics (scalar start value, then the vector operand).
declare float @llvm.vector.reduce.fadd.v2f32(float, <2 x float>)
declare float @llvm.vector.reduce.fadd.v4f32(float, <4 x float>)
declare double @llvm.vector.reduce.fadd.v2f64(double, <2 x double>)