blob: 5a685e433ed5e07bdcc0a4fbcb5989f79ca63a0b [file] [log] [blame] [edit]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64 < %s | FileCheck %s
; AArch64 faddv over <2 x float> with lane 0 overwritten by +0.0. The call
; carries nsz; the expected assembly is a single lane-1 extract with no add.
define float @test_v2f32_element_0_zero(<2 x float> %vec) {
; CHECK-LABEL: test_v2f32_element_0_zero:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: mov s0, v0.s[1]
; CHECK-NEXT: ret
entry:
  %lane0_cleared = insertelement <2 x float> %vec, float 0.000000e+00, i64 0
  %total = call nsz float @llvm.aarch64.neon.faddv.f32.v2f32(<2 x float> %lane0_cleared)
  ret float %total
}
; AArch64 faddv over <4 x float> with lane 3 overwritten by +0.0 and nsz on
; the call. The expected assembly sums lanes 0-2 only: a pairwise add of
; lanes 0/1 followed by a scalar add of lane 2.
define float @test_v4f32_element_3_zero(<4 x float> %vec) {
; CHECK-LABEL: test_v4f32_element_3_zero:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov s1, v0.s[2]
; CHECK-NEXT: faddp s0, v0.2s
; CHECK-NEXT: fadd s0, s0, s1
; CHECK-NEXT: ret
entry:
  %lane3_cleared = insertelement <4 x float> %vec, float 0.000000e+00, i64 3
  %total = call nsz float @llvm.aarch64.neon.faddv.f32.v4f32(<4 x float> %lane3_cleared)
  ret float %total
}
; AArch64 faddv over <4 x float> with lanes 0 and 2 overwritten by +0.0 and
; nsz on the call. The expected assembly extracts lanes 1 and 3 and adds them
; with one scalar fadd.
define float @test_v4f32_elements_0_2_zero(<4 x float> %vec) {
; CHECK-LABEL: test_v4f32_elements_0_2_zero:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov s1, v0.s[3]
; CHECK-NEXT: mov s0, v0.s[1]
; CHECK-NEXT: fadd s0, s0, s1
; CHECK-NEXT: ret
entry:
  %lane0_cleared = insertelement <4 x float> %vec, float 0.000000e+00, i64 0
  %lanes02_cleared = insertelement <4 x float> %lane0_cleared, float 0.000000e+00, i64 2
  %total = call nsz float @llvm.aarch64.neon.faddv.f32.v4f32(<4 x float> %lanes02_cleared)
  ret float %total
}
; AArch64 faddv where every lane of the input has been overwritten by +0.0.
; The call has no fast-math flags; the expected assembly simply materialises
; zero in the return register.
define float @test_v4f32_all_zero(<4 x float> %vec) {
; CHECK-LABEL: test_v4f32_all_zero:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: movi d0, #0000000000000000
; CHECK-NEXT: ret
entry:
  %z0 = insertelement <4 x float> %vec, float 0.000000e+00, i64 0
  %z01 = insertelement <4 x float> %z0, float 0.000000e+00, i64 1
  %z012 = insertelement <4 x float> %z01, float 0.000000e+00, i64 2
  %z0123 = insertelement <4 x float> %z012, float 0.000000e+00, i64 3
  %total = call float @llvm.aarch64.neon.faddv.f32.v4f32(<4 x float> %z0123)
  ret float %total
}
; Double-precision variant: AArch64 faddv over <2 x double> with lane 0
; overwritten by +0.0 and nsz on the call. The expected assembly is a single
; lane-1 extract.
define double @test_v2f64_element_0_zero(<2 x double> %vec) {
; CHECK-LABEL: test_v2f64_element_0_zero:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov d0, v0.d[1]
; CHECK-NEXT: ret
entry:
  %lane0_cleared = insertelement <2 x double> %vec, double 0.000000e+00, i64 0
  %total = call nsz double @llvm.aarch64.neon.faddv.f64.v2f64(<2 x double> %lane0_cleared)
  ret double %total
}
; Negative case: lane 3 is overwritten by +0.0 but the faddv call has no nsz
; flag. The expected assembly still materialises the zero, inserts it into
; lane 3, and performs the full pairwise reduction.
define float @negative_test(<4 x float> %vec) {
; CHECK-LABEL: negative_test:
; CHECK: // %bb.0:
; CHECK-NEXT: movi d1, #0000000000000000
; CHECK-NEXT: mov v0.s[3], v1.s[0]
; CHECK-NEXT: faddp v0.4s, v0.4s, v0.4s
; CHECK-NEXT: faddp s0, v0.2s
; CHECK-NEXT: ret
  %lane3_cleared = insertelement <4 x float> %vec, float 0.000000e+00, i64 3
  %total = call float @llvm.aarch64.neon.faddv.f32.v4f32(<4 x float> %lane3_cleared)
  ret float %total
}
; Generic llvm.vector.reduce.fadd (reassoc nsz, start value -0.0) over
; <2 x float> with lane 0 overwritten by +0.0. The expected assembly is a
; single lane-1 extract with no add.
define float @test_reduce_v2f32_element_0_zero(<2 x float> %vec) {
; CHECK-LABEL: test_reduce_v2f32_element_0_zero:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: mov s0, v0.s[1]
; CHECK-NEXT: ret
entry:
  %lane0_cleared = insertelement <2 x float> %vec, float 0.000000e+00, i64 0
  %total = call reassoc nsz float @llvm.vector.reduce.fadd.v2f32(float -0.000000e+00, <2 x float> %lane0_cleared)
  ret float %total
}
; Generic llvm.vector.reduce.fadd (reassoc nsz, start value -0.0) over
; <4 x float> with lane 3 overwritten by +0.0. The expected assembly sums
; lanes 0-2 only.
define float @test_reduce_v4f32_element_3_zero(<4 x float> %vec) {
; CHECK-LABEL: test_reduce_v4f32_element_3_zero:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov s1, v0.s[2]
; CHECK-NEXT: faddp s0, v0.2s
; CHECK-NEXT: fadd s0, s0, s1
; CHECK-NEXT: ret
entry:
  %lane3_cleared = insertelement <4 x float> %vec, float 0.000000e+00, i64 3
  %total = call reassoc nsz float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> %lane3_cleared)
  ret float %total
}
; Generic llvm.vector.reduce.fadd (reassoc nsz, start value -0.0) over
; <4 x float> with lanes 0 and 2 overwritten by +0.0. The expected assembly
; extracts lanes 1 and 3 and adds them with one scalar fadd.
define float @test_reduce_v4f32_elements_0_2_zero(<4 x float> %vec) {
; CHECK-LABEL: test_reduce_v4f32_elements_0_2_zero:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov s1, v0.s[3]
; CHECK-NEXT: mov s0, v0.s[1]
; CHECK-NEXT: fadd s0, s0, s1
; CHECK-NEXT: ret
entry:
  %lane0_cleared = insertelement <4 x float> %vec, float 0.000000e+00, i64 0
  %lanes02_cleared = insertelement <4 x float> %lane0_cleared, float 0.000000e+00, i64 2
  %total = call reassoc nsz float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> %lanes02_cleared)
  ret float %total
}
; Generic llvm.vector.reduce.fadd (reassoc nsz, start value -0.0) where every
; lane of the input has been overwritten by +0.0. The expected assembly just
; materialises zero in the return register.
define float @test_reduce_v4f32_all_zero(<4 x float> %vec) {
; CHECK-LABEL: test_reduce_v4f32_all_zero:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: movi d0, #0000000000000000
; CHECK-NEXT: ret
entry:
  %z0 = insertelement <4 x float> %vec, float 0.000000e+00, i64 0
  %z01 = insertelement <4 x float> %z0, float 0.000000e+00, i64 1
  %z012 = insertelement <4 x float> %z01, float 0.000000e+00, i64 2
  %z0123 = insertelement <4 x float> %z012, float 0.000000e+00, i64 3
  %total = call reassoc nsz float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> %z0123)
  ret float %total
}
; Double-precision variant: llvm.vector.reduce.fadd (reassoc nsz, start value
; -0.0) over <2 x double> with lane 0 overwritten by +0.0. The expected
; assembly is a single lane-1 extract.
define double @test_reduce_v2f64_element_0_zero(<2 x double> %vec) {
; CHECK-LABEL: test_reduce_v2f64_element_0_zero:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov d0, v0.d[1]
; CHECK-NEXT: ret
entry:
  %lane0_cleared = insertelement <2 x double> %vec, double 0.000000e+00, i64 0
  %total = call reassoc nsz double @llvm.vector.reduce.fadd.v2f64(double -0.000000e+00, <2 x double> %lane0_cleared)
  ret double %total
}
; Negative case: llvm.vector.reduce.fadd with reassoc but without nsz, lane 3
; overwritten by +0.0. The expected assembly still inserts the zero into
; lane 3 and performs the full pairwise reduction.
define float @negative_test_reduce(<4 x float> %vec) {
; CHECK-LABEL: negative_test_reduce:
; CHECK: // %bb.0:
; CHECK-NEXT: movi d1, #0000000000000000
; CHECK-NEXT: mov v0.s[3], v1.s[0]
; CHECK-NEXT: faddp v0.4s, v0.4s, v0.4s
; CHECK-NEXT: faddp s0, v0.2s
; CHECK-NEXT: ret
  %lane3_cleared = insertelement <4 x float> %vec, float 0.000000e+00, i64 3
  %total = call reassoc float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> %lane3_cleared)
  ret float %total
}
; Ordered llvm.vector.reduce.fadd (nsz only, no reassoc; start value -0.0)
; over <2 x float> with lane 0 overwritten by +0.0. The expected assembly is
; a single lane-1 extract with no add.
define float @test_seq_reduce_v2f32_element_0_zero(<2 x float> %vec) {
; CHECK-LABEL: test_seq_reduce_v2f32_element_0_zero:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: mov s0, v0.s[1]
; CHECK-NEXT: ret
entry:
  %lane0_cleared = insertelement <2 x float> %vec, float 0.000000e+00, i64 0
  %total = call nsz float @llvm.vector.reduce.fadd.v2f32(float -0.000000e+00, <2 x float> %lane0_cleared)
  ret float %total
}
; Ordered llvm.vector.reduce.fadd (nsz only, no reassoc; start value -0.0)
; over <4 x float> with lane 3 overwritten by +0.0. The expected assembly
; sums lanes 0-2 only.
define float @test_seq_reduce_v4f32_element_3_zero(<4 x float> %vec) {
; CHECK-LABEL: test_seq_reduce_v4f32_element_3_zero:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov s1, v0.s[2]
; CHECK-NEXT: faddp s0, v0.2s
; CHECK-NEXT: fadd s0, s0, s1
; CHECK-NEXT: ret
entry:
  %lane3_cleared = insertelement <4 x float> %vec, float 0.000000e+00, i64 3
  %total = call nsz float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> %lane3_cleared)
  ret float %total
}
; Ordered llvm.vector.reduce.fadd (nsz only, no reassoc; start value -0.0)
; over <4 x float> with lanes 0 and 2 overwritten by +0.0. The expected
; assembly extracts lanes 1 and 3 and adds them with one scalar fadd.
define float @test_seq_reduce_v4f32_elements_0_2_zero(<4 x float> %vec) {
; CHECK-LABEL: test_seq_reduce_v4f32_elements_0_2_zero:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov s1, v0.s[3]
; CHECK-NEXT: mov s0, v0.s[1]
; CHECK-NEXT: fadd s0, s0, s1
; CHECK-NEXT: ret
entry:
  %lane0_cleared = insertelement <4 x float> %vec, float 0.000000e+00, i64 0
  %lanes02_cleared = insertelement <4 x float> %lane0_cleared, float 0.000000e+00, i64 2
  %total = call nsz float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> %lanes02_cleared)
  ret float %total
}
; Ordered llvm.vector.reduce.fadd (nsz) with a variable start value and every
; vector lane overwritten by +0.0. The expected assembly is a bare ret, so
; %start is returned unchanged in its incoming register.
define float @test_seq_reduce_v4f32_all_zero(float %start, <4 x float> %vec) {
; CHECK-LABEL: test_seq_reduce_v4f32_all_zero:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ret
entry:
  %z0 = insertelement <4 x float> %vec, float 0.000000e+00, i64 0
  %z01 = insertelement <4 x float> %z0, float 0.000000e+00, i64 1
  %z012 = insertelement <4 x float> %z01, float 0.000000e+00, i64 2
  %z0123 = insertelement <4 x float> %z012, float 0.000000e+00, i64 3
  %total = call nsz float @llvm.vector.reduce.fadd.v4f32(float %start, <4 x float> %z0123)
  ret float %total
}
; Ordered llvm.vector.reduce.fadd (nsz only, no reassoc) with a variable start
; value and lane 3 overwritten by +0.0. The expected assembly adds %start,
; lane 0, lane 1 and lane 2 in that order and has no add for lane 3.
define float @test_seq_reduce_with_start(float %start, <4 x float> %vec) {
; CHECK-LABEL: test_seq_reduce_with_start:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fadd s0, s0, s1
; CHECK-NEXT: mov s2, v1.s[1]
; CHECK-NEXT: mov s1, v1.s[2]
; CHECK-NEXT: fadd s0, s0, s2
; CHECK-NEXT: fadd s0, s0, s1
; CHECK-NEXT: ret
entry:
  %lane3_cleared = insertelement <4 x float> %vec, float 0.000000e+00, i64 3
  %total = call nsz float @llvm.vector.reduce.fadd.v4f32(float %start, <4 x float> %lane3_cleared)
  ret float %total
}
; Double-precision variant: ordered llvm.vector.reduce.fadd (nsz only; start
; value -0.0) over <2 x double> with lane 0 overwritten by +0.0. The expected
; assembly is a single lane-1 extract.
define double @test_seq_reduce_v2f64_element_0_zero(<2 x double> %vec) {
; CHECK-LABEL: test_seq_reduce_v2f64_element_0_zero:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov d0, v0.d[1]
; CHECK-NEXT: ret
entry:
  %lane0_cleared = insertelement <2 x double> %vec, double 0.000000e+00, i64 0
  %total = call nsz double @llvm.vector.reduce.fadd.v2f64(double -0.000000e+00, <2 x double> %lane0_cleared)
  ret double %total
}
; Negative case: ordered llvm.vector.reduce.fadd (start value -0.0) with
; lane 3 overwritten by +0.0 and no fast-math flags on the call. The expected
; assembly still adds the materialised zero as the final step.
define float @negative_test_seq_reduce(<4 x float> %vec) {
; CHECK-LABEL: negative_test_seq_reduce:
; CHECK: // %bb.0:
; CHECK-NEXT: mov s2, v0.s[2]
; CHECK-NEXT: faddp s0, v0.2s
; CHECK-NEXT: movi d1, #0000000000000000
; CHECK-NEXT: fadd s0, s0, s2
; CHECK-NEXT: fadd s0, s0, s1
; CHECK-NEXT: ret
  %lane3_cleared = insertelement <4 x float> %vec, float 0.000000e+00, i64 3
  %total = call float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> %lane3_cleared)
  ret float %total
}
; Negative test: an ordered reduction over an all-zero vector without nsz
; must not fold to %start, because fadd(-0.0, +0.0) yields +0.0 rather than
; -0.0. The expected assembly keeps one fadd of %start with a materialised
; zero.
define float @negative_test_seq_reduce_all_zero_no_nsz(float %start, <4 x float> %vec) {
; CHECK-LABEL: negative_test_seq_reduce_all_zero_no_nsz:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: movi d1, #0000000000000000
; CHECK-NEXT: fadd s0, s0, s1
; CHECK-NEXT: ret
entry:
  %z0 = insertelement <4 x float> %vec, float 0.000000e+00, i64 0
  %z01 = insertelement <4 x float> %z0, float 0.000000e+00, i64 1
  %z012 = insertelement <4 x float> %z01, float 0.000000e+00, i64 2
  %z0123 = insertelement <4 x float> %z012, float 0.000000e+00, i64 3
  %total = call float @llvm.vector.reduce.fadd.v4f32(float %start, <4 x float> %z0123)
  ret float %total
}
; Negative-zero element: lane 3 is overwritten by -0.0 instead of +0.0. With
; nsz on the faddv call the inserted lane is treated like any other zero
; element, so the expected assembly sums lanes 0-2 only.
define float @test_v4f32_neg_zero_element(<4 x float> %vec) {
; CHECK-LABEL: test_v4f32_neg_zero_element:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov s1, v0.s[2]
; CHECK-NEXT: faddp s0, v0.2s
; CHECK-NEXT: fadd s0, s0, s1
; CHECK-NEXT: ret
entry:
  %lane3_negzero = insertelement <4 x float> %vec, float -0.000000e+00, i64 3
  %total = call nsz float @llvm.aarch64.neon.faddv.f32.v4f32(<4 x float> %lane3_negzero)
  ret float %total
}
; Negative-zero element with the generic intrinsic: llvm.vector.reduce.fadd
; (reassoc nsz, start value -0.0) over <4 x float> with lane 3 overwritten by
; -0.0. The expected assembly sums lanes 0-2 only.
define float @test_reduce_v4f32_neg_zero_element(<4 x float> %vec) {
; CHECK-LABEL: test_reduce_v4f32_neg_zero_element:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov s1, v0.s[2]
; CHECK-NEXT: faddp s0, v0.2s
; CHECK-NEXT: fadd s0, s0, s1
; CHECK-NEXT: ret
entry:
  %lane3_negzero = insertelement <4 x float> %vec, float -0.000000e+00, i64 3
  %total = call reassoc nsz float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> %lane3_negzero)
  ret float %total
}
; Intrinsic declarations used by the tests in this file.
;
; AArch64 NEON across-vector floating-point add (single vector operand).
declare float @llvm.aarch64.neon.faddv.f32.v2f32(<2 x float>)
declare float @llvm.aarch64.neon.faddv.f32.v4f32(<4 x float>)
declare double @llvm.aarch64.neon.faddv.f64.v2f64(<2 x double>)
; Target-independent fadd reduction: scalar start value followed by the
; vector operand.
declare float @llvm.vector.reduce.fadd.v2f32(float, <2 x float>)
declare float @llvm.vector.reduce.fadd.v4f32(float, <4 x float>)
declare double @llvm.vector.reduce.fadd.v2f64(double, <2 x double>)