; blob: b2c2c1893ea4469ca8ac72696deee37cf5aff161 [file] [log] [blame] [edit]
;; Check HVX vectorization.
; RUN: llc -mtriple hexagon < %s | FileCheck %s --check-prefixes=CHECK,CHECK-HVX
;; Check that compiling to scalar code does not fail; only the presence of a scalar multiply-accumulate is checked.
; RUN: llc -mtriple hexagon -mattr=-hvx,-hvxv73,-hvx-length128b < %s | FileCheck %s --check-prefixes=CHECK,CHECK-NO-HVX
;; Unsigned x unsigned i8 products with 64-byte HVX vectors (attributes #0).
;; Both <64 x i8> inputs are zero-extended to i32, multiplied lane-wise, and
;; partially reduced 4:1 into the <16 x i32> accumulator. The HVX run expects
;; one unsigned vrmpy accumulate; the scalar run only expects a scalar
;; multiply-accumulate somewhere in the function.
define <16 x i32> @partial_reduce_uu_64(<16 x i32> %acc, <64 x i8> %x, <64 x i8> %y) #0 {
; CHECK-LABEL: partial_reduce_uu_64:
; CHECK-HVX: v0.uw += vrmpy(v1.ub,v2.ub)
; CHECK-NO-HVX: {{r[0-9]+}} += mpyi
  %x.ext = zext <64 x i8> %x to <64 x i32>
  %y.ext = zext <64 x i8> %y to <64 x i32>
  ;; Both operands are at most 255, so the product cannot wrap either as a
  ;; signed or as an unsigned i32.
  %m = mul nuw nsw <64 x i32> %x.ext, %y.ext
  ;; The overload suffix names the actual result and operand vector types.
  %partial.reduce = tail call <16 x i32> @llvm.vector.partial.reduce.add.v16i32.v64i32(<16 x i32> %acc, <64 x i32> %m)
  ret <16 x i32> %partial.reduce
}
;; Signed x unsigned i8 products with 64-byte HVX vectors (attributes #0).
;; %x is sign-extended and %y is zero-extended before the lane-wise multiply;
;; the products are partially reduced 4:1 into the <16 x i32> accumulator.
;; The expected HVX instruction lists the unsigned operand first
;; (v2.ub, v1.b), i.e. the operands appear swapped relative to the IR.
define <16 x i32> @partial_reduce_su_64(<16 x i32> %acc, <64 x i8> %x, <64 x i8> %y) #0 {
; CHECK-LABEL: partial_reduce_su_64:
; CHECK-HVX: v0.w += vrmpy(v2.ub,v1.b)
; CHECK-NO-HVX: {{r[0-9]+}} += mpyi
  %x.ext = sext <64 x i8> %x to <64 x i32>
  %y.ext = zext <64 x i8> %y to <64 x i32>
  ;; Only nsw holds here: a negative sign-extended operand is a large unsigned
  ;; value, so the product may wrap when interpreted as unsigned.
  %m = mul nsw <64 x i32> %x.ext, %y.ext
  ;; The overload suffix names the actual result and operand vector types.
  %partial.reduce = tail call <16 x i32> @llvm.vector.partial.reduce.add.v16i32.v64i32(<16 x i32> %acc, <64 x i32> %m)
  ret <16 x i32> %partial.reduce
}
;; Unsigned x signed i8 products with 64-byte HVX vectors (attributes #0).
;; %x is zero-extended and %y is sign-extended before the lane-wise multiply;
;; the products are partially reduced 4:1 into the <16 x i32> accumulator.
;; The HVX run expects a single mixed-sign vrmpy accumulate (v1.ub, v2.b).
define <16 x i32> @partial_reduce_us_64(<16 x i32> %acc, <64 x i8> %x, <64 x i8> %y) #0 {
; CHECK-LABEL: partial_reduce_us_64:
; CHECK-HVX: v0.w += vrmpy(v1.ub,v2.b)
; CHECK-NO-HVX: {{r[0-9]+}} += mpyi
  %x.ext = zext <64 x i8> %x to <64 x i32>
  %y.ext = sext <64 x i8> %y to <64 x i32>
  ;; Only nsw holds here: a negative sign-extended operand is a large unsigned
  ;; value, so the product may wrap when interpreted as unsigned.
  %m = mul nsw <64 x i32> %x.ext, %y.ext
  ;; The overload suffix names the actual result and operand vector types.
  %partial.reduce = tail call <16 x i32> @llvm.vector.partial.reduce.add.v16i32.v64i32(<16 x i32> %acc, <64 x i32> %m)
  ret <16 x i32> %partial.reduce
}
;; Signed x signed i8 products with 64-byte HVX vectors (attributes #0).
;; Both <64 x i8> inputs are sign-extended to i32, multiplied lane-wise, and
;; partially reduced 4:1 into the <16 x i32> accumulator. The HVX run expects
;; a single signed vrmpy accumulate (v1.b, v2.b).
define <16 x i32> @partial_reduce_ss_64(<16 x i32> %acc, <64 x i8> %x, <64 x i8> %y) #0 {
; CHECK-LABEL: partial_reduce_ss_64:
; CHECK-HVX: v0.w += vrmpy(v1.b,v2.b)
; CHECK-NO-HVX: {{r[0-9]+}} += mpyi
  %x.ext = sext <64 x i8> %x to <64 x i32>
  %y.ext = sext <64 x i8> %y to <64 x i32>
  ;; Only nsw holds here: negative sign-extended operands are large unsigned
  ;; values, so the product may wrap when interpreted as unsigned.
  %m = mul nsw <64 x i32> %x.ext, %y.ext
  ;; The overload suffix names the actual result and operand vector types.
  %partial.reduce = tail call <16 x i32> @llvm.vector.partial.reduce.add.v16i32.v64i32(<16 x i32> %acc, <64 x i32> %m)
  ret <16 x i32> %partial.reduce
}
;; 128-byte HVX flavour (attributes #1) of the unsigned x unsigned case:
;; zero-extend both <128 x i8> inputs, multiply lane-wise, and fold the
;; 128 products 4:1 into the <32 x i32> accumulator.
define <32 x i32> @partial_reduce_uu_128(<32 x i32> %acc, <128 x i8> %x, <128 x i8> %y) #1 {
; CHECK-LABEL: partial_reduce_uu_128:
; CHECK-HVX: v0.uw += vrmpy(v1.ub,v2.ub)
; CHECK-NO-HVX: {{r[0-9]+}} += mpyi
  %lhs.wide = zext <128 x i8> %x to <128 x i32>
  %rhs.wide = zext <128 x i8> %y to <128 x i32>
  %prod = mul nuw nsw <128 x i32> %lhs.wide, %rhs.wide
  %sum = tail call <32 x i32> @llvm.vector.partial.reduce.add.v32i32.v128i32(<32 x i32> %acc, <128 x i32> %prod)
  ret <32 x i32> %sum
}
;; Signed x unsigned i8 products with 128-byte HVX vectors (attributes #1).
;; %x is sign-extended and %y is zero-extended before the lane-wise multiply;
;; the products are partially reduced 4:1 into the <32 x i32> accumulator.
;; The expected HVX instruction lists the unsigned operand first
;; (v2.ub, v1.b), i.e. the operands appear swapped relative to the IR.
define <32 x i32> @partial_reduce_su_128(<32 x i32> %acc, <128 x i8> %x, <128 x i8> %y) #1 {
; CHECK-LABEL: partial_reduce_su_128:
; CHECK-HVX: v0.w += vrmpy(v2.ub,v1.b)
; CHECK-NO-HVX: {{r[0-9]+}} += mpyi
  %x.ext = sext <128 x i8> %x to <128 x i32>
  %y.ext = zext <128 x i8> %y to <128 x i32>
  ;; Only nsw holds here: a negative sign-extended operand is a large unsigned
  ;; value, so the product may wrap when interpreted as unsigned.
  %m = mul nsw <128 x i32> %x.ext, %y.ext
  %partial.reduce = tail call <32 x i32> @llvm.vector.partial.reduce.add.v32i32.v128i32(<32 x i32> %acc, <128 x i32> %m)
  ret <32 x i32> %partial.reduce
}
;; Unsigned x signed i8 products with 128-byte HVX vectors (attributes #1).
;; %x is zero-extended and %y is sign-extended before the lane-wise multiply;
;; the products are partially reduced 4:1 into the <32 x i32> accumulator.
;; The HVX run expects a single mixed-sign vrmpy accumulate (v1.ub, v2.b).
define <32 x i32> @partial_reduce_us_128(<32 x i32> %acc, <128 x i8> %x, <128 x i8> %y) #1 {
; CHECK-LABEL: partial_reduce_us_128:
; CHECK-HVX: v0.w += vrmpy(v1.ub,v2.b)
; CHECK-NO-HVX: {{r[0-9]+}} += mpyi
  %x.ext = zext <128 x i8> %x to <128 x i32>
  %y.ext = sext <128 x i8> %y to <128 x i32>
  ;; Only nsw holds here: a negative sign-extended operand is a large unsigned
  ;; value, so the product may wrap when interpreted as unsigned.
  %m = mul nsw <128 x i32> %x.ext, %y.ext
  %partial.reduce = tail call <32 x i32> @llvm.vector.partial.reduce.add.v32i32.v128i32(<32 x i32> %acc, <128 x i32> %m)
  ret <32 x i32> %partial.reduce
}
;; Signed x signed i8 products with 128-byte HVX vectors (attributes #1).
;; Both <128 x i8> inputs are sign-extended to i32, multiplied lane-wise, and
;; partially reduced 4:1 into the <32 x i32> accumulator. The HVX run expects
;; a single signed vrmpy accumulate (v1.b, v2.b).
define <32 x i32> @partial_reduce_ss_128(<32 x i32> %acc, <128 x i8> %x, <128 x i8> %y) #1 {
; CHECK-LABEL: partial_reduce_ss_128:
; CHECK-HVX: v0.w += vrmpy(v1.b,v2.b)
; CHECK-NO-HVX: {{r[0-9]+}} += mpyi
  %x.ext = sext <128 x i8> %x to <128 x i32>
  %y.ext = sext <128 x i8> %y to <128 x i32>
  ;; Only nsw holds here: negative sign-extended operands are large unsigned
  ;; values, so the product may wrap when interpreted as unsigned.
  %m = mul nsw <128 x i32> %x.ext, %y.ext
  %partial.reduce = tail call <32 x i32> @llvm.vector.partial.reduce.add.v32i32.v128i32(<32 x i32> %acc, <128 x i32> %m)
  ret <32 x i32> %partial.reduce
}
;; Inputs that are a multiple of the HVX vector size (the first two tests share the <32 x i32> output size).
;; Unsigned x unsigned with <256 x i8> inputs and a <32 x i32> accumulator
;; under 128-byte HVX (attributes #1), i.e. an 8:1 lane reduction.
;; The HVX checks expect two vrmpy accumulations into temporaries, then two
;; vadds: the first adds one temporary to the incoming v0, the second adds the
;; other temporary to that sum and writes v0.
define <32 x i32> @partial_reduce_uu_32xi32_256xi8(<32 x i32> %acc, <256 x i8> %x, <256 x i8> %y) #1 {
; CHECK-LABEL: partial_reduce_uu_32xi32_256xi8:
; CHECK-HVX: [[R1:v[0-9]+]].uw += vrmpy({{v[0-9]+}}.ub,{{v[0-9]+}}.ub)
; CHECK-HVX: [[R2:v[0-9]+]].uw += vrmpy({{v[0-9]+}}.ub,{{v[0-9]+}}.ub)
; CHECK-HVX: [[R3:v[0-9]+]].w = vadd(v0.w,[[R1]].w)
; CHECK-HVX: v0.w = vadd([[R2]].w,[[R3]].w)
; CHECK-NO-HVX: {{r[0-9]+}} += mpyi
  %lhs.wide = zext <256 x i8> %x to <256 x i32>
  %rhs.wide = zext <256 x i8> %y to <256 x i32>
  %prod = mul nuw nsw <256 x i32> %lhs.wide, %rhs.wide
  %sum = tail call <32 x i32> @llvm.vector.partial.reduce.add.v32i32.v256i32(<32 x i32> %acc, <256 x i32> %prod)
  ret <32 x i32> %sum
}
;; Unsigned x unsigned with <1024 x i8> inputs and a <32 x i32> accumulator
;; under 128-byte HVX (attributes #1), i.e. a 32:1 lane reduction.
;; The HVX checks require eight vrmpy and eight vadd matches, in any order.
define <32 x i32> @partial_reduce_uu_32xi32_1024xi8(<32 x i32> %acc, <1024 x i8> %x, <1024 x i8> %y) #1 {
; CHECK-LABEL: partial_reduce_uu_32xi32_1024xi8:
; CHECK-HVX-DAG: vrmpy
; CHECK-HVX-DAG: vadd
; CHECK-HVX-DAG: vrmpy
; CHECK-HVX-DAG: vadd
; CHECK-HVX-DAG: vrmpy
; CHECK-HVX-DAG: vadd
; CHECK-HVX-DAG: vrmpy
; CHECK-HVX-DAG: vadd
; CHECK-HVX-DAG: vrmpy
; CHECK-HVX-DAG: vadd
; CHECK-HVX-DAG: vrmpy
; CHECK-HVX-DAG: vadd
; CHECK-HVX-DAG: vrmpy
; CHECK-HVX-DAG: vadd
; CHECK-HVX-DAG: vrmpy
; CHECK-HVX-DAG: vadd
; CHECK-NO-HVX: {{r[0-9]+}} += mpyi
  %lhs.wide = zext <1024 x i8> %x to <1024 x i32>
  %rhs.wide = zext <1024 x i8> %y to <1024 x i32>
  %prod = mul nuw nsw <1024 x i32> %lhs.wide, %rhs.wide
  %sum = tail call <32 x i32> @llvm.vector.partial.reduce.add.v32i32.v1024i32(<32 x i32> %acc, <1024 x i32> %prod)
  ret <32 x i32> %sum
}
;; Unsigned x unsigned with <1024 x i8> inputs and a <256 x i32> accumulator
;; under 128-byte HVX (attributes #1), i.e. a 4:1 lane reduction.
;; NOTE: despite "64xi32" in the function name, the accumulator and result
;; type are <256 x i32>; the name is kept so the label stays stable.
;; The HVX checks expect eight vrmpy instructions and no vadd before the
;; function's dealloc_return (which bounds the region the negative check
;; scans).
define <256 x i32> @partial_reduce_uu_64xi32_1024xi8(<256 x i32> %acc, <1024 x i8> %x, <1024 x i8> %y) #1 {
; CHECK-LABEL: partial_reduce_uu_64xi32_1024xi8:
; CHECK-HVX-COUNT-8: vrmpy
; CHECK-HVX-NOT: vadd
; CHECK-HVX: dealloc_return
; CHECK-NO-HVX: {{r[0-9]+}} += mpyi
  %x.ext = zext <1024 x i8> %x to <1024 x i32>
  %y.ext = zext <1024 x i8> %y to <1024 x i32>
  ;; Both operands are at most 255, so the product cannot wrap either as a
  ;; signed or as an unsigned i32.
  %m = mul nuw nsw <1024 x i32> %x.ext, %y.ext
  ;; The overload suffix names the actual result and operand vector types.
  %partial.reduce = tail call <256 x i32> @llvm.vector.partial.reduce.add.v256i32.v1024i32(<256 x i32> %acc, <1024 x i32> %m)
  ret <256 x i32> %partial.reduce
}
;; Check for vector sizes that do not match an available vrmpy (2x reduction).
;; 2:1 partial reduction (128 products into a <64 x i32> accumulator) under
;; 128-byte HVX (attributes #1). The HVX checks only require a vmpy followed
;; by a vadd; this function has no scalar-run output check.
define <64 x i32> @partial_reduce_unsupported(<64 x i32> %acc, <128 x i8> %x, <128 x i8> %y) #1 {
; CHECK-LABEL: partial_reduce_unsupported:
; CHECK-HVX: vmpy
; CHECK-HVX: vadd
  %lhs.wide = zext <128 x i8> %x to <128 x i32>
  %rhs.wide = zext <128 x i8> %y to <128 x i32>
  %prod = mul nuw nsw <128 x i32> %lhs.wide, %rhs.wide
  %sum = tail call <64 x i32> @llvm.vector.partial.reduce.add.v64i32.v128i32(<64 x i32> %acc, <128 x i32> %prod)
  ret <64 x i32> %sum
}
;; #0: hexagonv73 with HVX v73 and 64-byte vectors; used by the *_64 tests.
attributes #0 = { nounwind "target-cpu"="hexagonv73" "target-features"="+hvxv73,+hvx-length64b" }
;; #1: hexagonv73 with HVX v73 and 128-byte vectors; used by all other tests.
attributes #1 = { nounwind "target-cpu"="hexagonv73" "target-features"="+hvxv73,+hvx-length128b" }