; blob: 1e5cb3c197261487b0470d31d52473569196b1b9 [file] [log] [blame] [edit]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=hexagon < %s | FileCheck %s
; Test: element-wise fadd of two <64 x bfloat> vectors.
; The IR loads one <64 x bfloat> from %a and one from %b (both align 2),
; adds them, and stores the result to %c (align 2).
; The autogenerated assertions below pin the expected Hexagon packets:
;   - both operands are read and the result is written with vmemu;
;   - the addition itself appears as two qf32 vadd instructions on .sf
;     inputs (v2/v4 and v3/v5), each followed by a qf32 -> sf conversion;
;   - the two 32-bit result vectors are shifted right by r5 (= #16) and
;     combined with vpack into halfword lanes before the store.
; NOTE(review): the vand/vadd/vmux sequence and the vcmp.eq(x.sf,x.sf)
; selects presumably implement bf16 rounding and NaN handling -- confirm
; against the Hexagon lowering before relying on this description.
; Function Attrs: mustprogress nounwind
define dso_local void @bf16_vec_add(ptr noundef %c, ptr noundef %a, ptr noundef %b) local_unnamed_addr #0 {
; CHECK-LABEL: bf16_vec_add:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: {
; CHECK-NEXT: r7 = #-4
; CHECK-NEXT: r6 = ##131071
; CHECK-NEXT: allocframe(#0)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: r5 = #16
; CHECK-NEXT: v0 = vmemu(r2+#0)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v26 = vsplat(r6)
; CHECK-NEXT: r2 = ##32768
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v25 = vsplat(r2)
; CHECK-NEXT: v2 = vxor(v0,v0)
; CHECK-NEXT: v1 = vmemu(r1+#0)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: r4 = #32767
; CHECK-NEXT: v5:4.h = vshuffoe(v0.h,v2.h)
; CHECK-NEXT: v3 = vxor(v1,v1)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v31:30.h = vshuffoe(v1.h,v3.h)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v5:4 = vshuff(v5,v4,r7)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v31.h = vsplat(r4)
; CHECK-NEXT: v3:2 = vshuff(v31,v30,r7)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v2.qf32 = vadd(v2.sf,v4.sf)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v3.qf32 = vadd(v3.sf,v5.sf)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v2.sf = v2.qf32
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v3.sf = v3.qf32
; CHECK-NEXT: v27 = vand(v2,v25)
; CHECK-NEXT: v28 = vand(v2,v26)
; CHECK-NEXT: q2 = vcmp.eq(v2.sf,v2.sf)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v29 = vand(v3,v25)
; CHECK-NEXT: v1 = vand(v3,v26)
; CHECK-NEXT: q0 = vcmp.eq(v28.w,v25.w)
; CHECK-NEXT: v4.w = vadd(v2.w,v27.w)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v5.w = vadd(v3.w,v29.w)
; CHECK-NEXT: q1 = vcmp.eq(v1.w,v25.w)
; CHECK-NEXT: v30 = vmux(q0,v2,v4)
; CHECK-NEXT: q3 = vcmp.eq(v3.sf,v3.sf)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v1 = vmux(q1,v3,v5)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v0.uw = vlsr(v30.uw,r5)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v1.uw = vlsr(v1.uw,r5)
; CHECK-NEXT: v0 = vmux(q2,v0,v31)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v1 = vmux(q3,v1,v31)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v0.uh = vpack(v1.w,v0.w):sat
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: vmemu(r0+#0) = v0
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: r31:30 = dealloc_return(r30):raw
; CHECK-NEXT: }
entry:
; Both operands are loaded with only 2-byte alignment guaranteed.
%0 = load <64 x bfloat>, ptr %a, align 2
%1 = load <64 x bfloat>, ptr %b, align 2
; The operation under test: a full-vector bfloat add.
%add.ripple.vectorized = fadd <64 x bfloat> %0, %1
store <64 x bfloat> %add.ripple.vectorized, ptr %c, align 2
ret void
}
; Test: plain copy of a <128 x half> vector from %X to %Y.
; The IR is a single load (align 2) followed by a single store (align 2),
; with no arithmetic. <128 x half> is 256 bytes, and the assertions below
; expect it to be moved as two vector registers: two vmemu loads from r0
; at offsets #1 and #0, then two vmemu stores to r1 at the same offsets.
define dso_local void @copy1d(ptr noundef readonly captures(none) %X, ptr noundef writeonly captures(none) %Y) local_unnamed_addr #0 {
; CHECK-LABEL: copy1d:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: {
; CHECK-NEXT: allocframe(r29,#0):raw
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v0 = vmemu(r0+#1)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v1 = vmemu(r0+#0)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: vmemu(r1+#1) = v0
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: vmemu(r1+#0) = v1
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: r31:30 = dealloc_return(r30):raw
; CHECK-NEXT: }
entry:
; Whole-vector load and store; only 2-byte alignment is promised.
%0 = load <128 x half>, ptr %X, align 2
store <128 x half> %0, ptr %Y, align 2
ret void
}
; Attribute group #0 is referenced by both @bf16_vec_add and @copy1d.
; Besides the generic function attributes, it sets "target-cpu" to
; hexagonv81 and lists the target features +hvx-length128b, +hvx-qfloat,
; +hvxv81 and +v81 (with long-calls disabled), which the expected HVX
; output of both tests depends on.
attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv81" "target-features"="+hvx-length128b,+hvx-qfloat,+hvxv81,+v81,-long-calls" }