; Source blob: 47be92aaf0b31d0507d322728cfe0437eb68131c (repository web-viewer header, kept as a comment so the file parses as LLVM IR)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=hexagon < %s | FileCheck %s
; bf16_vec_add: codegen test for a lane-wise fadd of two <64 x bfloat> vectors.
; The function loads 64 bfloat lanes from %a and from %b, adds them, and stores
; the sum to %c; every access is only 2-byte aligned. 64 x 2 bytes = 128 bytes,
; i.e. one vector under the "+hvx-length128b" feature of attribute group #0.
;
; What the pinned assembly below shows, packet by packet:
;   * both operands are read and the result is written with vmemu accesses;
;   * each operand is spread with vshuffoe/vshuff against a vxor result of the
;     same register (presumably a zero vector used to widen bf16 lanes into
;     32-bit float lanes -- confirm against the HVX reference);
;   * the two halves are added as ".qf32 = vadd(.sf, .sf)" and converted back
;     with ".sf = .qf32";
;   * the low 17 bits of each sum are compared with the splat of ##32768 and
;     bit 15 is added back unless they match exactly, which reads as
;     round-to-nearest-even before the 16-bit right shift (vlsr by #16);
;   * vcmp.eq(x.sf, x.sf) feeds a vmux whose other input is the halfword splat
;     of #32767 (presumably the replacement value for NaN lanes -- confirm);
;   * the two 32-bit halves are narrowed with vpack(...):sat into one vector.
; Register numbers are matched through FileCheck variables, so the test pins
; the instruction sequence and data flow rather than a particular allocation.
; Function Attrs: mustprogress nounwind
define dso_local void @bf16_vec_add(ptr noundef %c, ptr noundef %a, ptr noundef %b) local_unnamed_addr #0 {
; CHECK-LABEL: bf16_vec_add:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: {
; CHECK-NEXT: [[R7:r[0-9]+]] = #-4
; CHECK-NEXT: [[V0:v[0-9]+]] = vmemu([[R2:r[0-9]+]]+#0)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: [[R2]] = ##32768
; CHECK-NEXT: [[V1:v[0-9]+]] = vmemu([[R1:r[0-9]+]]+#0)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: [[R6:r[0-9]+]] = ##131071
; CHECK-NEXT: [[V2:v[0-9]+]] = vxor([[V0]],[[V0]])
; CHECK-NEXT: [[V3:v[0-9]+]] = vxor([[V1]],[[V1]])
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: [[V25:v[0-9]+]] = vsplat([[R2]])
; CHECK-NEXT: [[R5:r[0-9]+]] = #16
; CHECK-NEXT: [[V5_4:v[0-9]+:[0-9]+]].h = vshuffoe([[V0]].h,[[V2]].h)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: [[V26:v[0-9]+]] = vsplat([[R6]])
; CHECK-NEXT: [[R4:r[0-9]+]] = #32767
; CHECK-NEXT: [[V31_30:v[0-9]+:[0-9]+]].h = vshuffoe([[V1]].h,[[V3]].h)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: [[V5_4]] = vshuff([[V5:v[0-9]+]],[[V4:v[0-9]+]],[[R7]])
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: [[V31:v[0-9]+]].h = vsplat([[R4]])
; CHECK-NEXT: [[V3_2:v[0-9]+:[0-9]+]] = vshuff([[V31]],[[V30:v[0-9]+]],[[R7]])
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: [[V2]].qf32 = vadd([[V2]].sf,[[V4]].sf)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: [[V3]].qf32 = vadd([[V3]].sf,[[V5]].sf)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: [[V2]].sf = [[V2]].qf32
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: [[V3]].sf = [[V3]].qf32
; CHECK-NEXT: [[V27:v[0-9]+]] = vand([[V2]],[[V25]])
; CHECK-NEXT: [[V28:v[0-9]+]] = vand([[V2]],[[V26]])
; CHECK-NEXT: [[Q2:q[0-9]+]] = vcmp.eq([[V2]].sf,[[V2]].sf)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: [[V29:v[0-9]+]] = vand([[V3]],[[V25]])
; CHECK-NEXT: [[V1]] = vand([[V3]],[[V26]])
; CHECK-NEXT: [[Q0:q[0-9]+]] = vcmp.eq([[V28]].w,[[V25]].w)
; CHECK-NEXT: [[V4]].w = vadd([[V2]].w,[[V27]].w)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: [[V5]].w = vadd([[V3]].w,[[V29]].w)
; CHECK-NEXT: [[Q1:q[0-9]+]] = vcmp.eq([[V1]].w,[[V25]].w)
; CHECK-NEXT: [[V30:v[0-9]+]] = vmux([[Q0]],[[V2]],[[V4]])
; CHECK-NEXT: [[Q3:q[0-9]+]] = vcmp.eq([[V3]].sf,[[V3]].sf)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: [[V1]] = vmux([[Q1]],[[V3]],[[V5]])
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: [[V0]].uw = vlsr([[V30]].uw,[[R5]])
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: [[V1]].uw = vlsr([[V1]].uw,[[R5]])
; CHECK-NEXT: [[V0]] = vmux([[Q2]],[[V0]],[[V31]])
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: [[V1]] = vmux([[Q3]],[[V1]],[[V31]])
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: [[V0]].uh = vpack([[V1]].w,[[V0]].w):sat
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: jumpr [[R31:r[0-9]+]]
; CHECK-NEXT: vmemu([[R0:r[0-9]+]]+#0) = [[V0]]
; CHECK-NEXT: }
entry:
; Read both operands; "align 2" guarantees element alignment only, not
; vector alignment.
%0 = load <64 x bfloat>, ptr %a, align 2
%1 = load <64 x bfloat>, ptr %b, align 2
; Lane-wise bfloat addition -- the operation whose lowering is under test.
%add.ripple.vectorized = fadd <64 x bfloat> %0, %1
; Write the sum to %c, again with 2-byte alignment.
store <64 x bfloat> %add.ripple.vectorized, ptr %c, align 2
ret void
}
; copy1d: codegen test for copying a <128 x half> vector from %X to %Y with
; only 2-byte alignment on both sides. 128 x 2 bytes = 256 bytes, i.e. two
; vectors under the "+hvx-length128b" feature of attribute group #0.
;
; The assertions below expect two vmemu loads from r0 (offsets #1 and #0) and
; two vmemu stores to r1 at the same offsets. The X_HI / X_LO captures require
; each stored register to be the one loaded from the matching offset. The
; store to offset #0 is matched after the jumpr line. The RET capture is
; defined but not referenced again.
define dso_local void @copy1d(ptr noundef readonly captures(none) %X, ptr noundef writeonly captures(none) %Y) local_unnamed_addr #0 {
; CHECK-LABEL: copy1d:
; CHECK: v[[X_HI:[0-9]+]] = vmemu(r0+#1)
; CHECK: v[[X_LO:[0-9]+]] = vmemu(r0+#0)
; CHECK: vmemu(r1+#1) = v[[X_HI]]
; CHECK: jumpr [[RET:r[0-9]+]]
; CHECK: vmemu(r1+#0) = v[[X_LO]]
entry:
; Whole-vector load from %X followed by a whole-vector store to %Y; no
; arithmetic is performed on the value in between.
%0 = load <128 x half>, ptr %X, align 2
store <128 x half> %0, ptr %Y, align 2
ret void
}
; Attribute group #0 is referenced by both function definitions in this file.
; Its "target-cpu" / "target-features" strings select hexagonv81 and enable
; 128-byte HVX vectors (+hvx-length128b) together with +hvx-qfloat; the llc
; command line itself passes only -mtriple=hexagon, so the HVX configuration
; comes from this line.
attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv81" "target-features"="+hvx-length128b,+hvx-qfloat,+hvxv81,+v81,-long-calls" }