| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc -mtriple=hexagon < %s | FileCheck %s |
| |
| ; Function Attrs: mustprogress nounwind |
| |
| ; bf16_vec_add: loads one <64 x bfloat> vector from %a and one from %b (both |
| ; align 2), adds them element-wise with fadd, and stores the result to %c. |
| ; |
| ; The assertions below pin the exact packet-by-packet instruction sequence |
| ; that llc is expected to emit. Register names are captured as FileCheck |
| ; variables, so the test depends on the dataflow and schedule rather than on a |
| ; particular register assignment. |
| ; |
| ; What the matched sequence shows: |
| ;   * both inputs are read with unaligned vector loads (vmemu); |
| ;   * each input goes through vshuffoe/vshuff against a vxor result before |
| ;     the add; |
| ;   * the add is done twice as a qf32 vadd on .sf operands, and each result is |
| ;     converted back to .sf; |
| ;   * the results are masked with splats of 32768 (0x8000) and 131071 |
| ;     (0x1FFFF), conditionally incremented, shifted right by 16, and packed |
| ;     to halfwords with saturation; |
| ;   * lanes where vcmp.eq(x.sf, x.sf) fails are replaced with the halfword |
| ;     splat of 32767 (0x7FFF). |
| ; NOTE(review): this looks like a widen-to-f32 / add / round-to-nearest-even / |
| ; NaN-canonicalize lowering for bf16; confirm against the Hexagon HVX bf16 |
| ; lowering code before relying on that description. |
| define dso_local void @bf16_vec_add(ptr noundef %c, ptr noundef %a, ptr noundef %b) local_unnamed_addr #0 { |
| ; CHECK-LABEL: bf16_vec_add: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: [[R7:r[0-9]+]] = #-4 |
| ; CHECK-NEXT: [[V0:v[0-9]+]] = vmemu([[R2:r[0-9]+]]+#0) |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: [[R2]] = ##32768 |
| ; CHECK-NEXT: [[V1:v[0-9]+]] = vmemu([[R1:r[0-9]+]]+#0) |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: [[R6:r[0-9]+]] = ##131071 |
| ; CHECK-NEXT: [[V2:v[0-9]+]] = vxor([[V0]],[[V0]]) |
| ; CHECK-NEXT: [[V3:v[0-9]+]] = vxor([[V1]],[[V1]]) |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: [[V25:v[0-9]+]] = vsplat([[R2]]) |
| ; CHECK-NEXT: [[R5:r[0-9]+]] = #16 |
| ; CHECK-NEXT: [[V5_4:v[0-9]+:[0-9]+]].h = vshuffoe([[V0]].h,[[V2]].h) |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: [[V26:v[0-9]+]] = vsplat([[R6]]) |
| ; CHECK-NEXT: [[R4:r[0-9]+]] = #32767 |
| ; CHECK-NEXT: [[V31_30:v[0-9]+:[0-9]+]].h = vshuffoe([[V1]].h,[[V3]].h) |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: [[V5_4]] = vshuff([[V5:v[0-9]+]],[[V4:v[0-9]+]],[[R7]]) |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: [[V31:v[0-9]+]].h = vsplat([[R4]]) |
| ; CHECK-NEXT: [[V3_2:v[0-9]+:[0-9]+]] = vshuff([[V31]],[[V30:v[0-9]+]],[[R7]]) |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: [[V2]].qf32 = vadd([[V2]].sf,[[V4]].sf) |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: [[V3]].qf32 = vadd([[V3]].sf,[[V5]].sf) |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: [[V2]].sf = [[V2]].qf32 |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: [[V3]].sf = [[V3]].qf32 |
| ; CHECK-NEXT: [[V27:v[0-9]+]] = vand([[V2]],[[V25]]) |
| ; CHECK-NEXT: [[V28:v[0-9]+]] = vand([[V2]],[[V26]]) |
| ; CHECK-NEXT: [[Q2:q[0-9]+]] = vcmp.eq([[V2]].sf,[[V2]].sf) |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: [[V29:v[0-9]+]] = vand([[V3]],[[V25]]) |
| ; CHECK-NEXT: [[V1]] = vand([[V3]],[[V26]]) |
| ; CHECK-NEXT: [[Q0:q[0-9]+]] = vcmp.eq([[V28]].w,[[V25]].w) |
| ; CHECK-NEXT: [[V4]].w = vadd([[V2]].w,[[V27]].w) |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: [[V5]].w = vadd([[V3]].w,[[V29]].w) |
| ; CHECK-NEXT: [[Q1:q[0-9]+]] = vcmp.eq([[V1]].w,[[V25]].w) |
| ; CHECK-NEXT: [[V30:v[0-9]+]] = vmux([[Q0]],[[V2]],[[V4]]) |
| ; CHECK-NEXT: [[Q3:q[0-9]+]] = vcmp.eq([[V3]].sf,[[V3]].sf) |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: [[V1]] = vmux([[Q1]],[[V3]],[[V5]]) |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: [[V0]].uw = vlsr([[V30]].uw,[[R5]]) |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: [[V1]].uw = vlsr([[V1]].uw,[[R5]]) |
| ; CHECK-NEXT: [[V0]] = vmux([[Q2]],[[V0]],[[V31]]) |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: [[V1]] = vmux([[Q3]],[[V1]],[[V31]]) |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: [[V0]].uh = vpack([[V1]].w,[[V0]].w):sat |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: jumpr [[R31:r[0-9]+]] |
| ; CHECK-NEXT: vmemu([[R0:r[0-9]+]]+#0) = [[V0]] |
| ; CHECK-NEXT: } |
| |
| |
| ; The loads and the store are only 2-byte aligned, which matches the |
| ; unaligned vmemu accesses expected above. |
| entry: |
| %0 = load <64 x bfloat>, ptr %a, align 2 |
| %1 = load <64 x bfloat>, ptr %b, align 2 |
| %add.ripple.vectorized = fadd <64 x bfloat> %0, %1 |
| store <64 x bfloat> %add.ripple.vectorized, ptr %c, align 2 |
| ret void |
| } |
| |
| ; copy1d: loads a <128 x half> value from %X (align 2) and stores it |
| ; unchanged to %Y (align 2). |
| ; |
| ; <128 x half> is 256 bytes, i.e. two vectors at the 128-byte HVX length |
| ; selected by attribute group #0. The patterns below therefore expect two |
| ; unaligned vector loads from r0 (offsets #1 and #0) and require that each |
| ; loaded register is the one stored to the same offset from r1. |
| ; These are loose (non-NEXT) matches: only the relative order of the five |
| ; lines is checked, and the second store is matched after the jumpr line. |
| ; NOTE(review): the RET capture is never reused; it only asserts that a |
| ; jumpr through some r-register appears between the two stores. |
| define dso_local void @copy1d(ptr noundef readonly captures(none) %X, ptr noundef writeonly captures(none) %Y) local_unnamed_addr #0 { |
| ; CHECK-LABEL: copy1d: |
| ; CHECK: v[[X_HI:[0-9]+]] = vmemu(r0+#1) |
| ; CHECK: v[[X_LO:[0-9]+]] = vmemu(r0+#0) |
| ; CHECK: vmemu(r1+#1) = v[[X_HI]] |
| ; CHECK: jumpr [[RET:r[0-9]+]] |
| ; CHECK: vmemu(r1+#0) = v[[X_LO]] |
| entry: |
| %0 = load <128 x half>, ptr %X, align 2 |
| store <128 x half> %0, ptr %Y, align 2 |
| ret void |
| } |
| |
| ; Attribute group shared by both functions above: selects the hexagonv81 CPU |
| ; and enables 128-byte HVX vectors (hvx-length128b), HVX qfloat (hvx-qfloat), |
| ; and the v81 / hvxv81 feature sets, with long-calls disabled. |
| attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv81" "target-features"="+hvx-length128b,+hvx-qfloat,+hvxv81,+v81,-long-calls" } |