| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s --check-prefixes=CHECK,CHECK-SD |
| ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI |
| |
| ; CHECK-GI: warning: Instruction selection used fallback path for shuffle_zip1 |
| ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for shuffle_zip2 |
| ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for shuffle_zip3 |
| |
| ; Loads two <8 x i8> vectors, interleaves their low halves (mask 0,8,1,9,...) |
| ; and their high halves (mask 4,12,5,13,...), then adds the two results. |
| ; Expected codegen: one zip1.8b and one zip2.8b feeding add.8b. |
| define <8 x i8> @vzipi8(ptr %A, ptr %B) nounwind { |
| ; CHECK-LABEL: vzipi8: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: ldr d0, [x0] |
| ; CHECK-NEXT: ldr d1, [x1] |
| ; CHECK-NEXT: zip1.8b v2, v0, v1 |
| ; CHECK-NEXT: zip2.8b v0, v0, v1 |
| ; CHECK-NEXT: add.8b v0, v2, v0 |
| ; CHECK-NEXT: ret |
| %tmp1 = load <8 x i8>, ptr %A |
| %tmp2 = load <8 x i8>, ptr %B |
| %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> |
| %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> |
| %tmp5 = add <8 x i8> %tmp3, %tmp4 |
| ret <8 x i8> %tmp5 |
| } |
| |
| ; <4 x i16> variant: low-half interleave (mask 0,4,1,5) plus high-half |
| ; interleave (mask 2,6,3,7) of two loaded vectors, summed. |
| ; Expected codegen: zip1.4h and zip2.4h feeding add.4h. |
| define <4 x i16> @vzipi16(ptr %A, ptr %B) nounwind { |
| ; CHECK-LABEL: vzipi16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: ldr d0, [x0] |
| ; CHECK-NEXT: ldr d1, [x1] |
| ; CHECK-NEXT: zip1.4h v2, v0, v1 |
| ; CHECK-NEXT: zip2.4h v0, v0, v1 |
| ; CHECK-NEXT: add.4h v0, v2, v0 |
| ; CHECK-NEXT: ret |
| %tmp1 = load <4 x i16>, ptr %A |
| %tmp2 = load <4 x i16>, ptr %B |
| %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 0, i32 4, i32 1, i32 5> |
| %tmp4 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 2, i32 6, i32 3, i32 7> |
| %tmp5 = add <4 x i16> %tmp3, %tmp4 |
| ret <4 x i16> %tmp5 |
| } |
| |
| ; 128-bit <16 x i8> variant: low-half interleave (mask 0,16,1,17,...) plus |
| ; high-half interleave (mask 8,24,9,25,...) of two loaded vectors, summed. |
| ; Expected codegen: zip1.16b and zip2.16b feeding add.16b. |
| define <16 x i8> @vzipQi8(ptr %A, ptr %B) nounwind { |
| ; CHECK-LABEL: vzipQi8: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: ldr q0, [x0] |
| ; CHECK-NEXT: ldr q1, [x1] |
| ; CHECK-NEXT: zip1.16b v2, v0, v1 |
| ; CHECK-NEXT: zip2.16b v0, v0, v1 |
| ; CHECK-NEXT: add.16b v0, v2, v0 |
| ; CHECK-NEXT: ret |
| %tmp1 = load <16 x i8>, ptr %A |
| %tmp2 = load <16 x i8>, ptr %B |
| %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23> |
| %tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31> |
| %tmp5 = add <16 x i8> %tmp3, %tmp4 |
| ret <16 x i8> %tmp5 |
| } |
| |
| ; 128-bit <8 x i16> variant: low-half interleave (mask 0,8,1,9,...) plus |
| ; high-half interleave (mask 4,12,5,13,...) of two loaded vectors, summed. |
| ; Expected codegen: zip1.8h and zip2.8h feeding add.8h. |
| define <8 x i16> @vzipQi16(ptr %A, ptr %B) nounwind { |
| ; CHECK-LABEL: vzipQi16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: ldr q0, [x0] |
| ; CHECK-NEXT: ldr q1, [x1] |
| ; CHECK-NEXT: zip1.8h v2, v0, v1 |
| ; CHECK-NEXT: zip2.8h v0, v0, v1 |
| ; CHECK-NEXT: add.8h v0, v2, v0 |
| ; CHECK-NEXT: ret |
| %tmp1 = load <8 x i16>, ptr %A |
| %tmp2 = load <8 x i16>, ptr %B |
| %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> |
| %tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> |
| %tmp5 = add <8 x i16> %tmp3, %tmp4 |
| ret <8 x i16> %tmp5 |
| } |
| |
| ; 128-bit <4 x i32> variant: low-half interleave (mask 0,4,1,5) plus |
| ; high-half interleave (mask 2,6,3,7) of two loaded vectors, summed. |
| ; Expected codegen: zip1.4s and zip2.4s feeding add.4s. |
| define <4 x i32> @vzipQi32(ptr %A, ptr %B) nounwind { |
| ; CHECK-LABEL: vzipQi32: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: ldr q0, [x0] |
| ; CHECK-NEXT: ldr q1, [x1] |
| ; CHECK-NEXT: zip1.4s v2, v0, v1 |
| ; CHECK-NEXT: zip2.4s v0, v0, v1 |
| ; CHECK-NEXT: add.4s v0, v2, v0 |
| ; CHECK-NEXT: ret |
| %tmp1 = load <4 x i32>, ptr %A |
| %tmp2 = load <4 x i32>, ptr %B |
| %tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 0, i32 4, i32 1, i32 5> |
| %tmp4 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 2, i32 6, i32 3, i32 7> |
| %tmp5 = add <4 x i32> %tmp3, %tmp4 |
| ret <4 x i32> %tmp5 |
| } |
| |
| ; Floating-point <4 x float> variant: the same 0,4,1,5 / 2,6,3,7 interleave |
| ; masks as the <4 x i32> case, combined with fadd instead of add. |
| ; Expected codegen: zip1.4s and zip2.4s feeding fadd.4s. |
| define <4 x float> @vzipQf(ptr %A, ptr %B) nounwind { |
| ; CHECK-LABEL: vzipQf: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: ldr q0, [x0] |
| ; CHECK-NEXT: ldr q1, [x1] |
| ; CHECK-NEXT: zip1.4s v2, v0, v1 |
| ; CHECK-NEXT: zip2.4s v0, v0, v1 |
| ; CHECK-NEXT: fadd.4s v0, v2, v0 |
| ; CHECK-NEXT: ret |
| %tmp1 = load <4 x float>, ptr %A |
| %tmp2 = load <4 x float>, ptr %B |
| %tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 0, i32 4, i32 1, i32 5> |
| %tmp4 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 2, i32 6, i32 3, i32 7> |
| %tmp5 = fadd <4 x float> %tmp3, %tmp4 |
| ret <4 x float> %tmp5 |
| } |
| |
| ; Undef shuffle indices should not prevent matching to zip1/zip2: |
| |
| ; Same low/high interleave-and-add as the plain <8 x i8> case, but with two |
| ; undef indices in each mask (positions 1 and 4 in the low-half mask, |
| ; positions 5 and 6 in the high-half mask). |
| ; Expected codegen is still zip1.8b and zip2.8b feeding add.8b. |
| define <8 x i8> @vzipi8_undef(ptr %A, ptr %B) nounwind { |
| ; CHECK-LABEL: vzipi8_undef: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: ldr d0, [x0] |
| ; CHECK-NEXT: ldr d1, [x1] |
| ; CHECK-NEXT: zip1.8b v2, v0, v1 |
| ; CHECK-NEXT: zip2.8b v0, v0, v1 |
| ; CHECK-NEXT: add.8b v0, v2, v0 |
| ; CHECK-NEXT: ret |
| %tmp1 = load <8 x i8>, ptr %A |
| %tmp2 = load <8 x i8>, ptr %B |
| %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 undef, i32 1, i32 9, i32 undef, i32 10, i32 3, i32 11> |
| %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 undef, i32 undef, i32 15> |
| %tmp5 = add <8 x i8> %tmp3, %tmp4 |
| ret <8 x i8> %tmp5 |
| } |
| |
| ; 128-bit <16 x i8> interleave-and-add with undef indices scattered through |
| ; both masks (three consecutive undefs in the low-half mask, three separate |
| ; undefs in the high-half mask). |
| ; Expected codegen is still zip1.16b and zip2.16b feeding add.16b. |
| define <16 x i8> @vzipQi8_undef(ptr %A, ptr %B) nounwind { |
| ; CHECK-LABEL: vzipQi8_undef: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: ldr q0, [x0] |
| ; CHECK-NEXT: ldr q1, [x1] |
| ; CHECK-NEXT: zip1.16b v2, v0, v1 |
| ; CHECK-NEXT: zip2.16b v0, v0, v1 |
| ; CHECK-NEXT: add.16b v0, v2, v0 |
| ; CHECK-NEXT: ret |
| %tmp1 = load <16 x i8>, ptr %A |
| %tmp2 = load <16 x i8>, ptr %B |
| %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 16, i32 1, i32 undef, i32 undef, i32 undef, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23> |
| %tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 8, i32 24, i32 9, i32 undef, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 undef, i32 14, i32 30, i32 undef, i32 31> |
| %tmp5 = add <16 x i8> %tmp3, %tmp4 |
| ret <16 x i8> %tmp5 |
| } |
| |
| ; Low-half interleave mask (0,8,1,9,2,10,3,11) with elements 0 and 1 undef. |
| ; Expected codegen: a single zip1.8h on the register arguments. |
| define <8 x i16> @vzip1_undef_01(<8 x i16> %A, <8 x i16> %B) nounwind { |
| ; CHECK-LABEL: vzip1_undef_01: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: zip1.8h v0, v0, v1 |
| ; CHECK-NEXT: ret |
| %s = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 undef, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> |
| ret <8 x i16> %s |
| } |
| |
| ; Low-half interleave mask (0,8,1,9,2,10,3,11) with only element 0 undef. |
| ; Expected codegen: a single zip1.8h on the register arguments. |
| define <8 x i16> @vzip1_undef_0(<8 x i16> %A, <8 x i16> %B) nounwind { |
| ; CHECK-LABEL: vzip1_undef_0: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: zip1.8h v0, v0, v1 |
| ; CHECK-NEXT: ret |
| %s = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> |
| ret <8 x i16> %s |
| } |
| |
| ; Low-half interleave mask (0,8,1,9,2,10,3,11) with only element 1 undef. |
| ; Expected codegen: a single zip1.8h on the register arguments. |
| define <8 x i16> @vzip1_undef_1(<8 x i16> %A, <8 x i16> %B) nounwind { |
| ; CHECK-LABEL: vzip1_undef_1: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: zip1.8h v0, v0, v1 |
| ; CHECK-NEXT: ret |
| %s = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 0, i32 undef, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> |
| ret <8 x i16> %s |
| } |
| |
| ; Low-half interleave mask (0,8,1,9,2,10,3,11) with elements 0, 1 and 2 undef, |
| ; so the first defined index (9) comes from the second operand. |
| ; Expected codegen: a single zip1.8h on the register arguments. |
| define <8 x i16> @vzip1_undef_012(<8 x i16> %A, <8 x i16> %B) nounwind { |
| ; CHECK-LABEL: vzip1_undef_012: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: zip1.8h v0, v0, v1 |
| ; CHECK-NEXT: ret |
| %s = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 9, i32 2, i32 10, i32 3, i32 11> |
| ret <8 x i16> %s |
| } |
| |
| ; High-half interleave mask (4,12,5,13,6,14,7,15) with elements 0 and 1 undef. |
| ; Expected codegen: a single zip2.8h on the register arguments. |
| define <8 x i16> @vzip2_undef_01(<8 x i16> %A, <8 x i16> %B) nounwind { |
| ; CHECK-LABEL: vzip2_undef_01: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: zip2.8h v0, v0, v1 |
| ; CHECK-NEXT: ret |
| %s = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 undef, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> |
| ret <8 x i16> %s |
| } |
| |
| ; High-half interleave mask (4,12,5,13,6,14,7,15) with only element 0 undef. |
| ; Expected codegen: a single zip2.8h on the register arguments. |
| define <8 x i16> @vzip2_undef_0(<8 x i16> %A, <8 x i16> %B) nounwind { |
| ; CHECK-LABEL: vzip2_undef_0: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: zip2.8h v0, v0, v1 |
| ; CHECK-NEXT: ret |
| %s = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> |
| ret <8 x i16> %s |
| } |
| |
| ; High-half interleave mask (4,12,5,13,6,14,7,15) with only element 1 undef. |
| ; Expected codegen: a single zip2.8h on the register arguments. |
| define <8 x i16> @vzip2_undef_1(<8 x i16> %A, <8 x i16> %B) nounwind { |
| ; CHECK-LABEL: vzip2_undef_1: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: zip2.8h v0, v0, v1 |
| ; CHECK-NEXT: ret |
| %s = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 4, i32 undef, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> |
| ret <8 x i16> %s |
| } |
| |
| ; High-half interleave mask (4,12,5,13,6,14,7,15) with elements 0, 1 and 2 |
| ; undef, so the first defined index (13) comes from the second operand. |
| ; Expected codegen: a single zip2.8h on the register arguments. |
| define <8 x i16> @vzip2_undef_012(<8 x i16> %A, <8 x i16> %B) nounwind { |
| ; CHECK-LABEL: vzip2_undef_012: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: zip2.8h v0, v0, v1 |
| ; CHECK-NEXT: ret |
| %s = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 13, i32 6, i32 14, i32 7, i32 15> |
| ret <8 x i16> %s |
| } |
| |
| ; Widening shuffle: fully interleaves two <8 x i8> arguments into one |
| ; <16 x i8> result (mask 0,8,1,9,...,7,15). |
| ; Expected codegen for both runs: the d-register arguments are treated as |
| ; q registers and combined with a single zip1.16b. |
| define <16 x i8> @combine_v16i8(<8 x i8> %0, <8 x i8> %1) { |
| ; CHECK-LABEL: combine_v16i8: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 |
| ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 |
| ; CHECK-NEXT: zip1.16b v0, v0, v1 |
| ; CHECK-NEXT: ret |
| %3 = shufflevector <8 x i8> %0, <8 x i8> %1, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> |
| ret <16 x i8> %3 |
| } |
| |
| ; Full interleave of two <8 x i8> arguments written as three shuffles: a |
| ; low-half interleave, a high-half interleave, and an identity-mask shuffle |
| ; that concatenates the two halves into <16 x i8>. |
| ; The default run expects this to fold into a single zip1.16b; the |
| ; -global-isel run expects zip1.8b + zip2.8b joined by a 64-bit lane move. |
| define <16 x i8> @combine2_v16i8(<8 x i8> %0, <8 x i8> %1) { |
| ; CHECK-SD-LABEL: combine2_v16i8: |
| ; CHECK-SD: // %bb.0: |
| ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 |
| ; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1 |
| ; CHECK-SD-NEXT: zip1.16b v0, v0, v1 |
| ; CHECK-SD-NEXT: ret |
| ; |
| ; CHECK-GI-LABEL: combine2_v16i8: |
| ; CHECK-GI: // %bb.0: |
| ; CHECK-GI-NEXT: zip1.8b v2, v0, v1 |
| ; CHECK-GI-NEXT: zip2.8b v0, v0, v1 |
| ; CHECK-GI-NEXT: mov.d v2[1], v0[0] |
| ; CHECK-GI-NEXT: mov.16b v0, v2 |
| ; CHECK-GI-NEXT: ret |
| %3 = shufflevector <8 x i8> %0, <8 x i8> %1, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> |
| %4 = shufflevector <8 x i8> %0, <8 x i8> %1, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> |
| %5 = shufflevector <8 x i8> %3, <8 x i8> %4, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> |
| ret <16 x i8> %5 |
| } |
| |
| ; Widening shuffle: fully interleaves two <4 x i16> arguments into one |
| ; <8 x i16> result (mask 0,4,1,5,2,6,3,7). |
| ; Expected codegen for both runs: a single zip1.8h on the arguments viewed |
| ; as q registers. |
| define <8 x i16> @combine_v8i16(<4 x i16> %0, <4 x i16> %1) { |
| ; CHECK-LABEL: combine_v8i16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 |
| ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 |
| ; CHECK-NEXT: zip1.8h v0, v0, v1 |
| ; CHECK-NEXT: ret |
| %3 = shufflevector <4 x i16> %0, <4 x i16> %1, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7> |
| ret <8 x i16> %3 |
| } |
| |
| ; Full interleave of two <4 x i16> arguments written as a low-half |
| ; interleave, a high-half interleave, and an identity-mask shuffle that |
| ; concatenates them into <8 x i16>. |
| ; The default run expects a single zip1.8h; the -global-isel run expects |
| ; zip1.4h + zip2.4h joined by a 64-bit lane move. |
| define <8 x i16> @combine2_v8i16(<4 x i16> %0, <4 x i16> %1) { |
| ; CHECK-SD-LABEL: combine2_v8i16: |
| ; CHECK-SD: // %bb.0: |
| ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 |
| ; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1 |
| ; CHECK-SD-NEXT: zip1.8h v0, v0, v1 |
| ; CHECK-SD-NEXT: ret |
| ; |
| ; CHECK-GI-LABEL: combine2_v8i16: |
| ; CHECK-GI: // %bb.0: |
| ; CHECK-GI-NEXT: zip1.4h v2, v0, v1 |
| ; CHECK-GI-NEXT: zip2.4h v0, v0, v1 |
| ; CHECK-GI-NEXT: mov.d v2[1], v0[0] |
| ; CHECK-GI-NEXT: mov.16b v0, v2 |
| ; CHECK-GI-NEXT: ret |
| %3 = shufflevector <4 x i16> %0, <4 x i16> %1, <4 x i32> <i32 0, i32 4, i32 1, i32 5> |
| %4 = shufflevector <4 x i16> %0, <4 x i16> %1, <4 x i32> <i32 2, i32 6, i32 3, i32 7> |
| %5 = shufflevector <4 x i16> %3, <4 x i16> %4, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> |
| ret <8 x i16> %5 |
| } |
| |
| ; Widening shuffle: fully interleaves two <2 x i32> arguments into one |
| ; <4 x i32> result (mask 0,2,1,3). |
| ; Expected codegen for both runs: a single zip1.4s on the arguments viewed |
| ; as q registers. |
| define <4 x i32> @combine_v4i32(<2 x i32> %0, <2 x i32> %1) { |
| ; CHECK-LABEL: combine_v4i32: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 |
| ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 |
| ; CHECK-NEXT: zip1.4s v0, v0, v1 |
| ; CHECK-NEXT: ret |
| %3 = shufflevector <2 x i32> %0, <2 x i32> %1, <4 x i32> <i32 0, i32 2, i32 1, i32 3> |
| ret <4 x i32> %3 |
| } |
| |
| ; Full interleave of two <2 x i32> arguments written as a low-lane pair |
| ; (mask 0,2), a high-lane pair (mask 1,3), and an identity-mask shuffle that |
| ; concatenates them into <4 x i32>. |
| ; The default run expects a single zip1.4s; the -global-isel run expects |
| ; zip1.2s + zip2.2s joined by a 64-bit lane move. |
| define <4 x i32> @combine2_v4i32(<2 x i32> %0, <2 x i32> %1) { |
| ; CHECK-SD-LABEL: combine2_v4i32: |
| ; CHECK-SD: // %bb.0: |
| ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 |
| ; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1 |
| ; CHECK-SD-NEXT: zip1.4s v0, v0, v1 |
| ; CHECK-SD-NEXT: ret |
| ; |
| ; CHECK-GI-LABEL: combine2_v4i32: |
| ; CHECK-GI: // %bb.0: |
| ; CHECK-GI-NEXT: zip1.2s v2, v0, v1 |
| ; CHECK-GI-NEXT: zip2.2s v0, v0, v1 |
| ; CHECK-GI-NEXT: mov.d v2[1], v0[0] |
| ; CHECK-GI-NEXT: mov.16b v0, v2 |
| ; CHECK-GI-NEXT: ret |
| %3 = shufflevector <2 x i32> %0, <2 x i32> %1, <2 x i32> <i32 0, i32 2> |
| %4 = shufflevector <2 x i32> %0, <2 x i32> %1, <2 x i32> <i32 1, i32 3> |
| %5 = shufflevector <2 x i32> %3, <2 x i32> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 3> |
| ret <4 x i32> %5 |
| } |
| |
| ; Widening <8 x i8> -> <16 x i8> interleave with mask element 1 undef. |
| ; Expected codegen for both runs is still a single zip1.16b. |
| define <16 x i8> @combine_v16i8_undef(<8 x i8> %0, <8 x i8> %1) { |
| ; CHECK-LABEL: combine_v16i8_undef: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 |
| ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 |
| ; CHECK-NEXT: zip1.16b v0, v0, v1 |
| ; CHECK-NEXT: ret |
| %3 = shufflevector <8 x i8> %0, <8 x i8> %1, <16 x i32> <i32 0, i32 undef, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> |
| ret <16 x i8> %3 |
| } |
| |
| ; Three-shuffle form of the <8 x i8> -> <16 x i8> interleave (low-half |
| ; interleave, high-half interleave, concatenation), with element 1 of the |
| ; low-half mask undef. |
| ; The default run expects a single zip1.16b; the -global-isel run expects |
| ; zip1.8b + zip2.8b joined by a 64-bit lane move. |
| define <16 x i8> @combine2_v16i8_undef(<8 x i8> %0, <8 x i8> %1) { |
| ; CHECK-SD-LABEL: combine2_v16i8_undef: |
| ; CHECK-SD: // %bb.0: |
| ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 |
| ; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1 |
| ; CHECK-SD-NEXT: zip1.16b v0, v0, v1 |
| ; CHECK-SD-NEXT: ret |
| ; |
| ; CHECK-GI-LABEL: combine2_v16i8_undef: |
| ; CHECK-GI: // %bb.0: |
| ; CHECK-GI-NEXT: zip1.8b v2, v0, v1 |
| ; CHECK-GI-NEXT: zip2.8b v0, v0, v1 |
| ; CHECK-GI-NEXT: mov.d v2[1], v0[0] |
| ; CHECK-GI-NEXT: mov.16b v0, v2 |
| ; CHECK-GI-NEXT: ret |
| %3 = shufflevector <8 x i8> %0, <8 x i8> %1, <8 x i32> <i32 0, i32 undef, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> |
| %4 = shufflevector <8 x i8> %0, <8 x i8> %1, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> |
| %5 = shufflevector <8 x i8> %3, <8 x i8> %4, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> |
| ret <16 x i8> %5 |
| } |
| |
| ; Widening <4 x i16> -> <8 x i16> interleave with mask element 1 undef. |
| ; Expected codegen for both runs is still a single zip1.8h. |
| define <8 x i16> @combine_v8i16_undef(<4 x i16> %0, <4 x i16> %1) { |
| ; CHECK-LABEL: combine_v8i16_undef: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 |
| ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 |
| ; CHECK-NEXT: zip1.8h v0, v0, v1 |
| ; CHECK-NEXT: ret |
| %3 = shufflevector <4 x i16> %0, <4 x i16> %1, <8 x i32> <i32 0, i32 undef, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7> |
| ret <8 x i16> %3 |
| } |
| |
| ; FIXME: This could be zip1 too once the commuted 8,0,9,1... mask is handled; it currently lowers to tbl. |
| ; Widening interleave with the shuffle operands swapped (%1 first, %0 |
| ; second) and a mask that starts with the second operand (8,0,9,1,...). |
| ; Note the operands are <8 x i8> and the result <16 x i8>, despite the |
| ; "v8i16" in the function name. |
| ; Both runs currently expect a tbl.16b with a constant-pool index vector; |
| ; they differ only in register assignment. |
| define <16 x i8> @combine_v8i16_8first(<8 x i8> %0, <8 x i8> %1) { |
| ; CHECK-SD-LABEL: combine_v8i16_8first: |
| ; CHECK-SD: // %bb.0: |
| ; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1_q2 |
| ; CHECK-SD-NEXT: adrp x8, .LCPI25_0 |
| ; CHECK-SD-NEXT: fmov d2, d0 |
| ; CHECK-SD-NEXT: ldr q3, [x8, :lo12:.LCPI25_0] |
| ; CHECK-SD-NEXT: tbl.16b v0, { v1, v2 }, v3 |
| ; CHECK-SD-NEXT: ret |
| ; |
| ; CHECK-GI-LABEL: combine_v8i16_8first: |
| ; CHECK-GI: // %bb.0: |
| ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q31_q0 |
| ; CHECK-GI-NEXT: adrp x8, .LCPI25_0 |
| ; CHECK-GI-NEXT: fmov d31, d1 |
| ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI25_0] |
| ; CHECK-GI-NEXT: tbl.16b v0, { v31, v0 }, v2 |
| ; CHECK-GI-NEXT: ret |
| %3 = shufflevector <8 x i8> %1, <8 x i8> %0, <16 x i32> <i32 8, i32 0, i32 9, i32 1, i32 10, i32 2, i32 11, i32 3, i32 12, i32 4, i32 13, i32 5, i32 14, i32 6, i32 15, i32 7> |
| ret <16 x i8> %3 |
| } |
| |
| |
| ; FIXME: This could be zip1 too once the commuted 8,0,9,1... mask is handled; it currently lowers to tbl. |
| ; Same swapped-operand widening interleave (mask 8,0,9,1,...) on <8 x i8> |
| ; inputs, but with the final mask element undef. |
| ; Both runs currently expect a tbl.16b with a constant-pool index vector; |
| ; they differ only in register assignment. |
| define <16 x i8> @combine_v8i16_8firstundef(<8 x i8> %0, <8 x i8> %1) { |
| ; CHECK-SD-LABEL: combine_v8i16_8firstundef: |
| ; CHECK-SD: // %bb.0: |
| ; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1_q2 |
| ; CHECK-SD-NEXT: adrp x8, .LCPI26_0 |
| ; CHECK-SD-NEXT: fmov d2, d0 |
| ; CHECK-SD-NEXT: ldr q3, [x8, :lo12:.LCPI26_0] |
| ; CHECK-SD-NEXT: tbl.16b v0, { v1, v2 }, v3 |
| ; CHECK-SD-NEXT: ret |
| ; |
| ; CHECK-GI-LABEL: combine_v8i16_8firstundef: |
| ; CHECK-GI: // %bb.0: |
| ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q31_q0 |
| ; CHECK-GI-NEXT: adrp x8, .LCPI26_0 |
| ; CHECK-GI-NEXT: fmov d31, d1 |
| ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI26_0] |
| ; CHECK-GI-NEXT: tbl.16b v0, { v31, v0 }, v2 |
| ; CHECK-GI-NEXT: ret |
| %3 = shufflevector <8 x i8> %1, <8 x i8> %0, <16 x i32> <i32 8, i32 0, i32 9, i32 1, i32 10, i32 2, i32 11, i32 3, i32 12, i32 4, i32 13, i32 5, i32 14, i32 6, i32 15, i32 undef> |
| ret <16 x i8> %3 |
| } |
| |
| ; Shuffles of an <N x i1> compare result: computes 0.0 < %arg (ordered), |
| ; extracts lanes 2 and 0 into a <2 x i1>, duplicates each lane (mask |
| ; 0,0,1,1) back to <4 x i1>, and uses that to select between 1.0 and 0.0. |
| ; The expected codegen contains both a zip2.4h and a zip1.4h. The header of |
| ; this file expects the -global-isel run to report a fallback for this |
| ; function, so one set of assertions covers both runs. |
| define <4 x float> @shuffle_zip1(<4 x float> %arg) { |
| ; CHECK-LABEL: shuffle_zip1: |
| ; CHECK: // %bb.0: // %bb |
| ; CHECK-NEXT: fcmgt.4s v0, v0, #0.0 |
| ; CHECK-NEXT: uzp1.8h v1, v0, v0 |
| ; CHECK-NEXT: xtn.4h v0, v0 |
| ; CHECK-NEXT: xtn.4h v1, v1 |
| ; CHECK-NEXT: zip2.4h v0, v0, v1 |
| ; CHECK-NEXT: fmov.4s v1, #1.00000000 |
| ; CHECK-NEXT: zip1.4h v0, v0, v0 |
| ; CHECK-NEXT: sshll.4s v0, v0, #0 |
| ; CHECK-NEXT: and.16b v0, v1, v0 |
| ; CHECK-NEXT: ret |
| bb: |
| %inst = fcmp olt <4 x float> zeroinitializer, %arg |
| %inst1 = shufflevector <4 x i1> %inst, <4 x i1> zeroinitializer, <2 x i32> <i32 2, i32 0> |
| %inst2 = shufflevector <2 x i1> %inst1, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 0, i32 1, i32 1> |
| %inst3 = select <4 x i1> %inst2, <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float> zeroinitializer |
| ret <4 x float> %inst3 |
| } |
| |
| ; Integer variant using an unsigned compare: computes 0 <u %arg, extracts |
| ; lanes 2 and 0 into a <2 x i1>, duplicates each lane (mask 0,0,1,1) back to |
| ; <4 x i1>, and uses that to select between splat(1) and zero. |
| ; The expected codegen contains both a zip2.4h and a zip1.4h. The header of |
| ; this file expects the -global-isel run to report a fallback for this |
| ; function, so one set of assertions covers both runs. |
| define <4 x i32> @shuffle_zip2(<4 x i32> %arg) { |
| ; CHECK-LABEL: shuffle_zip2: |
| ; CHECK: // %bb.0: // %bb |
| ; CHECK-NEXT: cmtst.4s v0, v0, v0 |
| ; CHECK-NEXT: uzp1.8h v1, v0, v0 |
| ; CHECK-NEXT: xtn.4h v0, v0 |
| ; CHECK-NEXT: xtn.4h v1, v1 |
| ; CHECK-NEXT: zip2.4h v0, v0, v1 |
| ; CHECK-NEXT: movi.4s v1, #1 |
| ; CHECK-NEXT: zip1.4h v0, v0, v0 |
| ; CHECK-NEXT: ushll.4s v0, v0, #0 |
| ; CHECK-NEXT: and.16b v0, v0, v1 |
| ; CHECK-NEXT: ret |
| bb: |
| %inst = icmp ult <4 x i32> zeroinitializer, %arg |
| %inst1 = shufflevector <4 x i1> %inst, <4 x i1> zeroinitializer, <2 x i32> <i32 2, i32 0> |
| %inst2 = shufflevector <2 x i1> %inst1, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 0, i32 1, i32 1> |
| %inst3 = select <4 x i1> %inst2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32> zeroinitializer |
| ret <4 x i32> %inst3 |
| } |
| |
| ; Integer variant using a signed compare: computes 0 <s %arg, extracts |
| ; lanes 2 and 0 into a <2 x i1>, duplicates each lane (mask 0,0,1,1) back to |
| ; <4 x i1>, and uses that to select between splat(1) and zero. |
| ; The expected codegen contains both a zip2.4h and a zip1.4h. The header of |
| ; this file expects the -global-isel run to report a fallback for this |
| ; function, so one set of assertions covers both runs. |
| define <4 x i32> @shuffle_zip3(<4 x i32> %arg) { |
| ; CHECK-LABEL: shuffle_zip3: |
| ; CHECK: // %bb.0: // %bb |
| ; CHECK-NEXT: cmgt.4s v0, v0, #0 |
| ; CHECK-NEXT: uzp1.8h v1, v0, v0 |
| ; CHECK-NEXT: xtn.4h v0, v0 |
| ; CHECK-NEXT: xtn.4h v1, v1 |
| ; CHECK-NEXT: zip2.4h v0, v0, v1 |
| ; CHECK-NEXT: movi.4s v1, #1 |
| ; CHECK-NEXT: zip1.4h v0, v0, v0 |
| ; CHECK-NEXT: sshll.4s v0, v0, #0 |
| ; CHECK-NEXT: and.16b v0, v0, v1 |
| ; CHECK-NEXT: ret |
| bb: |
| %inst = icmp slt <4 x i32> zeroinitializer, %arg |
| %inst1 = shufflevector <4 x i1> %inst, <4 x i1> zeroinitializer, <2 x i32> <i32 2, i32 0> |
| %inst2 = shufflevector <2 x i1> %inst1, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 0, i32 1, i32 1> |
| %inst3 = select <4 x i1> %inst2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32> zeroinitializer |
| ret <4 x i32> %inst3 |
| } |