| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc < %s -mtriple aarch64 | FileCheck %s --check-prefixes=CHECK-LE |
| ; RUN: llc < %s -mtriple aarch64_be | FileCheck %s --check-prefixes=CHECK-BE |
| |
| ; Note: Currently, interleaved stores are only directly supported on AArch64 for 64-bit or 128-bit vector data. |
| ; Thus, this test cannot cover lowering to `STNPSi`. |
| |
| ; Two-way interleave of two <2 x i32> vectors (mask 0,2,1,3) written with a |
| ; !nontemporal store. Little-endian expects zip1 followed by stnp of the two |
| ; 64-bit halves; big-endian expects rev64 on each input followed by st2. |
| define void @test_stnp_interleaved_store2_v2i32(<2 x i32> %v0, <2 x i32> %v1, ptr %ptr) { |
| ; CHECK-LE-LABEL: test_stnp_interleaved_store2_v2i32: |
| ; CHECK-LE: // %bb.0: // %entry |
| ; CHECK-LE-NEXT: // kill: def $d0 killed $d0 def $q0 |
| ; CHECK-LE-NEXT: // kill: def $d1 killed $d1 def $q1 |
| ; CHECK-LE-NEXT: zip1 v0.4s, v0.4s, v1.4s |
| ; CHECK-LE-NEXT: mov d1, v0.d[1] |
| ; CHECK-LE-NEXT: stnp d0, d1, [x0] |
| ; CHECK-LE-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: test_stnp_interleaved_store2_v2i32: |
| ; CHECK-BE: // %bb.0: // %entry |
| ; CHECK-BE-NEXT: rev64 v2.2s, v1.2s |
| ; CHECK-BE-NEXT: rev64 v1.2s, v0.2s |
| ; CHECK-BE-NEXT: st2 { v1.2s, v2.2s }, [x0] |
| ; CHECK-BE-NEXT: ret |
| entry: |
| %shuffle = shufflevector <2 x i32> %v0, <2 x i32> %v1, |
| <4 x i32> <i32 0, i32 2, i32 1, i32 3> |
| store <4 x i32> %shuffle, ptr %ptr, align 4, !nontemporal !0 |
| ret void |
| } |
| |
| ; Two-way interleave of two <4 x i16> vectors (mask 0,4,1,5,...) written with a |
| ; !nontemporal store. Little-endian expects zip1 followed by stnp of the two |
| ; 64-bit halves; big-endian expects rev64 on each input followed by st2. |
| define void @test_stnp_interleaved_store2_v4i16(<4 x i16> %v0, <4 x i16> %v1, ptr %ptr) { |
| ; CHECK-LE-LABEL: test_stnp_interleaved_store2_v4i16: |
| ; CHECK-LE: // %bb.0: // %entry |
| ; CHECK-LE-NEXT: // kill: def $d0 killed $d0 def $q0 |
| ; CHECK-LE-NEXT: // kill: def $d1 killed $d1 def $q1 |
| ; CHECK-LE-NEXT: zip1 v0.8h, v0.8h, v1.8h |
| ; CHECK-LE-NEXT: mov d1, v0.d[1] |
| ; CHECK-LE-NEXT: stnp d0, d1, [x0] |
| ; CHECK-LE-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: test_stnp_interleaved_store2_v4i16: |
| ; CHECK-BE: // %bb.0: // %entry |
| ; CHECK-BE-NEXT: rev64 v2.4h, v1.4h |
| ; CHECK-BE-NEXT: rev64 v1.4h, v0.4h |
| ; CHECK-BE-NEXT: st2 { v1.4h, v2.4h }, [x0] |
| ; CHECK-BE-NEXT: ret |
| entry: |
| %shuffle = shufflevector <4 x i16> %v0, <4 x i16> %v1, |
| <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7> |
| store <8 x i16> %shuffle, ptr %ptr, align 2, !nontemporal !0 |
| ret void |
| } |
| |
| ; Two-way interleave of two <8 x i8> vectors (mask 0,8,1,9,...) written with a |
| ; !nontemporal store. Little-endian expects zip1 followed by stnp of the two |
| ; 64-bit halves; big-endian expects rev64 on each input followed by st2. |
| define void @test_stnp_interleaved_store2_v8i8(<8 x i8> %v0, <8 x i8> %v1, ptr %ptr) { |
| ; CHECK-LE-LABEL: test_stnp_interleaved_store2_v8i8: |
| ; CHECK-LE: // %bb.0: // %entry |
| ; CHECK-LE-NEXT: // kill: def $d0 killed $d0 def $q0 |
| ; CHECK-LE-NEXT: // kill: def $d1 killed $d1 def $q1 |
| ; CHECK-LE-NEXT: zip1 v0.16b, v0.16b, v1.16b |
| ; CHECK-LE-NEXT: mov d1, v0.d[1] |
| ; CHECK-LE-NEXT: stnp d0, d1, [x0] |
| ; CHECK-LE-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: test_stnp_interleaved_store2_v8i8: |
| ; CHECK-BE: // %bb.0: // %entry |
| ; CHECK-BE-NEXT: rev64 v2.8b, v1.8b |
| ; CHECK-BE-NEXT: rev64 v1.8b, v0.8b |
| ; CHECK-BE-NEXT: st2 { v1.8b, v2.8b }, [x0] |
| ; CHECK-BE-NEXT: ret |
| entry: |
| %shuffle = shufflevector <8 x i8> %v0, <8 x i8> %v1, |
| <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, |
| i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> |
| store <16 x i8> %shuffle, ptr %ptr, align 1, !nontemporal !0 |
| ret void |
| } |
| |
| ; Floating-point counterpart of the <2 x i32> case: two-way interleave of two |
| ; <2 x float> vectors (mask 0,2,1,3) written with a !nontemporal store. |
| ; Little-endian expects zip1 followed by stnp of the two 64-bit halves; |
| ; big-endian expects rev64 on each input followed by st2. |
| define void @test_stnp_interleaved_store2_v2f32(<2 x float> %v0, <2 x float> %v1, ptr %ptr) { |
| ; CHECK-LE-LABEL: test_stnp_interleaved_store2_v2f32: |
| ; CHECK-LE: // %bb.0: // %entry |
| ; CHECK-LE-NEXT: // kill: def $d0 killed $d0 def $q0 |
| ; CHECK-LE-NEXT: // kill: def $d1 killed $d1 def $q1 |
| ; CHECK-LE-NEXT: zip1 v0.4s, v0.4s, v1.4s |
| ; CHECK-LE-NEXT: mov d1, v0.d[1] |
| ; CHECK-LE-NEXT: stnp d0, d1, [x0] |
| ; CHECK-LE-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: test_stnp_interleaved_store2_v2f32: |
| ; CHECK-BE: // %bb.0: // %entry |
| ; CHECK-BE-NEXT: rev64 v2.2s, v1.2s |
| ; CHECK-BE-NEXT: rev64 v1.2s, v0.2s |
| ; CHECK-BE-NEXT: st2 { v1.2s, v2.2s }, [x0] |
| ; CHECK-BE-NEXT: ret |
| entry: |
| %shuffle = shufflevector <2 x float> %v0, <2 x float> %v1, |
| <4 x i32> <i32 0, i32 2, i32 1, i32 3> |
| store <4 x float> %shuffle, ptr %ptr, align 4, !nontemporal !0 |
| ret void |
| } |
| |
| ; Floating-point counterpart of the <4 x i16> case: two-way interleave of two |
| ; <4 x half> vectors (mask 0,4,1,5,...) written with a !nontemporal store. |
| ; Little-endian expects zip1 followed by stnp of the two 64-bit halves; |
| ; big-endian expects rev64 on each input followed by st2. |
| define void @test_stnp_interleaved_store2_v4f16(<4 x half> %v0, <4 x half> %v1, ptr %ptr) { |
| ; CHECK-LE-LABEL: test_stnp_interleaved_store2_v4f16: |
| ; CHECK-LE: // %bb.0: // %entry |
| ; CHECK-LE-NEXT: // kill: def $d0 killed $d0 def $q0 |
| ; CHECK-LE-NEXT: // kill: def $d1 killed $d1 def $q1 |
| ; CHECK-LE-NEXT: zip1 v0.8h, v0.8h, v1.8h |
| ; CHECK-LE-NEXT: mov d1, v0.d[1] |
| ; CHECK-LE-NEXT: stnp d0, d1, [x0] |
| ; CHECK-LE-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: test_stnp_interleaved_store2_v4f16: |
| ; CHECK-BE: // %bb.0: // %entry |
| ; CHECK-BE-NEXT: rev64 v2.4h, v1.4h |
| ; CHECK-BE-NEXT: rev64 v1.4h, v0.4h |
| ; CHECK-BE-NEXT: st2 { v1.4h, v2.4h }, [x0] |
| ; CHECK-BE-NEXT: ret |
| entry: |
| %shuffle = shufflevector <4 x half> %v0, <4 x half> %v1, |
| <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7> |
| store <8 x half> %shuffle, ptr %ptr, align 2, !nontemporal !0 |
| ret void |
| } |
| |
| ; Two-way interleave of two <2 x i64> vectors (mask 0,2,1,3) written with a |
| ; !nontemporal store. Little-endian expects zip2/zip1 feeding a single stnp of |
| ; two q registers; big-endian expects ext #8 on each input followed by st2. |
| define void @test_stnp_interleaved_store2_v2i64(<2 x i64> %v0, <2 x i64> %v1, ptr %ptr) { |
| ; CHECK-LE-LABEL: test_stnp_interleaved_store2_v2i64: |
| ; CHECK-LE: // %bb.0: // %entry |
| ; CHECK-LE-NEXT: zip2 v2.2d, v0.2d, v1.2d |
| ; CHECK-LE-NEXT: zip1 v0.2d, v0.2d, v1.2d |
| ; CHECK-LE-NEXT: stnp q0, q2, [x0] |
| ; CHECK-LE-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: test_stnp_interleaved_store2_v2i64: |
| ; CHECK-BE: // %bb.0: // %entry |
| ; CHECK-BE-NEXT: ext v2.16b, v1.16b, v1.16b, #8 |
| ; CHECK-BE-NEXT: ext v1.16b, v0.16b, v0.16b, #8 |
| ; CHECK-BE-NEXT: st2 { v1.2d, v2.2d }, [x0] |
| ; CHECK-BE-NEXT: ret |
| entry: |
| %shuffle = shufflevector <2 x i64> %v0, <2 x i64> %v1, |
| <4 x i32> <i32 0, i32 2, i32 1, i32 3> |
| store <4 x i64> %shuffle, ptr %ptr, align 8, !nontemporal !0 |
| ret void |
| } |
| |
| ; Two-way interleave of two <4 x i32> vectors (mask 0,4,1,5,...) written with a |
| ; !nontemporal store. Little-endian expects zip2/zip1 feeding a single stnp of |
| ; two q registers; big-endian expects rev64 and ext #8 on each input followed |
| ; by st2. |
| define void @test_stnp_interleaved_store2_v4i32(<4 x i32> %v0, <4 x i32> %v1, ptr %ptr) { |
| ; CHECK-LE-LABEL: test_stnp_interleaved_store2_v4i32: |
| ; CHECK-LE: // %bb.0: // %entry |
| ; CHECK-LE-NEXT: zip2 v2.4s, v0.4s, v1.4s |
| ; CHECK-LE-NEXT: zip1 v0.4s, v0.4s, v1.4s |
| ; CHECK-LE-NEXT: stnp q0, q2, [x0] |
| ; CHECK-LE-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: test_stnp_interleaved_store2_v4i32: |
| ; CHECK-BE: // %bb.0: // %entry |
| ; CHECK-BE-NEXT: rev64 v1.4s, v1.4s |
| ; CHECK-BE-NEXT: rev64 v0.4s, v0.4s |
| ; CHECK-BE-NEXT: ext v2.16b, v1.16b, v1.16b, #8 |
| ; CHECK-BE-NEXT: ext v1.16b, v0.16b, v0.16b, #8 |
| ; CHECK-BE-NEXT: st2 { v1.4s, v2.4s }, [x0] |
| ; CHECK-BE-NEXT: ret |
| entry: |
| %shuffle = shufflevector <4 x i32> %v0, <4 x i32> %v1, |
| <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7> |
| store <8 x i32> %shuffle, ptr %ptr, align 4, !nontemporal !0 |
| ret void |
| } |
| |
| ; Two-way interleave of two <8 x i16> vectors (mask 0,8,1,9,...) written with a |
| ; !nontemporal store. Little-endian expects zip2/zip1 feeding a single stnp of |
| ; two q registers; big-endian expects rev64 and ext #8 on each input followed |
| ; by st2. |
| define void @test_stnp_interleaved_store2_v8i16(<8 x i16> %v0, <8 x i16> %v1, ptr %ptr) { |
| ; CHECK-LE-LABEL: test_stnp_interleaved_store2_v8i16: |
| ; CHECK-LE: // %bb.0: // %entry |
| ; CHECK-LE-NEXT: zip2 v2.8h, v0.8h, v1.8h |
| ; CHECK-LE-NEXT: zip1 v0.8h, v0.8h, v1.8h |
| ; CHECK-LE-NEXT: stnp q0, q2, [x0] |
| ; CHECK-LE-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: test_stnp_interleaved_store2_v8i16: |
| ; CHECK-BE: // %bb.0: // %entry |
| ; CHECK-BE-NEXT: rev64 v1.8h, v1.8h |
| ; CHECK-BE-NEXT: rev64 v0.8h, v0.8h |
| ; CHECK-BE-NEXT: ext v2.16b, v1.16b, v1.16b, #8 |
| ; CHECK-BE-NEXT: ext v1.16b, v0.16b, v0.16b, #8 |
| ; CHECK-BE-NEXT: st2 { v1.8h, v2.8h }, [x0] |
| ; CHECK-BE-NEXT: ret |
| entry: |
| %shuffle = shufflevector <8 x i16> %v0, <8 x i16> %v1, |
| <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, |
| i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> |
| store <16 x i16> %shuffle, ptr %ptr, align 2, !nontemporal !0 |
| ret void |
| } |
| |
| ; Two-way interleave of two <16 x i8> vectors written with a !nontemporal |
| ; store. The mask alternates v0[i] (index i) and v1[i] (index 16 + i) for |
| ; i = 0..15, i.e. the same 0,N,1,N+1,... pattern used by the other store2 |
| ; tests. The previous mask reused the 8-element indices (0,8,1,9,...,15,23), |
| ; which never read v1[8..15] and therefore did not test a real interleave. |
| ; NOTE(review): the assertions below were updated by hand to mirror the v8i16 |
| ; case; re-run utils/update_llc_test_checks.py to confirm them. |
| define void @test_stnp_interleaved_store2_v16i8(<16 x i8> %v0, <16 x i8> %v1, ptr %ptr) { |
| ; CHECK-LE-LABEL: test_stnp_interleaved_store2_v16i8: |
| ; CHECK-LE: // %bb.0: // %entry |
| ; CHECK-LE-NEXT: zip2 v2.16b, v0.16b, v1.16b |
| ; CHECK-LE-NEXT: zip1 v0.16b, v0.16b, v1.16b |
| ; CHECK-LE-NEXT: stnp q0, q2, [x0] |
| ; CHECK-LE-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: test_stnp_interleaved_store2_v16i8: |
| ; CHECK-BE: // %bb.0: // %entry |
| ; CHECK-BE-NEXT: rev64 v1.16b, v1.16b |
| ; CHECK-BE-NEXT: rev64 v0.16b, v0.16b |
| ; CHECK-BE-NEXT: ext v2.16b, v1.16b, v1.16b, #8 |
| ; CHECK-BE-NEXT: ext v1.16b, v0.16b, v0.16b, #8 |
| ; CHECK-BE-NEXT: st2 { v1.16b, v2.16b }, [x0] |
| ; CHECK-BE-NEXT: ret |
| entry: |
| %shuffle = shufflevector <16 x i8> %v0, <16 x i8> %v1, |
| <32 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, |
| i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, |
| i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, |
| i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31> |
| store <32 x i8> %shuffle, ptr %ptr, align 1, !nontemporal !0 |
| ret void |
| } |
| |
| ; Floating-point counterpart of the <2 x i64> case: two-way interleave of two |
| ; <2 x double> vectors (mask 0,2,1,3) written with a !nontemporal store. |
| ; Little-endian expects zip2/zip1 feeding a single stnp of two q registers; |
| ; big-endian expects ext #8 on each input followed by st2. |
| define void @test_stnp_interleaved_store2_v2f64(<2 x double> %v0, <2 x double> %v1, ptr %ptr) { |
| ; CHECK-LE-LABEL: test_stnp_interleaved_store2_v2f64: |
| ; CHECK-LE: // %bb.0: // %entry |
| ; CHECK-LE-NEXT: zip2 v2.2d, v0.2d, v1.2d |
| ; CHECK-LE-NEXT: zip1 v0.2d, v0.2d, v1.2d |
| ; CHECK-LE-NEXT: stnp q0, q2, [x0] |
| ; CHECK-LE-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: test_stnp_interleaved_store2_v2f64: |
| ; CHECK-BE: // %bb.0: // %entry |
| ; CHECK-BE-NEXT: ext v2.16b, v1.16b, v1.16b, #8 |
| ; CHECK-BE-NEXT: ext v1.16b, v0.16b, v0.16b, #8 |
| ; CHECK-BE-NEXT: st2 { v1.2d, v2.2d }, [x0] |
| ; CHECK-BE-NEXT: ret |
| entry: |
| %shuffle = shufflevector <2 x double> %v0, <2 x double> %v1, |
| <4 x i32> <i32 0, i32 2, i32 1, i32 3> |
| store <4 x double> %shuffle, ptr %ptr, align 8, !nontemporal !0 |
| ret void |
| } |
| |
| ; Floating-point counterpart of the <4 x i32> case: two-way interleave of two |
| ; <4 x float> vectors (mask 0,4,1,5,...) written with a !nontemporal store. |
| ; Little-endian expects zip2/zip1 feeding a single stnp of two q registers; |
| ; big-endian expects rev64 and ext #8 on each input followed by st2. |
| define void @test_stnp_interleaved_store2_v4f32(<4 x float> %v0, <4 x float> %v1, ptr %ptr) { |
| ; CHECK-LE-LABEL: test_stnp_interleaved_store2_v4f32: |
| ; CHECK-LE: // %bb.0: // %entry |
| ; CHECK-LE-NEXT: zip2 v2.4s, v0.4s, v1.4s |
| ; CHECK-LE-NEXT: zip1 v0.4s, v0.4s, v1.4s |
| ; CHECK-LE-NEXT: stnp q0, q2, [x0] |
| ; CHECK-LE-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: test_stnp_interleaved_store2_v4f32: |
| ; CHECK-BE: // %bb.0: // %entry |
| ; CHECK-BE-NEXT: rev64 v1.4s, v1.4s |
| ; CHECK-BE-NEXT: rev64 v0.4s, v0.4s |
| ; CHECK-BE-NEXT: ext v2.16b, v1.16b, v1.16b, #8 |
| ; CHECK-BE-NEXT: ext v1.16b, v0.16b, v0.16b, #8 |
| ; CHECK-BE-NEXT: st2 { v1.4s, v2.4s }, [x0] |
| ; CHECK-BE-NEXT: ret |
| entry: |
| %shuffle = shufflevector <4 x float> %v0, <4 x float> %v1, |
| <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7> |
| store <8 x float> %shuffle, ptr %ptr, align 4, !nontemporal !0 |
| ret void |
| } |
| |
| ; Floating-point counterpart of the <8 x i16> case: two-way interleave of two |
| ; <8 x half> vectors (mask 0,8,1,9,...) written with a !nontemporal store. |
| ; Little-endian expects zip2/zip1 feeding a single stnp of two q registers; |
| ; big-endian expects rev64 and ext #8 on each input followed by st2. |
| define void @test_stnp_interleaved_store2_v8f16(<8 x half> %v0, <8 x half> %v1, ptr %ptr) { |
| ; CHECK-LE-LABEL: test_stnp_interleaved_store2_v8f16: |
| ; CHECK-LE: // %bb.0: // %entry |
| ; CHECK-LE-NEXT: zip2 v2.8h, v0.8h, v1.8h |
| ; CHECK-LE-NEXT: zip1 v0.8h, v0.8h, v1.8h |
| ; CHECK-LE-NEXT: stnp q0, q2, [x0] |
| ; CHECK-LE-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: test_stnp_interleaved_store2_v8f16: |
| ; CHECK-BE: // %bb.0: // %entry |
| ; CHECK-BE-NEXT: rev64 v1.8h, v1.8h |
| ; CHECK-BE-NEXT: rev64 v0.8h, v0.8h |
| ; CHECK-BE-NEXT: ext v2.16b, v1.16b, v1.16b, #8 |
| ; CHECK-BE-NEXT: ext v1.16b, v0.16b, v0.16b, #8 |
| ; CHECK-BE-NEXT: st2 { v1.8h, v2.8h }, [x0] |
| ; CHECK-BE-NEXT: ret |
| entry: |
| %shuffle = shufflevector <8 x half> %v0, <8 x half> %v1, |
| <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, |
| i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> |
| store <16 x half> %shuffle, ptr %ptr, align 2, !nontemporal !0 |
| ret void |
| } |
| |
| ; Three-way interleave of <2 x i32> vectors written with a !nontemporal store. |
| ; %s0 concatenates %v0 and %v1, %s1 widens %v2 with poison lanes, and the |
| ; final shuffle picks elements in v0[i], v1[i], v2[i] order. |
| ; Little-endian expects a plain st3; big-endian expects rev64 on each input |
| ; followed by st3. |
| define void @test_stnp_interleaved_store3_v2i32(<2 x i32> %v0, <2 x i32> %v1, <2 x i32> %v2, ptr %ptr) { |
| ; CHECK-LE-LABEL: test_stnp_interleaved_store3_v2i32: |
| ; CHECK-LE: // %bb.0: // %entry |
| ; CHECK-LE-NEXT: // kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 |
| ; CHECK-LE-NEXT: // kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 |
| ; CHECK-LE-NEXT: // kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 |
| ; CHECK-LE-NEXT: st3 { v0.2s, v1.2s, v2.2s }, [x0] |
| ; CHECK-LE-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: test_stnp_interleaved_store3_v2i32: |
| ; CHECK-BE: // %bb.0: // %entry |
| ; CHECK-BE-NEXT: rev64 v4.2s, v2.2s |
| ; CHECK-BE-NEXT: rev64 v3.2s, v1.2s |
| ; CHECK-BE-NEXT: rev64 v2.2s, v0.2s |
| ; CHECK-BE-NEXT: st3 { v2.2s, v3.2s, v4.2s }, [x0] |
| ; CHECK-BE-NEXT: ret |
| entry: |
| %s0 = shufflevector <2 x i32> %v0, <2 x i32> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3> |
| %s1 = shufflevector <2 x i32> %v2, <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison> |
| %shuffle = shufflevector <4 x i32> %s0, <4 x i32> %s1, <6 x i32> <i32 0, i32 2, i32 4, i32 1, i32 3, i32 5> |
| store <6 x i32> %shuffle, ptr %ptr, align 4, !nontemporal !0 |
| ret void |
| } |
| |
| ; Three-way interleave of <4 x i16> vectors written with a !nontemporal store. |
| ; %s0 concatenates %v0 and %v1, %s1 widens %v2 with poison lanes, and the |
| ; final shuffle picks elements in v0[i], v1[i], v2[i] order. |
| ; Little-endian expects a plain st3; big-endian expects rev64 on each input |
| ; followed by st3. |
| define void @test_stnp_interleaved_store3_v4i16(<4 x i16> %v0, <4 x i16> %v1, <4 x i16> %v2, ptr %ptr) { |
| ; CHECK-LE-LABEL: test_stnp_interleaved_store3_v4i16: |
| ; CHECK-LE: // %bb.0: // %entry |
| ; CHECK-LE-NEXT: // kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 |
| ; CHECK-LE-NEXT: // kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 |
| ; CHECK-LE-NEXT: // kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 |
| ; CHECK-LE-NEXT: st3 { v0.4h, v1.4h, v2.4h }, [x0] |
| ; CHECK-LE-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: test_stnp_interleaved_store3_v4i16: |
| ; CHECK-BE: // %bb.0: // %entry |
| ; CHECK-BE-NEXT: rev64 v4.4h, v2.4h |
| ; CHECK-BE-NEXT: rev64 v3.4h, v1.4h |
| ; CHECK-BE-NEXT: rev64 v2.4h, v0.4h |
| ; CHECK-BE-NEXT: st3 { v2.4h, v3.4h, v4.4h }, [x0] |
| ; CHECK-BE-NEXT: ret |
| entry: |
| %s0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> |
| %s1 = shufflevector <4 x i16> %v2, <4 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison> |
| %shuffle = shufflevector <8 x i16> %s0, <8 x i16> %s1, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11> |
| store <12 x i16> %shuffle, ptr %ptr, align 2, !nontemporal !0 |
| ret void |
| } |
| |
| ; Three-way interleave of <8 x i8> vectors written with a !nontemporal store. |
| ; %s0 concatenates %v0 and %v1, %s1 widens %v2 with poison lanes, and the |
| ; final shuffle picks elements in v0[i], v1[i], v2[i] order. |
| ; Little-endian expects a plain st3; big-endian expects rev64 on each input |
| ; followed by st3. |
| define void @test_stnp_interleaved_store3_v8i8(<8 x i8> %v0, <8 x i8> %v1, <8 x i8> %v2, ptr %ptr) { |
| ; CHECK-LE-LABEL: test_stnp_interleaved_store3_v8i8: |
| ; CHECK-LE: // %bb.0: // %entry |
| ; CHECK-LE-NEXT: // kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 |
| ; CHECK-LE-NEXT: // kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 |
| ; CHECK-LE-NEXT: // kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 |
| ; CHECK-LE-NEXT: st3 { v0.8b, v1.8b, v2.8b }, [x0] |
| ; CHECK-LE-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: test_stnp_interleaved_store3_v8i8: |
| ; CHECK-BE: // %bb.0: // %entry |
| ; CHECK-BE-NEXT: rev64 v4.8b, v2.8b |
| ; CHECK-BE-NEXT: rev64 v3.8b, v1.8b |
| ; CHECK-BE-NEXT: rev64 v2.8b, v0.8b |
| ; CHECK-BE-NEXT: st3 { v2.8b, v3.8b, v4.8b }, [x0] |
| ; CHECK-BE-NEXT: ret |
| entry: |
| %s0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> |
| %s1 = shufflevector <8 x i8> %v2, <8 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> |
| %shuffle = shufflevector <16 x i8> %s0, <16 x i8> %s1, <24 x i32> <i32 0, i32 8, i32 16, i32 1, i32 9, i32 17, i32 2, i32 10, i32 18, i32 3, i32 11, i32 19, i32 4, i32 12, i32 20, i32 5, i32 13, i32 21, i32 6, i32 14, i32 22, i32 7, i32 15, i32 23> |
| store <24 x i8> %shuffle, ptr %ptr, align 1, !nontemporal !0 |
| ret void |
| } |
| |
| ; Three-way interleave of <2 x float> vectors written with a !nontemporal |
| ; store. %s0 concatenates %v0 and %v1, %s1 widens %v2 with poison lanes, and |
| ; the final shuffle picks elements in v0[i], v1[i], v2[i] order. |
| ; Little-endian expects a plain st3; big-endian expects rev64 on each input |
| ; followed by st3. |
| define void @test_stnp_interleaved_store3_v2f32(<2 x float> %v0, <2 x float> %v1, <2 x float> %v2, ptr %ptr) { |
| ; CHECK-LE-LABEL: test_stnp_interleaved_store3_v2f32: |
| ; CHECK-LE: // %bb.0: // %entry |
| ; CHECK-LE-NEXT: // kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 |
| ; CHECK-LE-NEXT: // kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 |
| ; CHECK-LE-NEXT: // kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 |
| ; CHECK-LE-NEXT: st3 { v0.2s, v1.2s, v2.2s }, [x0] |
| ; CHECK-LE-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: test_stnp_interleaved_store3_v2f32: |
| ; CHECK-BE: // %bb.0: // %entry |
| ; CHECK-BE-NEXT: rev64 v4.2s, v2.2s |
| ; CHECK-BE-NEXT: rev64 v3.2s, v1.2s |
| ; CHECK-BE-NEXT: rev64 v2.2s, v0.2s |
| ; CHECK-BE-NEXT: st3 { v2.2s, v3.2s, v4.2s }, [x0] |
| ; CHECK-BE-NEXT: ret |
| entry: |
| %s0 = shufflevector <2 x float> %v0, <2 x float> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3> |
| %s1 = shufflevector <2 x float> %v2, <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison> |
| %shuffle = shufflevector <4 x float> %s0, <4 x float> %s1, <6 x i32> <i32 0, i32 2, i32 4, i32 1, i32 3, i32 5> |
| store <6 x float> %shuffle, ptr %ptr, align 4, !nontemporal !0 |
| ret void |
| } |
| |
| ; Three-way interleave of <4 x half> vectors written with a !nontemporal |
| ; store. %s0 concatenates %v0 and %v1, %s1 widens %v2 with poison lanes, and |
| ; the final shuffle picks elements in v0[i], v1[i], v2[i] order. |
| ; Little-endian expects a plain st3; big-endian expects rev64 on each input |
| ; followed by st3. |
| define void @test_stnp_interleaved_store3_v4f16(<4 x half> %v0, <4 x half> %v1, <4 x half> %v2, ptr %ptr) { |
| ; CHECK-LE-LABEL: test_stnp_interleaved_store3_v4f16: |
| ; CHECK-LE: // %bb.0: // %entry |
| ; CHECK-LE-NEXT: // kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 |
| ; CHECK-LE-NEXT: // kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 |
| ; CHECK-LE-NEXT: // kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 |
| ; CHECK-LE-NEXT: st3 { v0.4h, v1.4h, v2.4h }, [x0] |
| ; CHECK-LE-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: test_stnp_interleaved_store3_v4f16: |
| ; CHECK-BE: // %bb.0: // %entry |
| ; CHECK-BE-NEXT: rev64 v4.4h, v2.4h |
| ; CHECK-BE-NEXT: rev64 v3.4h, v1.4h |
| ; CHECK-BE-NEXT: rev64 v2.4h, v0.4h |
| ; CHECK-BE-NEXT: st3 { v2.4h, v3.4h, v4.4h }, [x0] |
| ; CHECK-BE-NEXT: ret |
| entry: |
| %s0 = shufflevector <4 x half> %v0, <4 x half> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> |
| %s1 = shufflevector <4 x half> %v2, <4 x half> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison> |
| %shuffle = shufflevector <8 x half> %s0, <8 x half> %s1, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11> |
| store <12 x half> %shuffle, ptr %ptr, align 2, !nontemporal !0 |
| ret void |
| } |
| |
| ; Three-way interleave of <2 x i64> vectors written with a !nontemporal store. |
| ; %s0 concatenates %v0 and %v1, %s1 widens %v2 with poison lanes, and the |
| ; final shuffle picks elements in v0[i], v1[i], v2[i] order. |
| ; Little-endian expects a plain st3; big-endian expects ext #8 on each input |
| ; followed by st3. |
| define void @test_stnp_interleaved_store3_v2i64(<2 x i64> %v0, <2 x i64> %v1, <2 x i64> %v2, ptr %ptr) { |
| ; CHECK-LE-LABEL: test_stnp_interleaved_store3_v2i64: |
| ; CHECK-LE: // %bb.0: // %entry |
| ; CHECK-LE-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 |
| ; CHECK-LE-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 |
| ; CHECK-LE-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 |
| ; CHECK-LE-NEXT: st3 { v0.2d, v1.2d, v2.2d }, [x0] |
| ; CHECK-LE-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: test_stnp_interleaved_store3_v2i64: |
| ; CHECK-BE: // %bb.0: // %entry |
| ; CHECK-BE-NEXT: ext v4.16b, v2.16b, v2.16b, #8 |
| ; CHECK-BE-NEXT: ext v3.16b, v1.16b, v1.16b, #8 |
| ; CHECK-BE-NEXT: ext v2.16b, v0.16b, v0.16b, #8 |
| ; CHECK-BE-NEXT: st3 { v2.2d, v3.2d, v4.2d }, [x0] |
| ; CHECK-BE-NEXT: ret |
| entry: |
| %s0 = shufflevector <2 x i64> %v0, <2 x i64> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3> |
| %s1 = shufflevector <2 x i64> %v2, <2 x i64> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison> |
| %shuffle = shufflevector <4 x i64> %s0, <4 x i64> %s1, <6 x i32> <i32 0, i32 2, i32 4, i32 1, i32 3, i32 5> |
| store <6 x i64> %shuffle, ptr %ptr, align 8, !nontemporal !0 |
| ret void |
| } |
| |
| ; Three-way interleave of <4 x i32> vectors written with a !nontemporal store. |
| ; %s0 concatenates %v0 and %v1, %s1 widens %v2 with poison lanes, and the |
| ; final shuffle picks elements in v0[i], v1[i], v2[i] order. |
| ; Little-endian expects a plain st3; big-endian expects rev64 and ext #8 on |
| ; each input followed by st3. |
| define void @test_stnp_interleaved_store3_v4i32(<4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2, ptr %ptr) { |
| ; CHECK-LE-LABEL: test_stnp_interleaved_store3_v4i32: |
| ; CHECK-LE: // %bb.0: // %entry |
| ; CHECK-LE-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 |
| ; CHECK-LE-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 |
| ; CHECK-LE-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 |
| ; CHECK-LE-NEXT: st3 { v0.4s, v1.4s, v2.4s }, [x0] |
| ; CHECK-LE-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: test_stnp_interleaved_store3_v4i32: |
| ; CHECK-BE: // %bb.0: // %entry |
| ; CHECK-BE-NEXT: rev64 v2.4s, v2.4s |
| ; CHECK-BE-NEXT: rev64 v1.4s, v1.4s |
| ; CHECK-BE-NEXT: rev64 v0.4s, v0.4s |
| ; CHECK-BE-NEXT: ext v4.16b, v2.16b, v2.16b, #8 |
| ; CHECK-BE-NEXT: ext v3.16b, v1.16b, v1.16b, #8 |
| ; CHECK-BE-NEXT: ext v2.16b, v0.16b, v0.16b, #8 |
| ; CHECK-BE-NEXT: st3 { v2.4s, v3.4s, v4.4s }, [x0] |
| ; CHECK-BE-NEXT: ret |
| entry: |
| %s0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> |
| %s1 = shufflevector <4 x i32> %v2, <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison> |
| %shuffle = shufflevector <8 x i32> %s0, <8 x i32> %s1, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11> |
| store <12 x i32> %shuffle, ptr %ptr, align 4, !nontemporal !0 |
| ret void |
| } |
| |
| ; Three-way interleave of <8 x i16> vectors written with a !nontemporal store. |
| ; %s0 concatenates %v0 and %v1, %s1 widens %v2 with poison lanes, and the |
| ; final shuffle picks elements in v0[i], v1[i], v2[i] order. |
| ; Little-endian expects a plain st3; big-endian expects rev64 and ext #8 on |
| ; each input followed by st3. |
| define void @test_stnp_interleaved_store3_v8i16(<8 x i16> %v0, <8 x i16> %v1, <8 x i16> %v2, ptr %ptr) { |
| ; CHECK-LE-LABEL: test_stnp_interleaved_store3_v8i16: |
| ; CHECK-LE: // %bb.0: // %entry |
| ; CHECK-LE-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 |
| ; CHECK-LE-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 |
| ; CHECK-LE-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 |
| ; CHECK-LE-NEXT: st3 { v0.8h, v1.8h, v2.8h }, [x0] |
| ; CHECK-LE-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: test_stnp_interleaved_store3_v8i16: |
| ; CHECK-BE: // %bb.0: // %entry |
| ; CHECK-BE-NEXT: rev64 v2.8h, v2.8h |
| ; CHECK-BE-NEXT: rev64 v1.8h, v1.8h |
| ; CHECK-BE-NEXT: rev64 v0.8h, v0.8h |
| ; CHECK-BE-NEXT: ext v4.16b, v2.16b, v2.16b, #8 |
| ; CHECK-BE-NEXT: ext v3.16b, v1.16b, v1.16b, #8 |
| ; CHECK-BE-NEXT: ext v2.16b, v0.16b, v0.16b, #8 |
| ; CHECK-BE-NEXT: st3 { v2.8h, v3.8h, v4.8h }, [x0] |
| ; CHECK-BE-NEXT: ret |
| entry: |
| %s0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> |
| %s1 = shufflevector <8 x i16> %v2, <8 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> |
| %shuffle = shufflevector <16 x i16> %s0, <16 x i16> %s1, <24 x i32> <i32 0, i32 8, i32 16, i32 1, i32 9, i32 17, i32 2, i32 10, i32 18, i32 3, i32 11, i32 19, i32 4, i32 12, i32 20, i32 5, i32 13, i32 21, i32 6, i32 14, i32 22, i32 7, i32 15, i32 23> |
| store <24 x i16> %shuffle, ptr %ptr, align 2, !nontemporal !0 |
| ret void |
| } |
| |
| ; Three-way interleave of <16 x i8> vectors written with a !nontemporal store. |
| ; %s0 concatenates %v0 and %v1, %s1 widens %v2 with poison lanes, and the |
| ; final shuffle picks elements in v0[i], v1[i], v2[i] order. |
| ; Little-endian expects a plain st3; big-endian expects rev64 and ext #8 on |
| ; each input followed by st3. |
| define void @test_stnp_interleaved_store3_v16i8(<16 x i8> %v0, <16 x i8> %v1, <16 x i8> %v2, ptr %ptr) { |
| ; CHECK-LE-LABEL: test_stnp_interleaved_store3_v16i8: |
| ; CHECK-LE: // %bb.0: // %entry |
| ; CHECK-LE-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 |
| ; CHECK-LE-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 |
| ; CHECK-LE-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 |
| ; CHECK-LE-NEXT: st3 { v0.16b, v1.16b, v2.16b }, [x0] |
| ; CHECK-LE-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: test_stnp_interleaved_store3_v16i8: |
| ; CHECK-BE: // %bb.0: // %entry |
| ; CHECK-BE-NEXT: rev64 v2.16b, v2.16b |
| ; CHECK-BE-NEXT: rev64 v1.16b, v1.16b |
| ; CHECK-BE-NEXT: rev64 v0.16b, v0.16b |
| ; CHECK-BE-NEXT: ext v4.16b, v2.16b, v2.16b, #8 |
| ; CHECK-BE-NEXT: ext v3.16b, v1.16b, v1.16b, #8 |
| ; CHECK-BE-NEXT: ext v2.16b, v0.16b, v0.16b, #8 |
| ; CHECK-BE-NEXT: st3 { v2.16b, v3.16b, v4.16b }, [x0] |
| ; CHECK-BE-NEXT: ret |
| entry: |
| %s0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> |
| %s1 = shufflevector <16 x i8> %v2, <16 x i8> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> |
| %shuffle = shufflevector <32 x i8> %s0, <32 x i8> %s1, <48 x i32> <i32 0, i32 16, i32 32, i32 1, i32 17, i32 33, i32 2, i32 18, i32 34, i32 3, i32 19, i32 35, i32 4, i32 20, i32 36, i32 5, i32 21, i32 37, i32 6, i32 22, i32 38, i32 7, i32 23, i32 39, i32 8, i32 24, i32 40, i32 9, i32 25, i32 41, i32 10, i32 26, i32 42, i32 11, i32 27, i32 43, i32 12, i32 28, i32 44, i32 13, i32 29, i32 45, i32 14, i32 30, i32 46, i32 15, i32 31, i32 47> |
| store <48 x i8> %shuffle, ptr %ptr, align 1, !nontemporal !0 |
| ret void |
| } |
| |
| ; Three-way interleave of <2 x double> vectors written with a !nontemporal |
| ; store. %s0 concatenates %v0 and %v1, %s1 widens %v2 with poison lanes, and |
| ; the final shuffle picks elements in v0[i], v1[i], v2[i] order. |
| ; Little-endian expects a plain st3; big-endian expects ext #8 on each input |
| ; followed by st3. |
| define void @test_stnp_interleaved_store3_v2f64(<2 x double> %v0, <2 x double> %v1, <2 x double> %v2, ptr %ptr) { |
| ; CHECK-LE-LABEL: test_stnp_interleaved_store3_v2f64: |
| ; CHECK-LE: // %bb.0: // %entry |
| ; CHECK-LE-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 |
| ; CHECK-LE-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 |
| ; CHECK-LE-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 |
| ; CHECK-LE-NEXT: st3 { v0.2d, v1.2d, v2.2d }, [x0] |
| ; CHECK-LE-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: test_stnp_interleaved_store3_v2f64: |
| ; CHECK-BE: // %bb.0: // %entry |
| ; CHECK-BE-NEXT: ext v4.16b, v2.16b, v2.16b, #8 |
| ; CHECK-BE-NEXT: ext v3.16b, v1.16b, v1.16b, #8 |
| ; CHECK-BE-NEXT: ext v2.16b, v0.16b, v0.16b, #8 |
| ; CHECK-BE-NEXT: st3 { v2.2d, v3.2d, v4.2d }, [x0] |
| ; CHECK-BE-NEXT: ret |
| entry: |
| %s0 = shufflevector <2 x double> %v0, <2 x double> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3> |
| %s1 = shufflevector <2 x double> %v2, <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison> |
| %shuffle = shufflevector <4 x double> %s0, <4 x double> %s1, <6 x i32> <i32 0, i32 2, i32 4, i32 1, i32 3, i32 5> |
| store <6 x double> %shuffle, ptr %ptr, align 8, !nontemporal !0 |
| ret void |
| } |
| |
| ; Three-way interleave of <4 x float> vectors written with a !nontemporal |
| ; store. %s0 concatenates %v0 and %v1, %s1 widens %v2 with poison lanes, and |
| ; the final shuffle picks elements in v0[i], v1[i], v2[i] order. |
| ; Little-endian expects a plain st3; big-endian expects rev64 and ext #8 on |
| ; each input followed by st3. |
| define void @test_stnp_interleaved_store3_v4f32(<4 x float> %v0, <4 x float> %v1, <4 x float> %v2, ptr %ptr) { |
| ; CHECK-LE-LABEL: test_stnp_interleaved_store3_v4f32: |
| ; CHECK-LE: // %bb.0: // %entry |
| ; CHECK-LE-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 |
| ; CHECK-LE-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 |
| ; CHECK-LE-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 |
| ; CHECK-LE-NEXT: st3 { v0.4s, v1.4s, v2.4s }, [x0] |
| ; CHECK-LE-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: test_stnp_interleaved_store3_v4f32: |
| ; CHECK-BE: // %bb.0: // %entry |
| ; CHECK-BE-NEXT: rev64 v2.4s, v2.4s |
| ; CHECK-BE-NEXT: rev64 v1.4s, v1.4s |
| ; CHECK-BE-NEXT: rev64 v0.4s, v0.4s |
| ; CHECK-BE-NEXT: ext v4.16b, v2.16b, v2.16b, #8 |
| ; CHECK-BE-NEXT: ext v3.16b, v1.16b, v1.16b, #8 |
| ; CHECK-BE-NEXT: ext v2.16b, v0.16b, v0.16b, #8 |
| ; CHECK-BE-NEXT: st3 { v2.4s, v3.4s, v4.4s }, [x0] |
| ; CHECK-BE-NEXT: ret |
| entry: |
| %s0 = shufflevector <4 x float> %v0, <4 x float> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> |
| %s1 = shufflevector <4 x float> %v2, <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison> |
| %shuffle = shufflevector <8 x float> %s0, <8 x float> %s1, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11> |
| store <12 x float> %shuffle, ptr %ptr, align 4, !nontemporal !0 |
| ret void |
| } |
| |
| ; Three-way interleave of <8 x half> vectors written with a !nontemporal |
| ; store. %s0 concatenates %v0 and %v1, %s1 widens %v2 with poison lanes, and |
| ; the final shuffle picks elements in v0[i], v1[i], v2[i] order. |
| ; Little-endian expects a plain st3; big-endian expects rev64 and ext #8 on |
| ; each input followed by st3. |
| define void @test_stnp_interleaved_store3_v8f16(<8 x half> %v0, <8 x half> %v1, <8 x half> %v2, ptr %ptr) { |
| ; CHECK-LE-LABEL: test_stnp_interleaved_store3_v8f16: |
| ; CHECK-LE: // %bb.0: // %entry |
| ; CHECK-LE-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 |
| ; CHECK-LE-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 |
| ; CHECK-LE-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 |
| ; CHECK-LE-NEXT: st3 { v0.8h, v1.8h, v2.8h }, [x0] |
| ; CHECK-LE-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: test_stnp_interleaved_store3_v8f16: |
| ; CHECK-BE: // %bb.0: // %entry |
| ; CHECK-BE-NEXT: rev64 v2.8h, v2.8h |
| ; CHECK-BE-NEXT: rev64 v1.8h, v1.8h |
| ; CHECK-BE-NEXT: rev64 v0.8h, v0.8h |
| ; CHECK-BE-NEXT: ext v4.16b, v2.16b, v2.16b, #8 |
| ; CHECK-BE-NEXT: ext v3.16b, v1.16b, v1.16b, #8 |
| ; CHECK-BE-NEXT: ext v2.16b, v0.16b, v0.16b, #8 |
| ; CHECK-BE-NEXT: st3 { v2.8h, v3.8h, v4.8h }, [x0] |
| ; CHECK-BE-NEXT: ret |
| entry: |
| %s0 = shufflevector <8 x half> %v0, <8 x half> %v1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> |
| %s1 = shufflevector <8 x half> %v2, <8 x half> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> |
| %shuffle = shufflevector <16 x half> %s0, <16 x half> %s1, <24 x i32> <i32 0, i32 8, i32 16, i32 1, i32 9, i32 17, i32 2, i32 10, i32 18, i32 3, i32 11, i32 19, i32 4, i32 12, i32 20, i32 5, i32 13, i32 21, i32 6, i32 14, i32 22, i32 7, i32 15, i32 23> |
| store <24 x half> %shuffle, ptr %ptr, align 2, !nontemporal !0 |
| ret void |
| } |
| |
| ; Test conservative lowering of patterns that match st4. |
| |
| ; Four-way interleave of <2 x i32> vectors written with a !nontemporal store. |
| ; %s0 concatenates %v0 and %v1, %s1 concatenates %v2 and %v3, and the final |
| ; shuffle picks elements in v0[i], v1[i], v2[i], v3[i] order. |
| ; Little-endian expects a plain st4; big-endian expects rev64 on each input |
| ; followed by st4. |
| define void @test_stnp_interleaved_store4_v2i32(<2 x i32> %v0, <2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3, ptr %ptr) { |
| ; CHECK-LE-LABEL: test_stnp_interleaved_store4_v2i32: |
| ; CHECK-LE: // %bb.0: // %entry |
| ; CHECK-LE-NEXT: // kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 |
| ; CHECK-LE-NEXT: // kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 |
| ; CHECK-LE-NEXT: // kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 |
| ; CHECK-LE-NEXT: // kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 |
| ; CHECK-LE-NEXT: st4 { v0.2s, v1.2s, v2.2s, v3.2s }, [x0] |
| ; CHECK-LE-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: test_stnp_interleaved_store4_v2i32: |
| ; CHECK-BE: // %bb.0: // %entry |
| ; CHECK-BE-NEXT: rev64 v6.2s, v3.2s |
| ; CHECK-BE-NEXT: rev64 v5.2s, v2.2s |
| ; CHECK-BE-NEXT: rev64 v4.2s, v1.2s |
| ; CHECK-BE-NEXT: rev64 v3.2s, v0.2s |
| ; CHECK-BE-NEXT: st4 { v3.2s, v4.2s, v5.2s, v6.2s }, [x0] |
| ; CHECK-BE-NEXT: ret |
| entry: |
| %s0 = shufflevector <2 x i32> %v0, <2 x i32> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3> |
| %s1 = shufflevector <2 x i32> %v2, <2 x i32> %v3, <4 x i32> <i32 0, i32 1, i32 2, i32 3> |
| %shuffle = shufflevector <4 x i32> %s0, <4 x i32> %s1, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7> |
| store <8 x i32> %shuffle, ptr %ptr, align 4, !nontemporal !0 |
| ret void |
| } |
| |
| ; Four-way interleave of <4 x i16> vectors written with a !nontemporal store. |
| ; %s0 concatenates %v0 and %v1, %s1 concatenates %v2 and %v3, and the final |
| ; shuffle picks elements in v0[i], v1[i], v2[i], v3[i] order. |
| ; Little-endian expects a plain st4; big-endian expects rev64 on each input |
| ; followed by st4. |
| define void @test_stnp_interleaved_store4_v4i16(<4 x i16> %v0, <4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3, ptr %ptr) { |
| ; CHECK-LE-LABEL: test_stnp_interleaved_store4_v4i16: |
| ; CHECK-LE: // %bb.0: // %entry |
| ; CHECK-LE-NEXT: // kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 |
| ; CHECK-LE-NEXT: // kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 |
| ; CHECK-LE-NEXT: // kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 |
| ; CHECK-LE-NEXT: // kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 |
| ; CHECK-LE-NEXT: st4 { v0.4h, v1.4h, v2.4h, v3.4h }, [x0] |
| ; CHECK-LE-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: test_stnp_interleaved_store4_v4i16: |
| ; CHECK-BE: // %bb.0: // %entry |
| ; CHECK-BE-NEXT: rev64 v6.4h, v3.4h |
| ; CHECK-BE-NEXT: rev64 v5.4h, v2.4h |
| ; CHECK-BE-NEXT: rev64 v4.4h, v1.4h |
| ; CHECK-BE-NEXT: rev64 v3.4h, v0.4h |
| ; CHECK-BE-NEXT: st4 { v3.4h, v4.4h, v5.4h, v6.4h }, [x0] |
| ; CHECK-BE-NEXT: ret |
| entry: |
| %s0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> |
| %s1 = shufflevector <4 x i16> %v2, <4 x i16> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> |
| %shuffle = shufflevector <8 x i16> %s0, <8 x i16> %s1, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15> |
| store <16 x i16> %shuffle, ptr %ptr, align 2, !nontemporal !0 |
| ret void |
| } |
| |
| ; Factor-4 interleave of four <8 x i8> inputs, stored with !nontemporal. |
| ; The checks expect a single st4 (preceded by a rev64 of each input on big-endian) and no stnp. |
| define void @test_stnp_interleaved_store4_v8i8(<8 x i8> %v0, <8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3, ptr %ptr) { |
| ; CHECK-LE-LABEL: test_stnp_interleaved_store4_v8i8: |
| ; CHECK-LE: // %bb.0: // %entry |
| ; CHECK-LE-NEXT: // kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 |
| ; CHECK-LE-NEXT: // kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 |
| ; CHECK-LE-NEXT: // kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 |
| ; CHECK-LE-NEXT: // kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 |
| ; CHECK-LE-NEXT: st4 { v0.8b, v1.8b, v2.8b, v3.8b }, [x0] |
| ; CHECK-LE-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: test_stnp_interleaved_store4_v8i8: |
| ; CHECK-BE: // %bb.0: // %entry |
| ; CHECK-BE-NEXT: rev64 v6.8b, v3.8b |
| ; CHECK-BE-NEXT: rev64 v5.8b, v2.8b |
| ; CHECK-BE-NEXT: rev64 v4.8b, v1.8b |
| ; CHECK-BE-NEXT: rev64 v3.8b, v0.8b |
| ; CHECK-BE-NEXT: st4 { v3.8b, v4.8b, v5.8b, v6.8b }, [x0] |
| ; CHECK-BE-NEXT: ret |
| entry: |
| ; %s0 = v0 ++ v1 and %s1 = v2 ++ v3 (identity-mask concatenations). |
| %s0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> |
| %s1 = shufflevector <8 x i8> %v2, <8 x i8> %v3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> |
| ; Interleave mask: output lane 4*i+j is lane i of %vj (i = 0..7, j = 0..3). |
| %shuffle = shufflevector <16 x i8> %s0, <16 x i8> %s1, <32 x i32> <i32 0, i32 8, i32 16, i32 24, i32 1, i32 9, i32 17, i32 25, i32 2, i32 10, i32 18, i32 26, i32 3, i32 11, i32 19, i32 27, i32 4, i32 12, i32 20, i32 28, i32 5, i32 13, i32 21, i32 29, i32 6, i32 14, i32 22, i32 30, i32 7, i32 15, i32 23, i32 31> |
| store <32 x i8> %shuffle, ptr %ptr, align 1, !nontemporal !0 |
| ret void |
| } |
| |
| ; Factor-4 interleave of four <2 x float> inputs, stored with !nontemporal. |
| ; The checks expect a single st4 (preceded by a rev64 of each input on big-endian) and no stnp. |
| define void @test_stnp_interleaved_store4_v2f32(<2 x float> %v0, <2 x float> %v1, <2 x float> %v2, <2 x float> %v3, ptr %ptr) { |
| ; CHECK-LE-LABEL: test_stnp_interleaved_store4_v2f32: |
| ; CHECK-LE: // %bb.0: // %entry |
| ; CHECK-LE-NEXT: // kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 |
| ; CHECK-LE-NEXT: // kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 |
| ; CHECK-LE-NEXT: // kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 |
| ; CHECK-LE-NEXT: // kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 |
| ; CHECK-LE-NEXT: st4 { v0.2s, v1.2s, v2.2s, v3.2s }, [x0] |
| ; CHECK-LE-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: test_stnp_interleaved_store4_v2f32: |
| ; CHECK-BE: // %bb.0: // %entry |
| ; CHECK-BE-NEXT: rev64 v6.2s, v3.2s |
| ; CHECK-BE-NEXT: rev64 v5.2s, v2.2s |
| ; CHECK-BE-NEXT: rev64 v4.2s, v1.2s |
| ; CHECK-BE-NEXT: rev64 v3.2s, v0.2s |
| ; CHECK-BE-NEXT: st4 { v3.2s, v4.2s, v5.2s, v6.2s }, [x0] |
| ; CHECK-BE-NEXT: ret |
| entry: |
| ; %s0 = v0 ++ v1 and %s1 = v2 ++ v3 (identity-mask concatenations). |
| %s0 = shufflevector <2 x float> %v0, <2 x float> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3> |
| %s1 = shufflevector <2 x float> %v2, <2 x float> %v3, <4 x i32> <i32 0, i32 1, i32 2, i32 3> |
| ; Interleave mask: output lane 4*i+j is lane i of %vj (i = 0..1, j = 0..3). |
| %shuffle = shufflevector <4 x float> %s0, <4 x float> %s1, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7> |
| store <8 x float> %shuffle, ptr %ptr, align 4, !nontemporal !0 |
| ret void |
| } |
| |
| ; Factor-4 interleave of four <4 x half> inputs, stored with !nontemporal. |
| ; The checks expect a single st4 (preceded by a rev64 of each input on big-endian) and no stnp. |
| define void @test_stnp_interleaved_store4_v4f16(<4 x half> %v0, <4 x half> %v1, <4 x half> %v2, <4 x half> %v3, ptr %ptr) { |
| ; CHECK-LE-LABEL: test_stnp_interleaved_store4_v4f16: |
| ; CHECK-LE: // %bb.0: // %entry |
| ; CHECK-LE-NEXT: // kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 |
| ; CHECK-LE-NEXT: // kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 |
| ; CHECK-LE-NEXT: // kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 |
| ; CHECK-LE-NEXT: // kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 |
| ; CHECK-LE-NEXT: st4 { v0.4h, v1.4h, v2.4h, v3.4h }, [x0] |
| ; CHECK-LE-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: test_stnp_interleaved_store4_v4f16: |
| ; CHECK-BE: // %bb.0: // %entry |
| ; CHECK-BE-NEXT: rev64 v6.4h, v3.4h |
| ; CHECK-BE-NEXT: rev64 v5.4h, v2.4h |
| ; CHECK-BE-NEXT: rev64 v4.4h, v1.4h |
| ; CHECK-BE-NEXT: rev64 v3.4h, v0.4h |
| ; CHECK-BE-NEXT: st4 { v3.4h, v4.4h, v5.4h, v6.4h }, [x0] |
| ; CHECK-BE-NEXT: ret |
| entry: |
| ; %s0 = v0 ++ v1 and %s1 = v2 ++ v3 (identity-mask concatenations). |
| %s0 = shufflevector <4 x half> %v0, <4 x half> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> |
| %s1 = shufflevector <4 x half> %v2, <4 x half> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> |
| ; Interleave mask: output lane 4*i+j is lane i of %vj (i = 0..3, j = 0..3). |
| %shuffle = shufflevector <8 x half> %s0, <8 x half> %s1, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15> |
| store <16 x half> %shuffle, ptr %ptr, align 2, !nontemporal !0 |
| ret void |
| } |
| |
| ; Factor-4 interleave of four <2 x i64> inputs (128-bit registers), stored with !nontemporal. |
| ; The checks expect a single st4 (preceded by an ext #8 of each input on big-endian) and no stnp. |
| define void @test_stnp_interleaved_store4_v2i64(<2 x i64> %v0, <2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3, ptr %ptr) { |
| ; CHECK-LE-LABEL: test_stnp_interleaved_store4_v2i64: |
| ; CHECK-LE: // %bb.0: // %entry |
| ; CHECK-LE-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 |
| ; CHECK-LE-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 |
| ; CHECK-LE-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 |
| ; CHECK-LE-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 |
| ; CHECK-LE-NEXT: st4 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0] |
| ; CHECK-LE-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: test_stnp_interleaved_store4_v2i64: |
| ; CHECK-BE: // %bb.0: // %entry |
| ; CHECK-BE-NEXT: ext v6.16b, v3.16b, v3.16b, #8 |
| ; CHECK-BE-NEXT: ext v5.16b, v2.16b, v2.16b, #8 |
| ; CHECK-BE-NEXT: ext v4.16b, v1.16b, v1.16b, #8 |
| ; CHECK-BE-NEXT: ext v3.16b, v0.16b, v0.16b, #8 |
| ; CHECK-BE-NEXT: st4 { v3.2d, v4.2d, v5.2d, v6.2d }, [x0] |
| ; CHECK-BE-NEXT: ret |
| entry: |
| ; %s0 = v0 ++ v1 and %s1 = v2 ++ v3 (identity-mask concatenations). |
| %s0 = shufflevector <2 x i64> %v0, <2 x i64> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3> |
| %s1 = shufflevector <2 x i64> %v2, <2 x i64> %v3, <4 x i32> <i32 0, i32 1, i32 2, i32 3> |
| ; Interleave mask: output lane 4*i+j is lane i of %vj (i = 0..1, j = 0..3). |
| %shuffle = shufflevector <4 x i64> %s0, <4 x i64> %s1, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7> |
| store <8 x i64> %shuffle, ptr %ptr, align 8, !nontemporal !0 |
| ret void |
| } |
| |
| ; Factor-4 interleave of four <4 x i32> inputs (128-bit registers), stored with !nontemporal. |
| ; The checks expect a single st4 (preceded by rev64 + ext #8 of each input on big-endian) and no stnp. |
| define void @test_stnp_interleaved_store4_v4i32(<4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3, ptr %ptr) { |
| ; CHECK-LE-LABEL: test_stnp_interleaved_store4_v4i32: |
| ; CHECK-LE: // %bb.0: // %entry |
| ; CHECK-LE-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 |
| ; CHECK-LE-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 |
| ; CHECK-LE-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 |
| ; CHECK-LE-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 |
| ; CHECK-LE-NEXT: st4 { v0.4s, v1.4s, v2.4s, v3.4s }, [x0] |
| ; CHECK-LE-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: test_stnp_interleaved_store4_v4i32: |
| ; CHECK-BE: // %bb.0: // %entry |
| ; CHECK-BE-NEXT: rev64 v3.4s, v3.4s |
| ; CHECK-BE-NEXT: rev64 v2.4s, v2.4s |
| ; CHECK-BE-NEXT: rev64 v1.4s, v1.4s |
| ; CHECK-BE-NEXT: rev64 v0.4s, v0.4s |
| ; CHECK-BE-NEXT: ext v6.16b, v3.16b, v3.16b, #8 |
| ; CHECK-BE-NEXT: ext v5.16b, v2.16b, v2.16b, #8 |
| ; CHECK-BE-NEXT: ext v4.16b, v1.16b, v1.16b, #8 |
| ; CHECK-BE-NEXT: ext v3.16b, v0.16b, v0.16b, #8 |
| ; CHECK-BE-NEXT: st4 { v3.4s, v4.4s, v5.4s, v6.4s }, [x0] |
| ; CHECK-BE-NEXT: ret |
| entry: |
| ; %s0 = v0 ++ v1 and %s1 = v2 ++ v3 (identity-mask concatenations). |
| %s0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> |
| %s1 = shufflevector <4 x i32> %v2, <4 x i32> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> |
| ; Interleave mask: output lane 4*i+j is lane i of %vj (i = 0..3, j = 0..3). |
| %shuffle = shufflevector <8 x i32> %s0, <8 x i32> %s1, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15> |
| store <16 x i32> %shuffle, ptr %ptr, align 4, !nontemporal !0 |
| ret void |
| } |
| |
| ; Factor-4 interleave of four <8 x i16> inputs (128-bit registers), stored with !nontemporal. |
| ; The checks expect a single st4 (preceded by rev64 + ext #8 of each input on big-endian) and no stnp. |
| define void @test_stnp_interleaved_store4_v8i16(<8 x i16> %v0, <8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3, ptr %ptr) { |
| ; CHECK-LE-LABEL: test_stnp_interleaved_store4_v8i16: |
| ; CHECK-LE: // %bb.0: // %entry |
| ; CHECK-LE-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 |
| ; CHECK-LE-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 |
| ; CHECK-LE-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 |
| ; CHECK-LE-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 |
| ; CHECK-LE-NEXT: st4 { v0.8h, v1.8h, v2.8h, v3.8h }, [x0] |
| ; CHECK-LE-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: test_stnp_interleaved_store4_v8i16: |
| ; CHECK-BE: // %bb.0: // %entry |
| ; CHECK-BE-NEXT: rev64 v3.8h, v3.8h |
| ; CHECK-BE-NEXT: rev64 v2.8h, v2.8h |
| ; CHECK-BE-NEXT: rev64 v1.8h, v1.8h |
| ; CHECK-BE-NEXT: rev64 v0.8h, v0.8h |
| ; CHECK-BE-NEXT: ext v6.16b, v3.16b, v3.16b, #8 |
| ; CHECK-BE-NEXT: ext v5.16b, v2.16b, v2.16b, #8 |
| ; CHECK-BE-NEXT: ext v4.16b, v1.16b, v1.16b, #8 |
| ; CHECK-BE-NEXT: ext v3.16b, v0.16b, v0.16b, #8 |
| ; CHECK-BE-NEXT: st4 { v3.8h, v4.8h, v5.8h, v6.8h }, [x0] |
| ; CHECK-BE-NEXT: ret |
| entry: |
| ; %s0 = v0 ++ v1 and %s1 = v2 ++ v3 (identity-mask concatenations). |
| %s0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> |
| %s1 = shufflevector <8 x i16> %v2, <8 x i16> %v3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> |
| ; Interleave mask: output lane 4*i+j is lane i of %vj (i = 0..7, j = 0..3). |
| %shuffle = shufflevector <16 x i16> %s0, <16 x i16> %s1, <32 x i32> <i32 0, i32 8, i32 16, i32 24, i32 1, i32 9, i32 17, i32 25, i32 2, i32 10, i32 18, i32 26, i32 3, i32 11, i32 19, i32 27, i32 4, i32 12, i32 20, i32 28, i32 5, i32 13, i32 21, i32 29, i32 6, i32 14, i32 22, i32 30, i32 7, i32 15, i32 23, i32 31> |
| store <32 x i16> %shuffle, ptr %ptr, align 2, !nontemporal !0 |
| ret void |
| } |
| |
| ; Factor-4 interleave of four <16 x i8> inputs (128-bit registers), stored with !nontemporal. |
| ; The checks expect a single st4 (preceded by rev64 + ext #8 of each input on big-endian) and no stnp. |
| define void @test_stnp_interleaved_store4_v16i8(<16 x i8> %v0, <16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3, ptr %ptr) { |
| ; CHECK-LE-LABEL: test_stnp_interleaved_store4_v16i8: |
| ; CHECK-LE: // %bb.0: // %entry |
| ; CHECK-LE-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 |
| ; CHECK-LE-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 |
| ; CHECK-LE-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 |
| ; CHECK-LE-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 |
| ; CHECK-LE-NEXT: st4 { v0.16b, v1.16b, v2.16b, v3.16b }, [x0] |
| ; CHECK-LE-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: test_stnp_interleaved_store4_v16i8: |
| ; CHECK-BE: // %bb.0: // %entry |
| ; CHECK-BE-NEXT: rev64 v3.16b, v3.16b |
| ; CHECK-BE-NEXT: rev64 v2.16b, v2.16b |
| ; CHECK-BE-NEXT: rev64 v1.16b, v1.16b |
| ; CHECK-BE-NEXT: rev64 v0.16b, v0.16b |
| ; CHECK-BE-NEXT: ext v6.16b, v3.16b, v3.16b, #8 |
| ; CHECK-BE-NEXT: ext v5.16b, v2.16b, v2.16b, #8 |
| ; CHECK-BE-NEXT: ext v4.16b, v1.16b, v1.16b, #8 |
| ; CHECK-BE-NEXT: ext v3.16b, v0.16b, v0.16b, #8 |
| ; CHECK-BE-NEXT: st4 { v3.16b, v4.16b, v5.16b, v6.16b }, [x0] |
| ; CHECK-BE-NEXT: ret |
| entry: |
| ; %s0 = v0 ++ v1 and %s1 = v2 ++ v3 (identity-mask concatenations). |
| %s0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> |
| %s1 = shufflevector <16 x i8> %v2, <16 x i8> %v3, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> |
| ; Interleave mask: output lane 4*i+j is lane i of %vj (i = 0..15, j = 0..3). |
| %shuffle = shufflevector <32 x i8> %s0, <32 x i8> %s1, <64 x i32> <i32 0, i32 16, i32 32, i32 48, i32 1, i32 17, i32 33, i32 49, i32 2, i32 18, i32 34, i32 50, i32 3, i32 19, i32 35, i32 51, i32 4, i32 20, i32 36, i32 52, i32 5, i32 21, i32 37, i32 53, i32 6, i32 22, i32 38, i32 54, i32 7, i32 23, i32 39, i32 55, i32 8, i32 24, i32 40, i32 56, i32 9, i32 25, i32 41, i32 57, i32 10, i32 26, i32 42, i32 58, i32 11, i32 27, i32 43, i32 59, i32 12, i32 28, i32 44, i32 60, i32 13, i32 29, i32 45, i32 61, i32 14, i32 30, i32 46, i32 62, i32 15, i32 31, i32 47, i32 63> |
| store <64 x i8> %shuffle, ptr %ptr, align 1, !nontemporal !0 |
| ret void |
| } |
| |
| ; Factor-4 interleave of four <2 x double> inputs (128-bit registers), stored with !nontemporal. |
| ; The checks expect a single st4 (preceded by an ext #8 of each input on big-endian) and no stnp. |
| define void @test_stnp_interleaved_store4_v2f64(<2 x double> %v0, <2 x double> %v1, <2 x double> %v2, <2 x double> %v3, ptr %ptr) { |
| ; CHECK-LE-LABEL: test_stnp_interleaved_store4_v2f64: |
| ; CHECK-LE: // %bb.0: // %entry |
| ; CHECK-LE-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 |
| ; CHECK-LE-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 |
| ; CHECK-LE-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 |
| ; CHECK-LE-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 |
| ; CHECK-LE-NEXT: st4 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0] |
| ; CHECK-LE-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: test_stnp_interleaved_store4_v2f64: |
| ; CHECK-BE: // %bb.0: // %entry |
| ; CHECK-BE-NEXT: ext v6.16b, v3.16b, v3.16b, #8 |
| ; CHECK-BE-NEXT: ext v5.16b, v2.16b, v2.16b, #8 |
| ; CHECK-BE-NEXT: ext v4.16b, v1.16b, v1.16b, #8 |
| ; CHECK-BE-NEXT: ext v3.16b, v0.16b, v0.16b, #8 |
| ; CHECK-BE-NEXT: st4 { v3.2d, v4.2d, v5.2d, v6.2d }, [x0] |
| ; CHECK-BE-NEXT: ret |
| entry: |
| ; %s0 = v0 ++ v1 and %s1 = v2 ++ v3 (identity-mask concatenations). |
| %s0 = shufflevector <2 x double> %v0, <2 x double> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3> |
| %s1 = shufflevector <2 x double> %v2, <2 x double> %v3, <4 x i32> <i32 0, i32 1, i32 2, i32 3> |
| ; Interleave mask: output lane 4*i+j is lane i of %vj (i = 0..1, j = 0..3). |
| %shuffle = shufflevector <4 x double> %s0, <4 x double> %s1, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7> |
| store <8 x double> %shuffle, ptr %ptr, align 8, !nontemporal !0 |
| ret void |
| } |
| |
| ; Factor-4 interleave of four <4 x float> inputs (128-bit registers), stored with !nontemporal. |
| ; The checks expect a single st4 (preceded by rev64 + ext #8 of each input on big-endian) and no stnp. |
| define void @test_stnp_interleaved_store4_v4f32(<4 x float> %v0, <4 x float> %v1, <4 x float> %v2, <4 x float> %v3, ptr %ptr) { |
| ; CHECK-LE-LABEL: test_stnp_interleaved_store4_v4f32: |
| ; CHECK-LE: // %bb.0: // %entry |
| ; CHECK-LE-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 |
| ; CHECK-LE-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 |
| ; CHECK-LE-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 |
| ; CHECK-LE-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 |
| ; CHECK-LE-NEXT: st4 { v0.4s, v1.4s, v2.4s, v3.4s }, [x0] |
| ; CHECK-LE-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: test_stnp_interleaved_store4_v4f32: |
| ; CHECK-BE: // %bb.0: // %entry |
| ; CHECK-BE-NEXT: rev64 v3.4s, v3.4s |
| ; CHECK-BE-NEXT: rev64 v2.4s, v2.4s |
| ; CHECK-BE-NEXT: rev64 v1.4s, v1.4s |
| ; CHECK-BE-NEXT: rev64 v0.4s, v0.4s |
| ; CHECK-BE-NEXT: ext v6.16b, v3.16b, v3.16b, #8 |
| ; CHECK-BE-NEXT: ext v5.16b, v2.16b, v2.16b, #8 |
| ; CHECK-BE-NEXT: ext v4.16b, v1.16b, v1.16b, #8 |
| ; CHECK-BE-NEXT: ext v3.16b, v0.16b, v0.16b, #8 |
| ; CHECK-BE-NEXT: st4 { v3.4s, v4.4s, v5.4s, v6.4s }, [x0] |
| ; CHECK-BE-NEXT: ret |
| entry: |
| ; %s0 = v0 ++ v1 and %s1 = v2 ++ v3 (identity-mask concatenations). |
| %s0 = shufflevector <4 x float> %v0, <4 x float> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> |
| %s1 = shufflevector <4 x float> %v2, <4 x float> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> |
| ; Interleave mask: output lane 4*i+j is lane i of %vj (i = 0..3, j = 0..3). |
| %shuffle = shufflevector <8 x float> %s0, <8 x float> %s1, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15> |
| store <16 x float> %shuffle, ptr %ptr, align 4, !nontemporal !0 |
| ret void |
| } |
| |
| ; Factor-4 interleave of four <8 x half> inputs (128-bit registers), stored with !nontemporal. |
| ; The checks expect a single st4 (preceded by rev64 + ext #8 of each input on big-endian) and no stnp. |
| define void @test_stnp_interleaved_store4_v8f16(<8 x half> %v0, <8 x half> %v1, <8 x half> %v2, <8 x half> %v3, ptr %ptr) { |
| ; CHECK-LE-LABEL: test_stnp_interleaved_store4_v8f16: |
| ; CHECK-LE: // %bb.0: // %entry |
| ; CHECK-LE-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 |
| ; CHECK-LE-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 |
| ; CHECK-LE-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 |
| ; CHECK-LE-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 |
| ; CHECK-LE-NEXT: st4 { v0.8h, v1.8h, v2.8h, v3.8h }, [x0] |
| ; CHECK-LE-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: test_stnp_interleaved_store4_v8f16: |
| ; CHECK-BE: // %bb.0: // %entry |
| ; CHECK-BE-NEXT: rev64 v3.8h, v3.8h |
| ; CHECK-BE-NEXT: rev64 v2.8h, v2.8h |
| ; CHECK-BE-NEXT: rev64 v1.8h, v1.8h |
| ; CHECK-BE-NEXT: rev64 v0.8h, v0.8h |
| ; CHECK-BE-NEXT: ext v6.16b, v3.16b, v3.16b, #8 |
| ; CHECK-BE-NEXT: ext v5.16b, v2.16b, v2.16b, #8 |
| ; CHECK-BE-NEXT: ext v4.16b, v1.16b, v1.16b, #8 |
| ; CHECK-BE-NEXT: ext v3.16b, v0.16b, v0.16b, #8 |
| ; CHECK-BE-NEXT: st4 { v3.8h, v4.8h, v5.8h, v6.8h }, [x0] |
| ; CHECK-BE-NEXT: ret |
| entry: |
| ; %s0 = v0 ++ v1 and %s1 = v2 ++ v3 (identity-mask concatenations). |
| %s0 = shufflevector <8 x half> %v0, <8 x half> %v1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> |
| %s1 = shufflevector <8 x half> %v2, <8 x half> %v3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> |
| ; Interleave mask: output lane 4*i+j is lane i of %vj (i = 0..7, j = 0..3). |
| %shuffle = shufflevector <16 x half> %s0, <16 x half> %s1, <32 x i32> <i32 0, i32 8, i32 16, i32 24, i32 1, i32 9, i32 17, i32 25, i32 2, i32 10, i32 18, i32 26, i32 3, i32 11, i32 19, i32 27, i32 4, i32 12, i32 20, i32 28, i32 5, i32 13, i32 21, i32 29, i32 6, i32 14, i32 22, i32 30, i32 7, i32 15, i32 23, i32 31> |
| store <32 x half> %shuffle, ptr %ptr, align 2, !nontemporal !0 |
| ret void |
| } |
| |
| ; Factor-2 interleave of two <3 x i32> inputs (non-power-of-2 element count), stored with !nontemporal. |
| ; Little-endian checks expect zip1/zip2, an stnp for the first 16 bytes and a plain str at offset 16. |
| ; Big-endian checks expect st1 plus str, with no stnp. |
| define void @test_stnp_interleaved_store2_v3i32_non_pow2_elt_count(<3 x i32> %v0, <3 x i32> %v1, ptr %ptr) { |
| ; CHECK-LE-LABEL: test_stnp_interleaved_store2_v3i32_non_pow2_elt_count: |
| ; CHECK-LE: // %bb.0: // %entry |
| ; CHECK-LE-NEXT: zip1 v2.4s, v0.4s, v1.4s |
| ; CHECK-LE-NEXT: zip2 v0.4s, v0.4s, v1.4s |
| ; CHECK-LE-NEXT: mov d3, v2.d[1] |
| ; CHECK-LE-NEXT: str d0, [x0, #16] |
| ; CHECK-LE-NEXT: stnp d2, d3, [x0] |
| ; CHECK-LE-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: test_stnp_interleaved_store2_v3i32_non_pow2_elt_count: |
| ; CHECK-BE: // %bb.0: // %entry |
| ; CHECK-BE-NEXT: rev64 v1.4s, v1.4s |
| ; CHECK-BE-NEXT: rev64 v0.4s, v0.4s |
| ; CHECK-BE-NEXT: ext v1.16b, v1.16b, v1.16b, #8 |
| ; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 |
| ; CHECK-BE-NEXT: zip2 v2.4s, v0.4s, v1.4s |
| ; CHECK-BE-NEXT: zip1 v0.4s, v0.4s, v1.4s |
| ; CHECK-BE-NEXT: rev64 v1.4s, v2.4s |
| ; CHECK-BE-NEXT: st1 { v0.4s }, [x0] |
| ; CHECK-BE-NEXT: str d1, [x0, #16] |
| ; CHECK-BE-NEXT: ret |
| entry: |
| ; Interleave mask: output lane 2*i+j is lane i of %vj (i = 0..2, j = 0..1). |
| %shuffle = shufflevector <3 x i32> %v0, <3 x i32> %v1, |
| <6 x i32> <i32 0, i32 3, i32 1, i32 4, i32 2, i32 5> |
| store <6 x i32> %shuffle, ptr %ptr, align 4, !nontemporal !0 |
| ret void |
| } |
| |
| ; Factor-2 interleave of two <2 x i24> inputs (non-power-of-2 element size), stored with !nontemporal. |
| ; Both check prefixes expect the store to be broken into individual byte and halfword stores; |
| ; neither st2 nor stnp appears in the expected output. |
| define void @test_stnp_interleaved_store2_v2i24_non_pow2_elt_size(<2 x i24> %v0, <2 x i24> %v1, ptr %ptr) { |
| ; CHECK-LE-LABEL: test_stnp_interleaved_store2_v2i24_non_pow2_elt_size: |
| ; CHECK-LE: // %bb.0: // %entry |
| ; CHECK-LE-NEXT: // kill: def $d1 killed $d1 def $q1 |
| ; CHECK-LE-NEXT: // kill: def $d0 killed $d0 def $q0 |
| ; CHECK-LE-NEXT: zip1 v2.2s, v0.2s, v1.2s |
| ; CHECK-LE-NEXT: fmov w9, s0 |
| ; CHECK-LE-NEXT: str h0, [x0] |
| ; CHECK-LE-NEXT: mov w8, v1.s[1] |
| ; CHECK-LE-NEXT: dup v1.2s, v0.s[1] |
| ; CHECK-LE-NEXT: lsr w9, w9, #16 |
| ; CHECK-LE-NEXT: mov w10, v2.s[1] |
| ; CHECK-LE-NEXT: strb w9, [x0, #2] |
| ; CHECK-LE-NEXT: fmov w9, s1 |
| ; CHECK-LE-NEXT: sturh w8, [x0, #9] |
| ; CHECK-LE-NEXT: lsr w8, w8, #16 |
| ; CHECK-LE-NEXT: str h1, [x0, #6] |
| ; CHECK-LE-NEXT: strb w8, [x0, #11] |
| ; CHECK-LE-NEXT: lsr w8, w10, #16 |
| ; CHECK-LE-NEXT: lsr w9, w9, #16 |
| ; CHECK-LE-NEXT: sturh w10, [x0, #3] |
| ; CHECK-LE-NEXT: strb w8, [x0, #5] |
| ; CHECK-LE-NEXT: strb w9, [x0, #8] |
| ; CHECK-LE-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: test_stnp_interleaved_store2_v2i24_non_pow2_elt_size: |
| ; CHECK-BE: // %bb.0: // %entry |
| ; CHECK-BE-NEXT: rev64 v1.2s, v1.2s |
| ; CHECK-BE-NEXT: rev64 v0.2s, v0.2s |
| ; CHECK-BE-NEXT: zip1 v2.2s, v0.2s, v1.2s |
| ; CHECK-BE-NEXT: fmov w9, s0 |
| ; CHECK-BE-NEXT: mov w8, v1.s[1] |
| ; CHECK-BE-NEXT: dup v1.2s, v0.s[1] |
| ; CHECK-BE-NEXT: stur b0, [x0, #2] |
| ; CHECK-BE-NEXT: lsr w9, w9, #8 |
| ; CHECK-BE-NEXT: mov w10, v2.s[1] |
| ; CHECK-BE-NEXT: strb w8, [x0, #11] |
| ; CHECK-BE-NEXT: lsr w8, w8, #8 |
| ; CHECK-BE-NEXT: strh w9, [x0] |
| ; CHECK-BE-NEXT: fmov w9, s1 |
| ; CHECK-BE-NEXT: sturh w8, [x0, #9] |
| ; CHECK-BE-NEXT: stur b1, [x0, #8] |
| ; CHECK-BE-NEXT: lsr w8, w10, #8 |
| ; CHECK-BE-NEXT: lsr w9, w9, #8 |
| ; CHECK-BE-NEXT: strb w10, [x0, #5] |
| ; CHECK-BE-NEXT: sturh w8, [x0, #3] |
| ; CHECK-BE-NEXT: strh w9, [x0, #6] |
| ; CHECK-BE-NEXT: ret |
| entry: |
| ; Interleave mask: output lane 2*i+j is lane i of %vj (i = 0..1, j = 0..1). |
| %shuffle = shufflevector <2 x i24> %v0, <2 x i24> %v1, |
| <4 x i32> <i32 0, i32 2, i32 1, i32 3> |
| store <4 x i24> %shuffle, ptr %ptr, align 4, !nontemporal !0 |
| ret void |
| } |
| |
| ; Metadata node referenced by the !nontemporal attachment on every store in this file. |
| !0 = !{ i32 1 }  |