| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ssse3 | FileCheck %s -check-prefix=SSSE3 |
| ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s -check-prefix=AVX2 |
| |
| ; Baseline case: calls the x86 intrinsic @llvm.x86.ssse3.phadd.sw.128 directly |
| ; on %a and %b and returns its result. The expected output is one phaddsw |
| ; (ssse3 configuration) or one vphaddsw (avx2 configuration) followed by retq. |
| define <8 x i16> @phaddsw_v8i16_intrinsic(<8 x i16> %a, <8 x i16> %b) { |
| ; SSSE3-LABEL: phaddsw_v8i16_intrinsic: |
| ; SSSE3: # %bb.0: |
| ; SSSE3-NEXT: phaddsw %xmm1, %xmm0 |
| ; SSSE3-NEXT: retq |
| ; |
| ; AVX2-LABEL: phaddsw_v8i16_intrinsic: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: vphaddsw %xmm1, %xmm0, %xmm0 |
| ; AVX2-NEXT: retq |
| %res = call <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16> %a, <8 x i16> %b) |
| ret <8 x i16> %res |
| } |
| |
| ; Target-independent form of the saturating horizontal add: %even gathers |
| ; indices 0,2,...,14 and %odd gathers indices 1,3,...,15 of the concatenation |
| ; of %a and %b, and the two are combined with @llvm.sadd.sat.v8i16. |
| ; The expected output is the same single phaddsw / vphaddsw instruction that |
| ; the direct intrinsic call in phaddsw_v8i16_intrinsic produces. |
| define <8 x i16> @phaddsw_v8i16_generic(<8 x i16> %a, <8 x i16> %b) { |
| ; SSSE3-LABEL: phaddsw_v8i16_generic: |
| ; SSSE3: # %bb.0: |
| ; SSSE3-NEXT: phaddsw %xmm1, %xmm0 |
| ; SSSE3-NEXT: retq |
| ; |
| ; AVX2-LABEL: phaddsw_v8i16_generic: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: vphaddsw %xmm1, %xmm0, %xmm0 |
| ; AVX2-NEXT: retq |
| %even = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> |
| %odd = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> |
| %sum = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %even, <8 x i16> %odd) |
| ret <8 x i16> %sum |
| } |
| |
| ; 16-element version of the generic saturating horizontal add: %even gathers |
| ; indices 0,2,...,30 and %odd gathers indices 1,3,...,31 of the concatenation |
| ; of %a and %b, combined with @llvm.sadd.sat.v16i16. |
| ; Expected output with ssse3: two 128-bit phaddsw instructions (%xmm1 into |
| ; %xmm0, %xmm3 into %xmm2) and then "movdqa %xmm2, %xmm1". |
| ; Expected output with avx2: one 256-bit vphaddsw followed by a vpermq using |
| ; the element order [0,2,1,3]. |
| ; NOTE(review): the vpermq presumably restores the element order because the |
| ; 256-bit instruction works within each 128-bit half - confirm against the ISA |
| ; reference. |
| define <16 x i16> @phaddsw_v16i16_generic(<16 x i16> %a, <16 x i16> %b) { |
| ; SSSE3-LABEL: phaddsw_v16i16_generic: |
| ; SSSE3: # %bb.0: |
| ; SSSE3-NEXT: phaddsw %xmm1, %xmm0 |
| ; SSSE3-NEXT: phaddsw %xmm3, %xmm2 |
| ; SSSE3-NEXT: movdqa %xmm2, %xmm1 |
| ; SSSE3-NEXT: retq |
| ; |
| ; AVX2-LABEL: phaddsw_v16i16_generic: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: vphaddsw %ymm1, %ymm0, %ymm0 |
| ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] |
| ; AVX2-NEXT: retq |
| %even = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30> |
| %odd = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31> |
| %sum = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> %even, <16 x i16> %odd) |
| ret <16 x i16> %sum |
| } |
| |
| ; Baseline case: calls the x86 intrinsic @llvm.x86.ssse3.phsub.sw.128 directly |
| ; on %a and %b and returns its result. The expected output is one phsubsw |
| ; (ssse3 configuration) or one vphsubsw (avx2 configuration) followed by retq. |
| define <8 x i16> @phsubsw_v8i16_intrinsic(<8 x i16> %a, <8 x i16> %b) { |
| ; SSSE3-LABEL: phsubsw_v8i16_intrinsic: |
| ; SSSE3: # %bb.0: |
| ; SSSE3-NEXT: phsubsw %xmm1, %xmm0 |
| ; SSSE3-NEXT: retq |
| ; |
| ; AVX2-LABEL: phsubsw_v8i16_intrinsic: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: vphsubsw %xmm1, %xmm0, %xmm0 |
| ; AVX2-NEXT: retq |
| %res = call <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16> %a, <8 x i16> %b) |
| ret <8 x i16> %res |
| } |
| |
| ; Target-independent form of the saturating horizontal subtract: %even gathers |
| ; indices 0,2,...,14 and %odd gathers indices 1,3,...,15 of the concatenation |
| ; of %a and %b, and @llvm.ssub.sat.v8i16 computes %even minus %odd with |
| ; saturation. The operand order matters here: %even is the first argument. |
| ; The expected output is the same single phsubsw / vphsubsw instruction that |
| ; the direct intrinsic call in phsubsw_v8i16_intrinsic produces. |
| define <8 x i16> @phsubsw_v8i16_generic(<8 x i16> %a, <8 x i16> %b) { |
| ; SSSE3-LABEL: phsubsw_v8i16_generic: |
| ; SSSE3: # %bb.0: |
| ; SSSE3-NEXT: phsubsw %xmm1, %xmm0 |
| ; SSSE3-NEXT: retq |
| ; |
| ; AVX2-LABEL: phsubsw_v8i16_generic: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: vphsubsw %xmm1, %xmm0, %xmm0 |
| ; AVX2-NEXT: retq |
| %even = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> |
| %odd = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> |
| %diff = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %even, <8 x i16> %odd) |
| ret <8 x i16> %diff |
| } |
| |
| ; 16-element version of the generic saturating horizontal subtract: %even |
| ; gathers indices 0,2,...,30 and %odd gathers indices 1,3,...,31 of the |
| ; concatenation of %a and %b, and @llvm.ssub.sat.v16i16 computes %even minus |
| ; %odd with saturation. |
| ; Expected output with ssse3: two 128-bit phsubsw instructions (%xmm1 into |
| ; %xmm0, %xmm3 into %xmm2) and then "movdqa %xmm2, %xmm1". |
| ; Expected output with avx2: one 256-bit vphsubsw followed by a vpermq using |
| ; the element order [0,2,1,3]. |
| ; NOTE(review): the vpermq presumably restores the element order because the |
| ; 256-bit instruction works within each 128-bit half - confirm against the ISA |
| ; reference. |
| define <16 x i16> @phsubsw_v16i16_generic(<16 x i16> %a, <16 x i16> %b) { |
| ; SSSE3-LABEL: phsubsw_v16i16_generic: |
| ; SSSE3: # %bb.0: |
| ; SSSE3-NEXT: phsubsw %xmm1, %xmm0 |
| ; SSSE3-NEXT: phsubsw %xmm3, %xmm2 |
| ; SSSE3-NEXT: movdqa %xmm2, %xmm1 |
| ; SSSE3-NEXT: retq |
| ; |
| ; AVX2-LABEL: phsubsw_v16i16_generic: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: vphsubsw %ymm1, %ymm0, %ymm0 |
| ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] |
| ; AVX2-NEXT: retq |
| %even = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30> |
| %odd = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31> |
| %diff = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> %even, <16 x i16> %odd) |
| ret <16 x i16> %diff |
| } |