| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
| |
| ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z17 | FileCheck %s |
| |
; Intrinsic declarations for the i32 signed-saturation test. The original
; lines referenced attribute group #2, which is never defined in this file;
; a dangling attribute-group reference fails to parse, so it is dropped.
declare <8 x i32> @llvm.smin.v8i32(<8 x i32>, <8 x i32>)
declare <8 x i32> @llvm.smax.v8i32(<8 x i32>, <8 x i32>)
| |
; Signed saturating pack i16 -> i8: clamp to [-128, 127] then truncate.
; The smax/smin-around-trunc pattern should be recognized as VPKSH.
; Fix: the original block had two consecutive `ret` instructions; a basic
; block may have only one terminator, so the duplicate is removed.
define <16 x i8> @i16_signed(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: i16_signed:
; CHECK:       # %bb.0: # %bb2
; CHECK-NEXT:    vpksh %v24, %v24, %v26
; CHECK-NEXT:    br %r14
bb2:
  %0 = shufflevector <8 x i16> %a, <8 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %1 = tail call <16 x i16> @llvm.smax.v16i16(<16 x i16> %0, <16 x i16> splat (i16 -128))
  %2 = tail call <16 x i16> @llvm.smin.v16i16(<16 x i16> %1, <16 x i16> splat (i16 127))
  %3 = trunc nsw <16 x i16> %2 to <16 x i8>
  ret <16 x i8> %3
}
| |
; Signed saturating pack i32 -> i16: clamp to [-32768, 32767] then truncate.
; Expected to lower to a single VPKSF of the two input registers.
define <8 x i16> @i32_signed(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: i32_signed:
; CHECK:       # %bb.0: # %bb2
; CHECK-NEXT:    vpksf %v24, %v24, %v26
; CHECK-NEXT:    br %r14
bb2:
  %concat = shufflevector <4 x i32> %a, <4 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %lower = tail call <8 x i32> @llvm.smax.v8i32(<8 x i32> %concat, <8 x i32> splat (i32 -32768))
  %clamped = tail call <8 x i32> @llvm.smin.v8i32(<8 x i32> %lower, <8 x i32> splat (i32 32767))
  %packed = trunc nsw <8 x i32> %clamped to <8 x i16>
  ret <8 x i16> %packed
}
| |
; Signed saturating pack i64 -> i32: clamp to [INT32_MIN, INT32_MAX] then
; truncate. Expected to lower to a single VPKSG.
define <4 x i32> @i64_signed(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: i64_signed:
; CHECK:       # %bb.0: # %bb2
; CHECK-NEXT:    vpksg %v24, %v24, %v26
; CHECK-NEXT:    br %r14
bb2:
  %concat = shufflevector <2 x i64> %a, <2 x i64> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %lower = tail call <4 x i64> @llvm.smax.v4i64(<4 x i64> %concat, <4 x i64> splat (i64 -2147483648))
  %clamped = tail call <4 x i64> @llvm.smin.v4i64(<4 x i64> %lower, <4 x i64> splat (i64 2147483647))
  %packed = trunc nsw <4 x i64> %clamped to <4 x i32>
  ret <4 x i32> %packed
}
| |
; Same clamp as @i64_signed but with the min/max order swapped and the
; splat constants in the first operand position; pattern matching must be
; commutative, so a single VPKSG is still expected.
define <4 x i32> @i64_signed_flipped(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: i64_signed_flipped:
; CHECK:       # %bb.0: # %bb2
; CHECK-NEXT:    vpksg %v24, %v24, %v26
; CHECK-NEXT:    br %r14
bb2:
  %concat = shufflevector <2 x i64> %a, <2 x i64> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %upper = tail call <4 x i64> @llvm.smin.v4i64(<4 x i64> splat (i64 2147483647), <4 x i64> %concat)
  %clamped = tail call <4 x i64> @llvm.smax.v4i64(<4 x i64> splat (i64 -2147483648), <4 x i64> %upper)
  %packed = trunc nsw <4 x i64> %clamped to <4 x i32>
  ret <4 x i32> %packed
}
| |
; Unsigned saturating pack i16 -> i8: clamp to 255 with umin then truncate.
; Expected to lower to a single VPKLSH.
define <16 x i8> @i16_unsigned(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: i16_unsigned:
; CHECK:       # %bb.0: # %bb2
; CHECK-NEXT:    vpklsh %v24, %v24, %v26
; CHECK-NEXT:    br %r14
bb2:
  %concat = shufflevector <8 x i16> %a, <8 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %clamped = tail call <16 x i16> @llvm.umin.v16i16(<16 x i16> %concat, <16 x i16> splat (i16 255))
  %packed = trunc nuw <16 x i16> %clamped to <16 x i8>
  ret <16 x i8> %packed
}
| |
; Unsigned saturating pack i32 -> i16: clamp to 65535 with umin then
; truncate. Expected to lower to a single VPKLSF.
; NOTE(review): this test uses `trunc nsw` where the sibling unsigned tests
; (@i16_unsigned, @i64_unsigned) use `trunc nuw`. After umin with 65535 the
; value may exceed the signed i16 range, so nsw can produce poison here —
; confirm whether the nsw flag is intentional (poison-tolerance coverage)
; or a typo for nuw.
define <8 x i16> @i32_unsigned(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: i32_unsigned:
; CHECK:       # %bb.0: # %bb2
; CHECK-NEXT:    vpklsf %v24, %v24, %v26
; CHECK-NEXT:    br %r14
bb2:
  %0 = shufflevector <4 x i32> %a, <4 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %1 = tail call <8 x i32> @llvm.umin.v8i32(<8 x i32> %0, <8 x i32> splat (i32 65535))
  %2 = trunc nsw <8 x i32> %1 to <8 x i16>
  ret <8 x i16> %2
}
| |
; Unsigned saturating pack i64 -> i32: clamp to UINT32_MAX with umin then
; truncate. Expected to lower to a single VPKLSG.
define <4 x i32> @i64_unsigned(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: i64_unsigned:
; CHECK:       # %bb.0: # %bb2
; CHECK-NEXT:    vpklsg %v24, %v24, %v26
; CHECK-NEXT:    br %r14
bb2:
  %concat = shufflevector <2 x i64> %a, <2 x i64> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %clamped = tail call <4 x i64> @llvm.umin.v4i64(<4 x i64> %concat, <4 x i64> splat (i64 4294967295))
  %packed = trunc nuw <4 x i64> %clamped to <4 x i32>
  ret <4 x i32> %packed
}