| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 |
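| ; Test that ComputeNumSignBitsForTargetNode() handles the vector pack, vpdi, |
| ; vsldb and vperm intrinsics used below, so that DAGCombiner can remove the |
| ; trunc + sext pair that each function applies to the intrinsic result. |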
| ; |
| ; RUN: llc -mtriple=s390x-linux-gnu -mcpu=z13 < %s | FileCheck %s |
| |
| declare {<16 x i8>, i32} @llvm.s390.vpkshs(<8 x i16>, <8 x i16>) |
| declare {<8 x i16>, i32} @llvm.s390.vpksfs(<4 x i32>, <4 x i32>) |
| declare {<4 x i32>, i32} @llvm.s390.vpksgs(<2 x i64>, <2 x i64>) |
| |
| ; PACKS_CC: i64 -> i32 (vpksgs; only struct element 0, the vector, is used). |
| ; The packed <4 x i32> result is truncated to i16 lanes and sign-extended back. |
| ; Both operands are the same constant, so the expected assembly is one constant |
| ; load, the vpksgs and the return: nothing is emitted for the trunc/sext pair. |
| define <4 x i32> @f0() { |
| ; CHECK-LABEL: f0: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: larl %r1, .LCPI0_0 |
| ; CHECK-NEXT: vl %v0, 0(%r1), 3 |
| ; CHECK-NEXT: vpksgs %v24, %v0, %v0 |
| ; CHECK-NEXT: br %r14 |
| %call = call {<4 x i32>, i32} @llvm.s390.vpksgs(<2 x i64> <i64 0, i64 1>, <2 x i64> <i64 0, i64 1>) |
| %extr = extractvalue {<4 x i32>, i32} %call, 0 |
| %trunc = trunc <4 x i32> %extr to <4 x i16> |
| %ret = sext <4 x i16> %trunc to <4 x i32> |
| ret <4 x i32> %ret |
| } |
| |
| ; PACKS_CC: i32 -> i16 (vpksfs; only struct element 0, the vector, is used). |
| ; The packed <8 x i16> result is truncated to i8 lanes and sign-extended back. |
| ; Both operands are the same constant, so the expected assembly is one constant |
| ; load, the vpksfs and the return: nothing is emitted for the trunc/sext pair. |
| define <8 x i16> @f1() { |
| ; CHECK-LABEL: f1: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: larl %r1, .LCPI1_0 |
| ; CHECK-NEXT: vl %v0, 0(%r1), 3 |
| ; CHECK-NEXT: vpksfs %v24, %v0, %v0 |
| ; CHECK-NEXT: br %r14 |
| %call = call {<8 x i16>, i32} @llvm.s390.vpksfs(<4 x i32> <i32 0, i32 1, i32 1, i32 0>, |
| <4 x i32> <i32 0, i32 1, i32 1, i32 0>) |
| %extr = extractvalue {<8 x i16>, i32} %call, 0 |
| %trunc = trunc <8 x i16> %extr to <8 x i8> |
| %ret = sext <8 x i8> %trunc to <8 x i16> |
| ret <8 x i16> %ret |
| } |
| |
| ; PACKS_CC: i16 -> i8 (vpkshs; only struct element 0, the vector, is used). |
| ; The packed <16 x i8> result is truncated to i4 lanes and sign-extended back. |
| ; Both operands are the same constant, so the expected assembly is one constant |
| ; load, the vpkshs and the return: nothing is emitted for the trunc/sext pair. |
| define <16 x i8> @f2() { |
| ; CHECK-LABEL: f2: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: larl %r1, .LCPI2_0 |
| ; CHECK-NEXT: vl %v0, 0(%r1), 3 |
| ; CHECK-NEXT: vpkshs %v24, %v0, %v0 |
| ; CHECK-NEXT: br %r14 |
| %call = call {<16 x i8>, i32} @llvm.s390.vpkshs( |
| <8 x i16> <i16 0, i16 0, i16 1, i16 1, i16 0, i16 0, i16 1, i16 1>, |
| <8 x i16> <i16 0, i16 0, i16 1, i16 1, i16 0, i16 0, i16 1, i16 1>) |
| %extr = extractvalue {<16 x i8>, i32} %call, 0 |
| %trunc = trunc <16 x i8> %extr to <16 x i4> |
| %ret = sext <16 x i4> %trunc to <16 x i8> |
| ret <16 x i8> %ret |
| } |
| |
| declare {<16 x i8>, i32} @llvm.s390.vpklshs(<8 x i16>, <8 x i16>) |
| declare {<8 x i16>, i32} @llvm.s390.vpklsfs(<4 x i32>, <4 x i32>) |
| declare {<4 x i32>, i32} @llvm.s390.vpklsgs(<2 x i64>, <2 x i64>) |
| |
| ; PACKLS_CC: i64 -> i32 (vpklsgs; only struct element 0, the vector, is used). |
| ; The packed <4 x i32> result is truncated to i16 lanes and sign-extended back. |
| ; The two operands are different constants, so two constant loads are expected, |
| ; followed only by the vpklsgs and the return (nothing for the trunc/sext pair). |
| define <4 x i32> @f3() { |
| ; CHECK-LABEL: f3: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: larl %r1, .LCPI3_0 |
| ; CHECK-NEXT: vl %v0, 0(%r1), 3 |
| ; CHECK-NEXT: larl %r1, .LCPI3_1 |
| ; CHECK-NEXT: vl %v1, 0(%r1), 3 |
| ; CHECK-NEXT: vpklsgs %v24, %v1, %v0 |
| ; CHECK-NEXT: br %r14 |
| %call = call {<4 x i32>, i32} @llvm.s390.vpklsgs(<2 x i64> <i64 0, i64 1>, <2 x i64> <i64 1, i64 0>) |
| %extr = extractvalue {<4 x i32>, i32} %call, 0 |
| %trunc = trunc <4 x i32> %extr to <4 x i16> |
| %ret = sext <4 x i16> %trunc to <4 x i32> |
| ret <4 x i32> %ret |
| } |
| |
| ; PACKLS_CC: i32 -> i16 (vpklsfs; only struct element 0, the vector, is used). |
| ; The packed <8 x i16> result is truncated to i8 lanes and sign-extended back. |
| ; Both operands are the same constant, so the expected assembly is one constant |
| ; load, the vpklsfs and the return: nothing is emitted for the trunc/sext pair. |
| define <8 x i16> @f4() { |
| ; CHECK-LABEL: f4: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: larl %r1, .LCPI4_0 |
| ; CHECK-NEXT: vl %v0, 0(%r1), 3 |
| ; CHECK-NEXT: vpklsfs %v24, %v0, %v0 |
| ; CHECK-NEXT: br %r14 |
| %call = call {<8 x i16>, i32} @llvm.s390.vpklsfs(<4 x i32> <i32 0, i32 1, i32 1, i32 0>, |
| <4 x i32> <i32 0, i32 1, i32 1, i32 0>) |
| %extr = extractvalue {<8 x i16>, i32} %call, 0 |
| %trunc = trunc <8 x i16> %extr to <8 x i8> |
| %ret = sext <8 x i8> %trunc to <8 x i16> |
| ret <8 x i16> %ret |
| } |
| |
| ; PACKLS_CC: i16 -> i8 (vpklshs; only struct element 0, the vector, is used). |
| ; The packed <16 x i8> result is truncated to i4 lanes and sign-extended back. |
| ; Both operands are the same constant, so the expected assembly is one constant |
| ; load, the vpklshs and the return: nothing is emitted for the trunc/sext pair. |
| define <16 x i8> @f5() { |
| ; CHECK-LABEL: f5: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: larl %r1, .LCPI5_0 |
| ; CHECK-NEXT: vl %v0, 0(%r1), 3 |
| ; CHECK-NEXT: vpklshs %v24, %v0, %v0 |
| ; CHECK-NEXT: br %r14 |
| %call = call {<16 x i8>, i32} @llvm.s390.vpklshs( |
| <8 x i16> <i16 0, i16 0, i16 1, i16 1, i16 0, i16 0, i16 1, i16 1>, |
| <8 x i16> <i16 0, i16 0, i16 1, i16 1, i16 0, i16 0, i16 1, i16 1>) |
| %extr = extractvalue {<16 x i8>, i32} %call, 0 |
| %trunc = trunc <16 x i8> %extr to <16 x i4> |
| %ret = sext <16 x i4> %trunc to <16 x i8> |
| ret <16 x i8> %ret |
| } |
| |
| declare <16 x i8> @llvm.s390.vpksh(<8 x i16>, <8 x i16>) |
| declare <8 x i16> @llvm.s390.vpksf(<4 x i32>, <4 x i32>) |
| declare <4 x i32> @llvm.s390.vpksg(<2 x i64>, <2 x i64>) |
| |
| ; PACKS: i64 -> i32 (vpksg; returns the vector directly, no struct to unpack). |
| ; The packed <4 x i32> result is truncated to i16 lanes and sign-extended back. |
| ; The two operands are different constants, so two constant loads are expected, |
| ; followed only by the vpksg and the return (nothing for the trunc/sext pair). |
| define <4 x i32> @f6() { |
| ; CHECK-LABEL: f6: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: larl %r1, .LCPI6_0 |
| ; CHECK-NEXT: vl %v0, 0(%r1), 3 |
| ; CHECK-NEXT: larl %r1, .LCPI6_1 |
| ; CHECK-NEXT: vl %v1, 0(%r1), 3 |
| ; CHECK-NEXT: vpksg %v24, %v1, %v0 |
| ; CHECK-NEXT: br %r14 |
| %call = call <4 x i32> @llvm.s390.vpksg(<2 x i64> <i64 0, i64 1>, <2 x i64> <i64 1, i64 0>) |
| %trunc = trunc <4 x i32> %call to <4 x i16> |
| %ret = sext <4 x i16> %trunc to <4 x i32> |
| ret <4 x i32> %ret |
| } |
| |
| ; PACKS: i32 -> i16 (vpksf; returns the vector directly, no struct to unpack). |
| ; The packed <8 x i16> result is truncated to i8 lanes and sign-extended back. |
| ; Both operands are the same constant, so the expected assembly is one constant |
| ; load, the vpksf and the return: nothing is emitted for the trunc/sext pair. |
| define <8 x i16> @f7() { |
| ; CHECK-LABEL: f7: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: larl %r1, .LCPI7_0 |
| ; CHECK-NEXT: vl %v0, 0(%r1), 3 |
| ; CHECK-NEXT: vpksf %v24, %v0, %v0 |
| ; CHECK-NEXT: br %r14 |
| %call = call <8 x i16> @llvm.s390.vpksf(<4 x i32> <i32 0, i32 1, i32 1, i32 0>, |
| <4 x i32> <i32 0, i32 1, i32 1, i32 0>) |
| %trunc = trunc <8 x i16> %call to <8 x i8> |
| %ret = sext <8 x i8> %trunc to <8 x i16> |
| ret <8 x i16> %ret |
| } |
| |
| ; PACKS: i16 -> i8 (vpksh; returns the vector directly, no struct to unpack). |
| ; The packed <16 x i8> result is truncated to i4 lanes and sign-extended back. |
| ; Both operands are the same constant, so the expected assembly is one constant |
| ; load, the vpksh and the return: nothing is emitted for the trunc/sext pair. |
| define <16 x i8> @f8() { |
| ; CHECK-LABEL: f8: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: larl %r1, .LCPI8_0 |
| ; CHECK-NEXT: vl %v0, 0(%r1), 3 |
| ; CHECK-NEXT: vpksh %v24, %v0, %v0 |
| ; CHECK-NEXT: br %r14 |
| %call = call <16 x i8> @llvm.s390.vpksh( |
| <8 x i16> <i16 0, i16 0, i16 1, i16 1, i16 0, i16 0, i16 1, i16 1>, |
| <8 x i16> <i16 0, i16 0, i16 1, i16 1, i16 0, i16 0, i16 1, i16 1>) |
| %trunc = trunc <16 x i8> %call to <16 x i4> |
| %ret = sext <16 x i4> %trunc to <16 x i8> |
| ret <16 x i8> %ret |
| } |
| |
| declare <16 x i8> @llvm.s390.vpklsh(<8 x i16>, <8 x i16>) |
| declare <8 x i16> @llvm.s390.vpklsf(<4 x i32>, <4 x i32>) |
| declare <4 x i32> @llvm.s390.vpklsg(<2 x i64>, <2 x i64>) |
| |
| ; PACKLS: i64 -> i32 (vpklsg; returns the vector directly, no struct to unpack). |
| ; The packed <4 x i32> result is truncated to i16 lanes and sign-extended back. |
| ; The two operands are different constants, so two constant loads are expected, |
| ; followed only by the vpklsg and the return (nothing for the trunc/sext pair). |
| define <4 x i32> @f9() { |
| ; CHECK-LABEL: f9: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: larl %r1, .LCPI9_0 |
| ; CHECK-NEXT: vl %v0, 0(%r1), 3 |
| ; CHECK-NEXT: larl %r1, .LCPI9_1 |
| ; CHECK-NEXT: vl %v1, 0(%r1), 3 |
| ; CHECK-NEXT: vpklsg %v24, %v1, %v0 |
| ; CHECK-NEXT: br %r14 |
| %call = call <4 x i32> @llvm.s390.vpklsg(<2 x i64> <i64 0, i64 1>, <2 x i64> <i64 1, i64 0>) |
| %trunc = trunc <4 x i32> %call to <4 x i16> |
| %ret = sext <4 x i16> %trunc to <4 x i32> |
| ret <4 x i32> %ret |
| } |
| |
| ; PACKLS: i32 -> i16 (vpklsf; returns the vector directly, no struct to unpack). |
| ; The packed <8 x i16> result is truncated to i8 lanes and sign-extended back. |
| ; Both operands are the same constant, so the expected assembly is one constant |
| ; load, the vpklsf and the return: nothing is emitted for the trunc/sext pair. |
| define <8 x i16> @f10() { |
| ; CHECK-LABEL: f10: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: larl %r1, .LCPI10_0 |
| ; CHECK-NEXT: vl %v0, 0(%r1), 3 |
| ; CHECK-NEXT: vpklsf %v24, %v0, %v0 |
| ; CHECK-NEXT: br %r14 |
| %call = call <8 x i16> @llvm.s390.vpklsf(<4 x i32> <i32 0, i32 1, i32 1, i32 0>, |
| <4 x i32> <i32 0, i32 1, i32 1, i32 0>) |
| %trunc = trunc <8 x i16> %call to <8 x i8> |
| %ret = sext <8 x i8> %trunc to <8 x i16> |
| ret <8 x i16> %ret |
| } |
| |
| ; PACKLS: i16 -> i8 (vpklsh; returns the vector directly, no struct to unpack). |
| ; The packed <16 x i8> result is truncated to i4 lanes and sign-extended back. |
| ; Both operands are the same constant, so the expected assembly is one constant |
| ; load, the vpklsh and the return: nothing is emitted for the trunc/sext pair. |
| define <16 x i8> @f11() { |
| ; CHECK-LABEL: f11: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: larl %r1, .LCPI11_0 |
| ; CHECK-NEXT: vl %v0, 0(%r1), 3 |
| ; CHECK-NEXT: vpklsh %v24, %v0, %v0 |
| ; CHECK-NEXT: br %r14 |
| %call = call <16 x i8> @llvm.s390.vpklsh( |
| <8 x i16> <i16 0, i16 0, i16 1, i16 1, i16 0, i16 0, i16 1, i16 1>, |
| <8 x i16> <i16 0, i16 0, i16 1, i16 1, i16 0, i16 0, i16 1, i16 1>) |
| %trunc = trunc <16 x i8> %call to <16 x i4> |
| %ret = sext <16 x i4> %trunc to <16 x i8> |
| ret <16 x i8> %ret |
| } |
| |
| declare <2 x i64> @llvm.s390.vpdi(<2 x i64>, <2 x i64>, i32) |
| |
| ; VPDI: vpdi with immediate 0 applied to two different <2 x i64> constants. |
| ; The result is truncated to i32 lanes and sign-extended back to i64. |
| ; The expected assembly is the two constant loads, the vpdi and the return: |
| ; nothing is emitted for the trunc/sext pair. |
| define <2 x i64> @f12() { |
| ; CHECK-LABEL: f12: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: larl %r1, .LCPI12_0 |
| ; CHECK-NEXT: vl %v0, 0(%r1), 3 |
| ; CHECK-NEXT: larl %r1, .LCPI12_1 |
| ; CHECK-NEXT: vl %v1, 0(%r1), 3 |
| ; CHECK-NEXT: vpdi %v24, %v1, %v0, 0 |
| ; CHECK-NEXT: br %r14 |
| %perm = call <2 x i64> @llvm.s390.vpdi(<2 x i64> <i64 0, i64 1>, |
| <2 x i64> <i64 1, i64 0>, i32 0) |
| %trunc = trunc <2 x i64> %perm to <2 x i32> |
| %ret = sext <2 x i32> %trunc to <2 x i64> |
| ret <2 x i64> %ret |
| } |
| |
| declare <16 x i8> @llvm.s390.vsldb(<16 x i8>, <16 x i8>, i32) |
| |
| ; VSLDB: vsldb with immediate 1; both vector operands are the same constant, |
| ; whose bytes are all 0 or 1. |
| ; The result is truncated to i4 lanes and sign-extended back to i8. |
| ; The expected assembly is one constant load, the vsldb and the return: |
| ; nothing is emitted for the trunc/sext pair. |
| define <16 x i8> @f13() { |
| ; CHECK-LABEL: f13: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: larl %r1, .LCPI13_0 |
| ; CHECK-NEXT: vl %v0, 0(%r1), 3 |
| ; CHECK-NEXT: vsldb %v24, %v0, %v0, 1 |
| ; CHECK-NEXT: br %r14 |
| %shfd = call <16 x i8> @llvm.s390.vsldb(<16 x i8> |
| <i8 0, i8 0, i8 1, i8 1, i8 0, i8 1, i8 1, i8 1, |
| i8 0, i8 0, i8 1, i8 1, i8 0, i8 1, i8 1, i8 1>, <16 x i8> |
| <i8 0, i8 0, i8 1, i8 1, i8 0, i8 1, i8 1, i8 1, |
| i8 0, i8 0, i8 1, i8 1, i8 0, i8 1, i8 1, i8 1>, |
| i32 1) |
| %trunc = trunc <16 x i8> %shfd to <16 x i4> |
| %ret = sext <16 x i4> %trunc to <16 x i8> |
| ret <16 x i8> %ret |
| } |
| |
| declare <16 x i8> @llvm.s390.vperm(<16 x i8>, <16 x i8>, <16 x i8>) |
| |
| ; VPERM: vperm whose three vector operands are all the same constant, whose |
| ; bytes are all 0 or 1. |
| ; The result is truncated to i4 lanes and sign-extended back to i8. |
| ; The expected assembly is one constant load, the vperm and the return: |
| ; nothing is emitted for the trunc/sext pair. |
| define <16 x i8> @f14() { |
| ; CHECK-LABEL: f14: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: larl %r1, .LCPI14_0 |
| ; CHECK-NEXT: vl %v0, 0(%r1), 3 |
| ; CHECK-NEXT: vperm %v24, %v0, %v0, %v0 |
| ; CHECK-NEXT: br %r14 |
| %perm = call <16 x i8> @llvm.s390.vperm( |
| <16 x i8> <i8 0, i8 0, i8 1, i8 1, i8 0, i8 1, i8 1, i8 1, |
| i8 0, i8 0, i8 1, i8 1, i8 0, i8 1, i8 1, i8 1>, |
| <16 x i8> <i8 0, i8 0, i8 1, i8 1, i8 0, i8 1, i8 1, i8 1, |
| i8 0, i8 0, i8 1, i8 1, i8 0, i8 1, i8 1, i8 1>, |
| <16 x i8> <i8 0, i8 0, i8 1, i8 1, i8 0, i8 1, i8 1, i8 1, |
| i8 0, i8 0, i8 1, i8 1, i8 0, i8 1, i8 1, i8 1>) |
| %trunc = trunc <16 x i8> %perm to <16 x i4> |
| %ret = sext <16 x i4> %trunc to <16 x i8> |
| ret <16 x i8> %ret |
| } |