; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=instcombine -mtriple=x86_64-unknown-unknown -S | FileCheck %s

;
; UNDEF Elts
;

; 128-bit pmulhu.w with an undef second operand: instcombine is expected to
; replace the whole call with an all-zero vector.
define <8 x i16> @undef_pmulhu_128(<8 x i16> %a0) {
; CHECK-LABEL: @undef_pmulhu_128(
; CHECK-NEXT:    ret <8 x i16> zeroinitializer
;
  %mulhi = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %a0, <8 x i16> undef)
  ret <8 x i16> %mulhi
}

; Same as the 128-bit undef case but with undef as the first operand; the
; expected result is still an all-zero vector.
define <8 x i16> @undef_pmulhu_128_commute(<8 x i16> %a0) {
; CHECK-LABEL: @undef_pmulhu_128_commute(
; CHECK-NEXT:    ret <8 x i16> zeroinitializer
;
  %mulhi = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> undef, <8 x i16> %a0)
  ret <8 x i16> %mulhi
}

; 256-bit (AVX2) pmulhu.w with an undef second operand: expected to fold to
; an all-zero vector.
define <16 x i16> @undef_pmulhu_256(<16 x i16> %a0) {
; CHECK-LABEL: @undef_pmulhu_256(
; CHECK-NEXT:    ret <16 x i16> zeroinitializer
;
  %mulhi = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> %a0, <16 x i16> undef)
  ret <16 x i16> %mulhi
}

; 256-bit (AVX2) pmulhu.w with undef as the first operand: expected to fold
; to an all-zero vector, matching the non-commuted case.
define <16 x i16> @undef_pmulhu_256_commute(<16 x i16> %a0) {
; CHECK-LABEL: @undef_pmulhu_256_commute(
; CHECK-NEXT:    ret <16 x i16> zeroinitializer
;
  %mulhi = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> undef, <16 x i16> %a0)
  ret <16 x i16> %mulhi
}

; 512-bit (AVX-512) pmulhu.w with an undef second operand: expected to fold
; to an all-zero vector.
define <32 x i16> @undef_pmulhu_512(<32 x i16> %a0) {
; CHECK-LABEL: @undef_pmulhu_512(
; CHECK-NEXT:    ret <32 x i16> zeroinitializer
;
  %mulhi = call <32 x i16> @llvm.x86.avx512.pmulhu.w.512(<32 x i16> %a0, <32 x i16> undef)
  ret <32 x i16> %mulhi
}

; 512-bit (AVX-512) pmulhu.w with undef as the first operand: expected to
; fold to an all-zero vector, matching the non-commuted case.
define <32 x i16> @undef_pmulhu_512_commute(<32 x i16> %a0) {
; CHECK-LABEL: @undef_pmulhu_512_commute(
; CHECK-NEXT:    ret <32 x i16> zeroinitializer
;
  %mulhi = call <32 x i16> @llvm.x86.avx512.pmulhu.w.512(<32 x i16> undef, <32 x i16> %a0)
  ret <32 x i16> %mulhi
}

;
; Zero Elts
;

; 128-bit pmulhu.w of %a0 by an all-zero vector: the call is expected to be
; replaced by zeroinitializer.
define <8 x i16> @zero_pmulhu_128(<8 x i16> %a0) {
; CHECK-LABEL: @zero_pmulhu_128(
; CHECK-NEXT:    ret <8 x i16> zeroinitializer
;
  %mulhi = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %a0, <8 x i16> zeroinitializer)
  ret <8 x i16> %mulhi
}

; 128-bit pmulhu.w with the all-zero vector as the first operand: expected to
; fold to zeroinitializer just like the non-commuted form.
define <8 x i16> @zero_pmulhu_128_commute(<8 x i16> %a0) {
; CHECK-LABEL: @zero_pmulhu_128_commute(
; CHECK-NEXT:    ret <8 x i16> zeroinitializer
;
  %mulhi = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> zeroinitializer, <8 x i16> %a0)
  ret <8 x i16> %mulhi
}

; 256-bit (AVX2) pmulhu.w of %a0 by an all-zero vector: expected to fold to
; zeroinitializer.
define <16 x i16> @zero_pmulhu_256(<16 x i16> %a0) {
; CHECK-LABEL: @zero_pmulhu_256(
; CHECK-NEXT:    ret <16 x i16> zeroinitializer
;
  %mulhi = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> %a0, <16 x i16> zeroinitializer)
  ret <16 x i16> %mulhi
}

; 256-bit (AVX2) pmulhu.w with the all-zero vector as the first operand:
; expected to fold to zeroinitializer.
define <16 x i16> @zero_pmulhu_256_commute(<16 x i16> %a0) {
; CHECK-LABEL: @zero_pmulhu_256_commute(
; CHECK-NEXT:    ret <16 x i16> zeroinitializer
;
  %mulhi = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> zeroinitializer, <16 x i16> %a0)
  ret <16 x i16> %mulhi
}

; 512-bit (AVX-512) pmulhu.w of %a0 by an all-zero vector: expected to fold
; to zeroinitializer.
define <32 x i16> @zero_pmulhu_512(<32 x i16> %a0) {
; CHECK-LABEL: @zero_pmulhu_512(
; CHECK-NEXT:    ret <32 x i16> zeroinitializer
;
  %mulhi = call <32 x i16> @llvm.x86.avx512.pmulhu.w.512(<32 x i16> %a0, <32 x i16> zeroinitializer)
  ret <32 x i16> %mulhi
}

; 512-bit (AVX-512) pmulhu.w with the all-zero vector as the first operand:
; expected to fold to zeroinitializer.
define <32 x i16> @zero_pmulhu_512_commute(<32 x i16> %a0) {
; CHECK-LABEL: @zero_pmulhu_512_commute(
; CHECK-NEXT:    ret <32 x i16> zeroinitializer
;
  %mulhi = call <32 x i16> @llvm.x86.avx512.pmulhu.w.512(<32 x i16> zeroinitializer, <32 x i16> %a0)
  ret <32 x i16> %mulhi
}

;
; Multiply by One
;

; 128-bit pmulhu.w of %a0 by a splat of 1. A 16-bit value times 1 has nothing
; in the upper half of the 32-bit product, so the expected fold is all zeros.
define <8 x i16> @one_pmulhu_128(<8 x i16> %a0) {
; CHECK-LABEL: @one_pmulhu_128(
; CHECK-NEXT:    ret <8 x i16> zeroinitializer
;
  %mulhi = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %a0, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
  ret <8 x i16> %mulhi
}

; 128-bit pmulhu.w with the splat of 1 as the first operand: expected to fold
; to all zeros, same as the non-commuted form.
define <8 x i16> @one_pmulhu_128_commute(<8 x i16> %a0) {
; CHECK-LABEL: @one_pmulhu_128_commute(
; CHECK-NEXT:    ret <8 x i16> zeroinitializer
;
  %mulhi = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <8 x i16> %a0)
  ret <8 x i16> %mulhi
}

; 256-bit (AVX2) pmulhu.w of %a0 by a splat of 1: the upper half of every
; product is zero, so the expected fold is zeroinitializer.
define <16 x i16> @one_pmulhu_256(<16 x i16> %a0) {
; CHECK-LABEL: @one_pmulhu_256(
; CHECK-NEXT:    ret <16 x i16> zeroinitializer
;
  %mulhi = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> %a0, <16 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
  ret <16 x i16> %mulhi
}

; 256-bit (AVX2) pmulhu.w with the splat of 1 as the first operand: expected
; to fold to zeroinitializer.
define <16 x i16> @one_pmulhu_256_commute(<16 x i16> %a0) {
; CHECK-LABEL: @one_pmulhu_256_commute(
; CHECK-NEXT:    ret <16 x i16> zeroinitializer
;
  %mulhi = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <16 x i16> %a0)
  ret <16 x i16> %mulhi
}

; 512-bit (AVX-512) pmulhu.w of %a0 by a splat of 1: the upper half of every
; product is zero, so the expected fold is zeroinitializer.
define <32 x i16> @one_pmulhu_512(<32 x i16> %a0) {
; CHECK-LABEL: @one_pmulhu_512(
; CHECK-NEXT:    ret <32 x i16> zeroinitializer
;
  %mulhi = call <32 x i16> @llvm.x86.avx512.pmulhu.w.512(<32 x i16> %a0, <32 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
  ret <32 x i16> %mulhi
}

; 512-bit (AVX-512) pmulhu.w with the splat of 1 as the first operand:
; expected to fold to zeroinitializer.
define <32 x i16> @one_pmulhu_512_commute(<32 x i16> %a0) {
; CHECK-LABEL: @one_pmulhu_512_commute(
; CHECK-NEXT:    ret <32 x i16> zeroinitializer
;
  %mulhi = call <32 x i16> @llvm.x86.avx512.pmulhu.w.512(<32 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <32 x i16> %a0)
  ret <32 x i16> %mulhi
}

;
; Constant Folding
;

; Both operands are constants, so the call is expected to fold to a constant
; vector holding the upper 16 bits of each unsigned 16x16 product. The i16
; literals are printed signed but multiplied as unsigned values, e.g.
; lane 1: 2 * 7 = 14 -> 0, lane 2: 3 * 32768 = 0x18000 -> 1.
define <8 x i16> @fold_pmulhu_128() {
; CHECK-LABEL: @fold_pmulhu_128(
; CHECK-NEXT:    ret <8 x i16> <i16 -6, i16 0, i16 1, i16 32763, i16 -14, i16 5, i16 3, i16 32757>
;
  %folded = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> <i16 -1, i16 2, i16 3, i16 -4, i16 -5, i16 6, i16 7, i16 -8>, <8 x i16> <i16 -5, i16 7, i16 -32768, i16 32765, i16 -9, i16 -11, i16 -32763, i16 32761>)
  ret <8 x i16> %folded
}

; 256-bit (AVX2) constant folding: every lane of the expected result is the
; upper 16 bits of the unsigned product of the matching constant lanes, e.g.
; lane 0: 0 * 65531 -> 0, lane 2: 2 * 32768 = 0x10000 -> 1.
define <16 x i16> @fold_pmulhu_256() {
; CHECK-LABEL: @fold_pmulhu_256(
; CHECK-NEXT:    ret <16 x i16> <i16 0, i16 6, i16 1, i16 1, i16 -13, i16 -16, i16 3, i16 3, i16 12, i16 8, i16 -32766, i16 5, i16 16, i16 12, i16 -32764, i16 32748>
;
  %folded = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> <i16 0, i16 -1, i16 2, i16 3, i16 -4, i16 -5, i16 6, i16 7, i16 -8, i16 9, i16 -10, i16 11, i16 -12, i16 13, i16 -14, i16 -15>, <16 x i16> <i16 -5, i16 7, i16 -32768, i16 32766, i16 -9, i16 -11, i16 -32764, i16 32762, i16 13, i16 -15, i16 -32760, i16 32758, i16 17, i16 -19, i16 -32756, i16 32756>)
  ret <16 x i16> %folded
}

; 512-bit (AVX-512) constant folding. The two operands hold the same 32
; constants with their 16-lane halves swapped, so lane i and lane i+16
; multiply the same pair of values and the expected result repeats one
; 16-lane pattern twice.
define <32 x i16> @fold_pmulhu_512() {
; CHECK-LABEL: @fold_pmulhu_512(
; CHECK-NEXT:    ret <32 x i16> <i16 0, i16 6, i16 1, i16 1, i16 -13, i16 -16, i16 3, i16 3, i16 12, i16 8, i16 -32766, i16 5, i16 16, i16 12, i16 -32764, i16 32748, i16 0, i16 6, i16 1, i16 1, i16 -13, i16 -16, i16 3, i16 3, i16 12, i16 8, i16 -32766, i16 5, i16 16, i16 12, i16 -32764, i16 32748>
;
  %folded = call <32 x i16> @llvm.x86.avx512.pmulhu.w.512(<32 x i16> <i16 0, i16 -1, i16 2, i16 3, i16 -4, i16 -5, i16 6, i16 7, i16 -8, i16 9, i16 -10, i16 11, i16 -12, i16 13, i16 -14, i16 -15, i16 -5, i16 7, i16 -32768, i16 32766, i16 -9, i16 -11, i16 -32764, i16 32762, i16 13, i16 -15, i16 -32760, i16 32758, i16 17, i16 -19, i16 -32756, i16 32756>, <32 x i16> <i16 -5, i16 7, i16 -32768, i16 32766, i16 -9, i16 -11, i16 -32764, i16 32762, i16 13, i16 -15, i16 -32760, i16 32758, i16 17, i16 -19, i16 -32756, i16 32756, i16 0, i16 -1, i16 2, i16 3, i16 -4, i16 -5, i16 6, i16 7, i16 -8, i16 9, i16 -10, i16 11, i16 -12, i16 13, i16 -14, i16 -15>)
  ret <32 x i16> %folded
}

;
; Demanded Elts
;

; Demanded-elements test. The final shuffle broadcasts lane 0 of the product,
; so only lane 0 of each multiplicand is needed. Both input shuffles keep
; lane 0 in place and only permute other lanes, so the expected output feeds
; %a0 and %a1 straight into the intrinsic and keeps just the broadcast.
define <8 x i16> @elts_pmulhu_128(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: @elts_pmulhu_128(
; CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]])
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> zeroinitializer
; CHECK-NEXT:    ret <8 x i16> [[TMP2]]
;
  %lhs = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2>
  %rhs = shufflevector <8 x i16> %a1, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
  %mulhi = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %lhs, <8 x i16> %rhs)
  %splat = shufflevector <8 x i16> %mulhi, <8 x i16> poison, <8 x i32> zeroinitializer
  ret <8 x i16> %splat
}

; 256-bit (AVX2) demanded-elements test. Only lane 0 of the product is used
; by the final broadcast, and both input shuffles leave lane 0 untouched, so
; the expected output drops the input shuffles and multiplies %a0 by %a1
; directly.
define <16 x i16> @elts_pmulhu_256(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: @elts_pmulhu_256(
; CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]])
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i16> [[TMP1]], <16 x i16> poison, <16 x i32> zeroinitializer
; CHECK-NEXT:    ret <16 x i16> [[TMP2]]
;
  %lhs = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %rhs = shufflevector <16 x i16> %a1, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %mulhi = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> %lhs, <16 x i16> %rhs)
  %splat = shufflevector <16 x i16> %mulhi, <16 x i16> poison, <16 x i32> zeroinitializer
  ret <16 x i16> %splat
}

; 512-bit (AVX-512) demanded-elements test. Only lane 0 of the product is
; used by the final broadcast, and both input shuffles leave lane 0
; untouched, so the expected output drops the input shuffles and multiplies
; %a0 by %a1 directly.
define <32 x i16> @elts_pmulhu_512(<32 x i16> %a0, <32 x i16> %a1) {
; CHECK-LABEL: @elts_pmulhu_512(
; CHECK-NEXT:    [[TMP1:%.*]] = call <32 x i16> @llvm.x86.avx512.pmulhu.w.512(<32 x i16> [[A0:%.*]], <32 x i16> [[A1:%.*]])
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <32 x i16> [[TMP1]], <32 x i16> poison, <32 x i32> zeroinitializer
; CHECK-NEXT:    ret <32 x i16> [[TMP2]]
;
  %lhs = shufflevector <32 x i16> %a0, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %rhs = shufflevector <32 x i16> %a1, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %mulhi = call <32 x i16> @llvm.x86.avx512.pmulhu.w.512(<32 x i16> %lhs, <32 x i16> %rhs)
  %splat = shufflevector <32 x i16> %mulhi, <32 x i16> poison, <32 x i32> zeroinitializer
  ret <32 x i16> %splat
}

;
; Known Bits
;

; Known-bits test. %a0 is shifted right by at least 8 (each lane < 256) and
; %a1 is masked to its low two bits (each lane <= 3), so every product is at
; most 255 * 3 = 765 and its upper 16 bits are zero. The multiply therefore
; contributes nothing to the add, and the expected output returns %a2.
define <8 x i16> @known_pmulhu_128(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2) {
; CHECK-LABEL: @known_pmulhu_128(
; CHECK-NEXT:    ret <8 x i16> [[A2:%.*]]
;
  %lhs = lshr <8 x i16> %a0, <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
  %rhs = and <8 x i16> %a1, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %mulhi = tail call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %lhs, <8 x i16> %rhs)
  %sum = add <8 x i16> %mulhi, %a2
  ret <8 x i16> %sum
}

; 256-bit (AVX2) known-bits test. Each lane of the shifted %a0 is < 256 and
; each lane of the masked %a1 is <= 3, so the upper 16 bits of every product
; are zero and the expected output returns %a2 unchanged.
define <16 x i16> @known_pmulhu_256(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> %a2) {
; CHECK-LABEL: @known_pmulhu_256(
; CHECK-NEXT:    ret <16 x i16> [[A2:%.*]]
;
  %lhs = lshr <16 x i16> %a0, <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
  %rhs = and <16 x i16> %a1, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %mulhi = tail call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> %lhs, <16 x i16> %rhs)
  %sum = add <16 x i16> %mulhi, %a2
  ret <16 x i16> %sum
}

; 512-bit (AVX-512) known-bits test. Each lane of the shifted %a0 is < 256
; and each lane of the masked %a1 is <= 3, so the upper 16 bits of every
; product are zero and the expected output returns %a2 unchanged.
define <32 x i16> @known_pmulhu_512(<32 x i16> %a0, <32 x i16> %a1, <32 x i16> %a2) {
; CHECK-LABEL: @known_pmulhu_512(
; CHECK-NEXT:    ret <32 x i16> [[A2:%.*]]
;
  %lhs = lshr <32 x i16> %a0, <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
  %rhs = and <32 x i16> %a1, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %mulhi = tail call <32 x i16> @llvm.x86.avx512.pmulhu.w.512(<32 x i16> %lhs, <32 x i16> %rhs)
  %sum = add <32 x i16> %mulhi, %a2
  ret <32 x i16> %sum
}
