; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mattr=+simd128 | FileCheck %s

;; Test that the SIMD bitmask instructions can be selected.

target triple = "wasm32-unknown-unknown"

;; Compare every i8 lane against zero and pack the 16 lane results into an
;; i16. Expected lowering: i8x16.eq followed by a single i8x16.bitmask.
define i16 @bitmask_v16i8(<16 x i8> %v) {
; CHECK-LABEL: bitmask_v16i8:
; CHECK:         .functype bitmask_v16i8 (v128) -> (i32)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.const 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK-NEXT:    i8x16.eq
; CHECK-NEXT:    i8x16.bitmask
; CHECK-NEXT:    # fallthrough-return
  %is_zero = icmp eq <16 x i8> %v, zeroinitializer
  %mask = bitcast <16 x i1> %is_zero to i16
  ret i16 %mask
}

;; Compare every i16 lane against zero and pack the 8 lane results into an
;; i8. Expected lowering: i16x8.eq followed by a single i16x8.bitmask.
define i8 @bitmask_v8i16(<8 x i16> %v) {
; CHECK-LABEL: bitmask_v8i16:
; CHECK:         .functype bitmask_v8i16 (v128) -> (i32)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.const 0, 0, 0, 0, 0, 0, 0, 0
; CHECK-NEXT:    i16x8.eq
; CHECK-NEXT:    i16x8.bitmask
; CHECK-NEXT:    # fallthrough-return
  %is_zero = icmp eq <8 x i16> %v, zeroinitializer
  %mask = bitcast <8 x i1> %is_zero to i8
  ret i8 %mask
}

;; Compare every i32 lane against zero, pack the 4 lane results into an i4
;; and zero-extend that to i8. Expected lowering: i32x4.eq followed by a
;; single i32x4.bitmask, with no extra masking for the zext.
define i8 @bitmask_v4i32(<4 x i32> %v) {
; CHECK-LABEL: bitmask_v4i32:
; CHECK:         .functype bitmask_v4i32 (v128) -> (i32)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.const 0, 0, 0, 0
; CHECK-NEXT:    i32x4.eq
; CHECK-NEXT:    i32x4.bitmask
; CHECK-NEXT:    # fallthrough-return
  %is_zero = icmp eq <4 x i32> %v, zeroinitializer
  %mask = bitcast <4 x i1> %is_zero to i4
  %wide = zext i4 %mask to i8
  ret i8 %wide
}

;; Compare both i64 lanes against zero, pack the 2 lane results into an i2
;; and zero-extend that to i8. Expected lowering: i64x2.eq followed by a
;; single i64x2.bitmask, with no extra masking for the zext.
define i8 @bitmask_v2i64(<2 x i64> %v) {
; CHECK-LABEL: bitmask_v2i64:
; CHECK:         .functype bitmask_v2i64 (v128) -> (i32)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.const 0, 0
; CHECK-NEXT:    i64x2.eq
; CHECK-NEXT:    i64x2.bitmask
; CHECK-NEXT:    # fallthrough-return
  %is_zero = icmp eq <2 x i64> %v, zeroinitializer
  %mask = bitcast <2 x i1> %is_zero to i2
  %wide = zext i2 %mask to i8
  ret i8 %wide
}

;; Test unusual vectors

;; Single-element vector: the <1 x i1> compare result is bitcast to i1.
;; Expected lowering uses no bitmask instruction; lane 0 is extracted and
;; tested with a scalar i32.eqz.
define i1 @bitmask_v1i8(<1 x i8> %v) {
; CHECK-LABEL: bitmask_v1i8:
; CHECK:         .functype bitmask_v1i8 (v128) -> (i32)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.extract_lane_u 0
; CHECK-NEXT:    i32.eqz
; CHECK-NEXT:    # fallthrough-return
  %is_zero = icmp eq <1 x i8> %v, zeroinitializer
  %bit = bitcast <1 x i1> %is_zero to i1
  ret i1 %bit
}

;; Seven-element vector: the <7 x i1> compare result is bitcast to i7.
;; Per the expected output below, the vector arrives as seven separate i32
;; parameters and is rebuilt with i8x16.splat + i8x16.replace_lane before the
;; i8x16.eq. No bitmask instruction is expected; each lane is extracted,
;; masked to one bit, shifted into position and ORed together, and the result
;; is finally masked with 127 (the low 7 bits). Lane 6 has no per-lane
;; "and 1"; only the final "and 127" applies to it.
;; The expected output also computes __stack_pointer - 16 and drops it.
define i7 @bitmask_v7i8(<7 x i8> %v) {
; CHECK-LABEL: bitmask_v7i8:
; CHECK:         .functype bitmask_v7i8 (i32, i32, i32, i32, i32, i32, i32) -> (i32)
; CHECK-NEXT:    .local v128
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    global.get __stack_pointer
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.sub
; CHECK-NEXT:    drop
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.splat
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i8x16.replace_lane 1
; CHECK-NEXT:    local.get 2
; CHECK-NEXT:    i8x16.replace_lane 2
; CHECK-NEXT:    local.get 3
; CHECK-NEXT:    i8x16.replace_lane 3
; CHECK-NEXT:    local.get 4
; CHECK-NEXT:    i8x16.replace_lane 4
; CHECK-NEXT:    local.get 5
; CHECK-NEXT:    i8x16.replace_lane 5
; CHECK-NEXT:    local.get 6
; CHECK-NEXT:    i8x16.replace_lane 6
; CHECK-NEXT:    v128.const 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK-NEXT:    i8x16.eq
; CHECK-NEXT:    local.tee 7
; CHECK-NEXT:    i16x8.extract_lane_u 0
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    local.get 7
; CHECK-NEXT:    i16x8.extend_low_i8x16_s
; CHECK-NEXT:    local.tee 7
; CHECK-NEXT:    i16x8.extract_lane_u 1
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.shl
; CHECK-NEXT:    i32.or
; CHECK-NEXT:    local.get 7
; CHECK-NEXT:    i16x8.extract_lane_u 2
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    i32.const 2
; CHECK-NEXT:    i32.shl
; CHECK-NEXT:    i32.or
; CHECK-NEXT:    local.get 7
; CHECK-NEXT:    i16x8.extract_lane_u 3
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    i32.const 3
; CHECK-NEXT:    i32.shl
; CHECK-NEXT:    i32.or
; CHECK-NEXT:    local.get 7
; CHECK-NEXT:    i16x8.extract_lane_u 4
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    i32.const 4
; CHECK-NEXT:    i32.shl
; CHECK-NEXT:    i32.or
; CHECK-NEXT:    local.get 7
; CHECK-NEXT:    i16x8.extract_lane_u 5
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    i32.const 5
; CHECK-NEXT:    i32.shl
; CHECK-NEXT:    i32.or
; CHECK-NEXT:    local.get 7
; CHECK-NEXT:    i16x8.extract_lane_u 6
; CHECK-NEXT:    i32.const 6
; CHECK-NEXT:    i32.shl
; CHECK-NEXT:    i32.or
; CHECK-NEXT:    i32.const 127
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    # fallthrough-return
  %cmp = icmp eq <7 x i8> %v, zeroinitializer
  %bitmask = bitcast <7 x i1> %cmp to i7
  ret i7 %bitmask
}

;; Eight i8 lanes (half of a v128): compare against zero and pack the 8 lane
;; results into an i8. Expected lowering compares as i8x16, sign-extends the
;; low eight lanes to i16x8, then uses i16x8.bitmask.
define i8 @bitmask_v8i8(<8 x i8> %v) {
; CHECK-LABEL: bitmask_v8i8:
; CHECK:         .functype bitmask_v8i8 (v128) -> (i32)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.const 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK-NEXT:    i8x16.eq
; CHECK-NEXT:    i16x8.extend_low_i8x16_s
; CHECK-NEXT:    i16x8.bitmask
; CHECK-NEXT:    # fallthrough-return
  %is_zero = icmp eq <8 x i8> %v, zeroinitializer
  %mask = bitcast <8 x i1> %is_zero to i8
  ret i8 %mask
}

;; 32 i8 lanes passed as two v128 parameters: compare against zero and pack
;; the 32 lane results into an i32. Expected lowering runs i8x16.eq +
;; i8x16.bitmask on each half (sharing the zero constant through local 2) and
;; combines the two 16-bit masks with a shift by 16 and an OR.
;; NOTE(review): the expected output shifts the mask of local 0 into bits
;; 16-31 and leaves the mask of local 1 in bits 0-15. If local 0 holds
;; elements 0-15, a little-endian bitcast would place them in the low bits
;; instead -- confirm the intended lane order against the lowering before
;; relying on these assertions.
define i32 @bitmask_v32i8(<32 x i8> %v) {
; CHECK-LABEL: bitmask_v32i8:
; CHECK:         .functype bitmask_v32i8 (v128, v128) -> (i32)
; CHECK-NEXT:    .local v128
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.const 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK-NEXT:    local.tee 2
; CHECK-NEXT:    i8x16.eq
; CHECK-NEXT:    i8x16.bitmask
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.shl
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 2
; CHECK-NEXT:    i8x16.eq
; CHECK-NEXT:    i8x16.bitmask
; CHECK-NEXT:    i32.or
; CHECK-NEXT:    # fallthrough-return
  %cmp = icmp eq <32 x i8> %v, zeroinitializer
  %bitmask = bitcast <32 x i1> %cmp to i32
  ret i32 %bitmask
}

;; Hand-written sign-bit mask: "icmp slt %v, 0" is true exactly for lanes
;; whose sign bit is set. Expected lowering is a bare i8x16.bitmask on the
;; input, with no compare and no masking for the zext to i32.
define i32 @manual_bitmask_i8x16(<16 x i8> %v) {
; CHECK-LABEL: manual_bitmask_i8x16:
; CHECK:         .functype manual_bitmask_i8x16 (v128) -> (i32)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.bitmask
; CHECK-NEXT:    # fallthrough-return
  %is_neg = icmp slt <16 x i8> %v, zeroinitializer
  %mask = bitcast <16 x i1> %is_neg to i16
  %wide = zext i16 %mask to i32
  ret i32 %wide
}

;; Hand-written sign-bit mask over eight i16 lanes. Expected lowering is a
;; bare i16x8.bitmask on the input, with no compare and no masking for the
;; zext to i32.
define i32 @manual_bitmask_i16x8(<8 x i16> %v) {
; CHECK-LABEL: manual_bitmask_i16x8:
; CHECK:         .functype manual_bitmask_i16x8 (v128) -> (i32)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.bitmask
; CHECK-NEXT:    # fallthrough-return
  %is_neg = icmp slt <8 x i16> %v, zeroinitializer
  %mask = bitcast <8 x i1> %is_neg to i8
  %wide = zext i8 %mask to i32
  ret i32 %wide
}

;; Hand-written sign-bit mask over four i32 lanes. Expected lowering is a
;; bare i32x4.bitmask on the input, with no compare and no masking for the
;; zext from i4 to i32.
define i32 @manual_bitmask_i32x4(<4 x i32> %v) {
; CHECK-LABEL: manual_bitmask_i32x4:
; CHECK:         .functype manual_bitmask_i32x4 (v128) -> (i32)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32x4.bitmask
; CHECK-NEXT:    # fallthrough-return
  %is_neg = icmp slt <4 x i32> %v, zeroinitializer
  %mask = bitcast <4 x i1> %is_neg to i4
  %wide = zext i4 %mask to i32
  ret i32 %wide
}

;; Hand-written sign-bit mask over two i64 lanes. Expected lowering is a
;; bare i64x2.bitmask on the input, with no compare and no masking for the
;; zext from i2 to i32.
define i32 @manual_bitmask_i64x2(<2 x i64> %v) {
; CHECK-LABEL: manual_bitmask_i64x2:
; CHECK:         .functype manual_bitmask_i64x2 (v128) -> (i32)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i64x2.bitmask
; CHECK-NEXT:    # fallthrough-return
  %is_neg = icmp slt <2 x i64> %v, zeroinitializer
  %mask = bitcast <2 x i1> %is_neg to i2
  %wide = zext i2 %mask to i32
  ret i32 %wide
}

;; Hand-written sign-bit mask over eight i8 lanes (half of a v128). Unlike
;; the full-width cases, the expected lowering keeps an explicit i8x16.lt_s
;; against zero, sign-extends the low eight lanes to i16x8, and then uses
;; i16x8.bitmask.
define i32 @manual_bitmask_v8i8(<8 x i8> %v) {
; CHECK-LABEL: manual_bitmask_v8i8:
; CHECK:         .functype manual_bitmask_v8i8 (v128) -> (i32)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.const 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK-NEXT:    i8x16.lt_s
; CHECK-NEXT:    i16x8.extend_low_i8x16_s
; CHECK-NEXT:    i16x8.bitmask
; CHECK-NEXT:    # fallthrough-return
  %is_neg = icmp slt <8 x i8> %v, zeroinitializer
  %mask = bitcast <8 x i1> %is_neg to i8
  %wide = zext i8 %mask to i32
  ret i32 %wide
}

;; Hand-written sign-bit mask over 32 i8 lanes passed as two v128 parameters.
;; Expected lowering runs a bare i8x16.bitmask on each half and combines the
;; two 16-bit masks with a shift by 16 and an OR.
;; NOTE(review): the expected output shifts the mask of local 0 into bits
;; 16-31 and leaves the mask of local 1 in bits 0-15. If local 0 holds
;; elements 0-15, a little-endian bitcast would place them in the low bits
;; instead -- confirm the intended lane order against the lowering before
;; relying on these assertions.
define i32 @manual_bitmask_v32i8(<32 x i8> %v) {
; CHECK-LABEL: manual_bitmask_v32i8:
; CHECK:         .functype manual_bitmask_v32i8 (v128, v128) -> (i32)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.bitmask
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.shl
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i8x16.bitmask
; CHECK-NEXT:    i32.or
; CHECK-NEXT:    # fallthrough-return
  %1 = icmp slt <32 x i8> %v, zeroinitializer
  %2 = bitcast <32 x i1> %1 to i32
  ret i32 %2
}

;; Hand-written sign-bit mask over 64 i8 lanes passed as four v128
;; parameters. Expected lowering runs a bare i8x16.bitmask on each quarter,
;; widens the partial masks to i64 and combines them with shifts and ORs.
;; NOTE(review): in the expected output the mask of local 0 ends up in bits
;; 48-63, local 1 in bits 32-47, local 2 in bits 16-31 and local 3 in bits
;; 0-15. If local 0 holds elements 0-15, a little-endian bitcast would place
;; them in the low bits instead -- confirm the intended lane order against
;; the lowering before relying on these assertions.
define i64 @manual_bitmask_v64i8(<64 x i8> %v) {
; CHECK-LABEL: manual_bitmask_v64i8:
; CHECK:         .functype manual_bitmask_v64i8 (v128, v128, v128, v128) -> (i64)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.bitmask
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.shl
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i8x16.bitmask
; CHECK-NEXT:    i32.or
; CHECK-NEXT:    i64.extend_i32_u
; CHECK-NEXT:    i64.const 32
; CHECK-NEXT:    i64.shl
; CHECK-NEXT:    local.get 2
; CHECK-NEXT:    i8x16.bitmask
; CHECK-NEXT:    i64.extend_i32_u
; CHECK-NEXT:    i64.const 16
; CHECK-NEXT:    i64.shl
; CHECK-NEXT:    i64.or
; CHECK-NEXT:    local.get 3
; CHECK-NEXT:    i8x16.bitmask
; CHECK-NEXT:    i64.extend_i32_u
; CHECK-NEXT:    i64.or
; CHECK-NEXT:    # fallthrough-return
  %1 = icmp slt <64 x i8> %v, zeroinitializer
  %2 = bitcast <64 x i1> %1 to i64
  ret i64 %2
}
