; blob: ca160c091b229340cc7295a7773314e54712afec [file] [log] [blame]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mattr=+simd128 | FileCheck %s
;; Test that the SIMD bitmask instructions can be selected.
target triple = "wasm32-unknown-unknown"
;; Compares each lane of a <16 x i8> with zero and bitcasts the resulting
;; <16 x i1> to an i16. The expected output is one i8x16.eq followed by one
;; i8x16.bitmask.
define i16 @bitmask_v16i8(<16 x i8> %v) {
; CHECK-LABEL: bitmask_v16i8:
; CHECK: .functype bitmask_v16i8 (v128) -> (i32)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.const 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK-NEXT: i8x16.eq
; CHECK-NEXT: i8x16.bitmask
; CHECK-NEXT: # fallthrough-return
;; One i1 per lane: true where the lane equals zero.
%cmp = icmp eq <16 x i8> %v, zeroinitializer
;; Reinterpret the sixteen i1 lanes as a single 16-bit integer.
%bitmask = bitcast <16 x i1> %cmp to i16
ret i16 %bitmask
}
;; Compares each lane of a <8 x i16> with zero and bitcasts the resulting
;; <8 x i1> to an i8. The expected output is one i16x8.eq followed by one
;; i16x8.bitmask.
define i8 @bitmask_v8i16(<8 x i16> %v) {
; CHECK-LABEL: bitmask_v8i16:
; CHECK: .functype bitmask_v8i16 (v128) -> (i32)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.const 0, 0, 0, 0, 0, 0, 0, 0
; CHECK-NEXT: i16x8.eq
; CHECK-NEXT: i16x8.bitmask
; CHECK-NEXT: # fallthrough-return
;; One i1 per lane: true where the lane equals zero.
%cmp = icmp eq <8 x i16> %v, zeroinitializer
;; Reinterpret the eight i1 lanes as a single 8-bit integer.
%bitmask = bitcast <8 x i1> %cmp to i8
ret i8 %bitmask
}
;; Compares each lane of a <4 x i32> with zero, bitcasts the resulting
;; <4 x i1> to an i4 and zero-extends it to the i8 return type. The expected
;; output is one i32x4.eq followed by one i32x4.bitmask, with no additional
;; instruction for the zero-extension.
define i8 @bitmask_v4i32(<4 x i32> %v) {
; CHECK-LABEL: bitmask_v4i32:
; CHECK: .functype bitmask_v4i32 (v128) -> (i32)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.const 0, 0, 0, 0
; CHECK-NEXT: i32x4.eq
; CHECK-NEXT: i32x4.bitmask
; CHECK-NEXT: # fallthrough-return
;; One i1 per lane: true where the lane equals zero.
%cmp = icmp eq <4 x i32> %v, zeroinitializer
;; Reinterpret the four i1 lanes as a single 4-bit integer.
%bitmask = bitcast <4 x i1> %cmp to i4
;; Widen the 4-bit mask to the i8 return type.
%ext = zext i4 %bitmask to i8
ret i8 %ext
}
;; Compares each lane of a <2 x i64> with zero, bitcasts the resulting
;; <2 x i1> to an i2 and zero-extends it to the i8 return type. The expected
;; output is one i64x2.eq followed by one i64x2.bitmask, with no additional
;; instruction for the zero-extension.
define i8 @bitmask_v2i64(<2 x i64> %v) {
; CHECK-LABEL: bitmask_v2i64:
; CHECK: .functype bitmask_v2i64 (v128) -> (i32)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.const 0, 0
; CHECK-NEXT: i64x2.eq
; CHECK-NEXT: i64x2.bitmask
; CHECK-NEXT: # fallthrough-return
;; One i1 per lane: true where the lane equals zero.
%cmp = icmp eq <2 x i64> %v, zeroinitializer
;; Reinterpret the two i1 lanes as a single 2-bit integer.
%bitmask = bitcast <2 x i1> %cmp to i2
;; Widen the 2-bit mask to the i8 return type.
%ext = zext i2 %bitmask to i8
ret i8 %ext
}
;; Test unusual vectors whose total width is not 128 bits
;; Single-lane case: compares a <1 x i8> with zero and bitcasts the <1 x i1>
;; result to i1. The expected output contains no bitmask instruction; it
;; extracts lane 0 with i8x16.extract_lane_u and tests it with i32.eqz.
define i1 @bitmask_v1i8(<1 x i8> %v) {
; CHECK-LABEL: bitmask_v1i8:
; CHECK: .functype bitmask_v1i8 (v128) -> (i32)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 0
; CHECK-NEXT: i32.eqz
; CHECK-NEXT: # fallthrough-return
;; True when the only lane equals zero.
%cmp = icmp eq <1 x i8> %v, zeroinitializer
;; A one-lane i1 vector reinterpreted as a scalar i1.
%bitmask = bitcast <1 x i1> %cmp to i1
ret i1 %bitmask
}
;; Seven-lane case: compares a <7 x i8> with zero and bitcasts the <7 x i1>
;; result to i7.
;;
;; Per the expected output below, the argument is passed as seven i32
;; parameters and rebuilt into a vector with i8x16.splat plus six
;; i8x16.replace_lane instructions before the i8x16.eq. No bitmask
;; instruction is expected: the result bits are gathered one lane at a time
;; (extract_lane_u, and, shl, or) and the final value is masked with 127.
define i7 @bitmask_v7i8(<7 x i8> %v) {
; CHECK-LABEL: bitmask_v7i8:
; CHECK: .functype bitmask_v7i8 (i32, i32, i32, i32, i32, i32, i32) -> (i32)
; CHECK-NEXT: .local v128
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: global.get __stack_pointer
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.sub
; CHECK-NEXT: drop
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.splat
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i8x16.replace_lane 1
; CHECK-NEXT: local.get 2
; CHECK-NEXT: i8x16.replace_lane 2
; CHECK-NEXT: local.get 3
; CHECK-NEXT: i8x16.replace_lane 3
; CHECK-NEXT: local.get 4
; CHECK-NEXT: i8x16.replace_lane 4
; CHECK-NEXT: local.get 5
; CHECK-NEXT: i8x16.replace_lane 5
; CHECK-NEXT: local.get 6
; CHECK-NEXT: i8x16.replace_lane 6
; CHECK-NEXT: v128.const 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK-NEXT: i8x16.eq
; CHECK-NEXT: local.tee 7
; CHECK-NEXT: i16x8.extract_lane_u 0
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: local.get 7
; CHECK-NEXT: i16x8.extend_low_i8x16_s
; CHECK-NEXT: local.tee 7
; CHECK-NEXT: i16x8.extract_lane_u 1
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.shl
; CHECK-NEXT: i32.or
; CHECK-NEXT: local.get 7
; CHECK-NEXT: i16x8.extract_lane_u 2
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 2
; CHECK-NEXT: i32.shl
; CHECK-NEXT: i32.or
; CHECK-NEXT: local.get 7
; CHECK-NEXT: i16x8.extract_lane_u 3
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 3
; CHECK-NEXT: i32.shl
; CHECK-NEXT: i32.or
; CHECK-NEXT: local.get 7
; CHECK-NEXT: i16x8.extract_lane_u 4
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 4
; CHECK-NEXT: i32.shl
; CHECK-NEXT: i32.or
; CHECK-NEXT: local.get 7
; CHECK-NEXT: i16x8.extract_lane_u 5
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 5
; CHECK-NEXT: i32.shl
; CHECK-NEXT: i32.or
; CHECK-NEXT: local.get 7
; CHECK-NEXT: i16x8.extract_lane_u 6
; CHECK-NEXT: i32.const 6
; CHECK-NEXT: i32.shl
; CHECK-NEXT: i32.or
; CHECK-NEXT: i32.const 127
; CHECK-NEXT: i32.and
; CHECK-NEXT: # fallthrough-return
;; One i1 per lane: true where the lane equals zero.
%cmp = icmp eq <7 x i8> %v, zeroinitializer
;; Reinterpret the seven i1 lanes as a single 7-bit integer.
%bitmask = bitcast <7 x i1> %cmp to i7
ret i7 %bitmask
}
;; 64-bit vector case: compares a <8 x i8> with zero and bitcasts the
;; <8 x i1> result to i8. The expected output compares with i8x16.eq, widens
;; the low eight lanes with i16x8.extend_low_i8x16_s, and then uses
;; i16x8.bitmask to produce the 8-bit mask.
define i8 @bitmask_v8i8(<8 x i8> %v) {
; CHECK-LABEL: bitmask_v8i8:
; CHECK: .functype bitmask_v8i8 (v128) -> (i32)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.const 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK-NEXT: i8x16.eq
; CHECK-NEXT: i16x8.extend_low_i8x16_s
; CHECK-NEXT: i16x8.bitmask
; CHECK-NEXT: # fallthrough-return
;; One i1 per lane: true where the lane equals zero.
%cmp = icmp eq <8 x i8> %v, zeroinitializer
;; Reinterpret the eight i1 lanes as a single 8-bit integer.
%bitmask = bitcast <8 x i1> %cmp to i8
ret i8 %bitmask
}
;; 256-bit vector case: compares a <32 x i8> with zero and bitcasts the
;; <32 x i1> result to i32.
;;
;; Per the expected output below, the argument is passed as two v128
;; parameters. Each one is compared against the same zero constant with
;; i8x16.eq, and no bitmask instruction is expected: the result bits are
;; gathered one lane at a time (extract_lane_u, and, shl, or). Lanes of the
;; first parameter land in bits 0-15 (then masked with 65535) and lanes of the
;; second parameter land in bits 16-31.
define i32 @bitmask_v32i8(<32 x i8> %v) {
; CHECK-LABEL: bitmask_v32i8:
; CHECK: .functype bitmask_v32i8 (v128, v128) -> (i32)
; CHECK-NEXT: .local v128
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: global.get __stack_pointer
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.sub
; CHECK-NEXT: drop
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.const 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK-NEXT: local.tee 2
; CHECK-NEXT: i8x16.eq
; CHECK-NEXT: local.tee 0
; CHECK-NEXT: i8x16.extract_lane_u 0
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 1
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.shl
; CHECK-NEXT: i32.or
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 2
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 2
; CHECK-NEXT: i32.shl
; CHECK-NEXT: i32.or
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 3
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 3
; CHECK-NEXT: i32.shl
; CHECK-NEXT: i32.or
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 4
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 4
; CHECK-NEXT: i32.shl
; CHECK-NEXT: i32.or
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 5
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 5
; CHECK-NEXT: i32.shl
; CHECK-NEXT: i32.or
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 6
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 6
; CHECK-NEXT: i32.shl
; CHECK-NEXT: i32.or
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 7
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 7
; CHECK-NEXT: i32.shl
; CHECK-NEXT: i32.or
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 8
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 8
; CHECK-NEXT: i32.shl
; CHECK-NEXT: i32.or
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 9
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 9
; CHECK-NEXT: i32.shl
; CHECK-NEXT: i32.or
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 10
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 10
; CHECK-NEXT: i32.shl
; CHECK-NEXT: i32.or
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 11
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 11
; CHECK-NEXT: i32.shl
; CHECK-NEXT: i32.or
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 12
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 12
; CHECK-NEXT: i32.shl
; CHECK-NEXT: i32.or
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 13
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 13
; CHECK-NEXT: i32.shl
; CHECK-NEXT: i32.or
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 14
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 14
; CHECK-NEXT: i32.shl
; CHECK-NEXT: i32.or
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 15
; CHECK-NEXT: i32.const 15
; CHECK-NEXT: i32.shl
; CHECK-NEXT: i32.or
; CHECK-NEXT: i32.const 65535
; CHECK-NEXT: i32.and
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 2
; CHECK-NEXT: i8x16.eq
; CHECK-NEXT: local.tee 0
; CHECK-NEXT: i8x16.extract_lane_u 15
; CHECK-NEXT: i32.const 31
; CHECK-NEXT: i32.shl
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 14
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 30
; CHECK-NEXT: i32.shl
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 13
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 29
; CHECK-NEXT: i32.shl
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 12
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 28
; CHECK-NEXT: i32.shl
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 11
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 27
; CHECK-NEXT: i32.shl
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 10
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 26
; CHECK-NEXT: i32.shl
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 9
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 25
; CHECK-NEXT: i32.shl
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 8
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 24
; CHECK-NEXT: i32.shl
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 7
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 23
; CHECK-NEXT: i32.shl
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 6
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 22
; CHECK-NEXT: i32.shl
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 5
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 21
; CHECK-NEXT: i32.shl
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 4
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 20
; CHECK-NEXT: i32.shl
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 3
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 19
; CHECK-NEXT: i32.shl
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 2
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 18
; CHECK-NEXT: i32.shl
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 1
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 17
; CHECK-NEXT: i32.shl
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 0
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.shl
; CHECK-NEXT: i32.or
; CHECK-NEXT: i32.or
; CHECK-NEXT: i32.or
; CHECK-NEXT: i32.or
; CHECK-NEXT: i32.or
; CHECK-NEXT: i32.or
; CHECK-NEXT: i32.or
; CHECK-NEXT: i32.or
; CHECK-NEXT: i32.or
; CHECK-NEXT: i32.or
; CHECK-NEXT: i32.or
; CHECK-NEXT: i32.or
; CHECK-NEXT: i32.or
; CHECK-NEXT: i32.or
; CHECK-NEXT: i32.or
; CHECK-NEXT: i32.or
; CHECK-NEXT: # fallthrough-return
;; One i1 per lane: true where the lane equals zero.
%cmp = icmp eq <32 x i8> %v, zeroinitializer
;; Reinterpret the thirty-two i1 lanes as a single 32-bit integer.
%bitmask = bitcast <32 x i1> %cmp to i32
ret i32 %bitmask
}