| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc < %s -mattr=+simd128 | FileCheck %s |
| |
| ;; Test that the SIMD bitmask instructions can be selected |
| |
| target triple = "wasm32-unknown-unknown" |
| |
| ;; Compares each of the 16 i8 lanes of %v with zero and bitcasts the |
| ;; resulting <16 x i1> to an i16. The checks below expect the compare to |
| ;; be emitted as i8x16.eq followed by a single i8x16.bitmask. |
| define i16 @bitmask_v16i8(<16 x i8> %v) { |
| ; CHECK-LABEL: bitmask_v16i8: |
| ; CHECK: .functype bitmask_v16i8 (v128) -> (i32) |
| ; CHECK-NEXT: # %bb.0: |
| ; CHECK-NEXT: local.get 0 |
| ; CHECK-NEXT: v128.const 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 |
| ; CHECK-NEXT: i8x16.eq |
| ; CHECK-NEXT: i8x16.bitmask |
| ; CHECK-NEXT: # fallthrough-return |
| %cmp = icmp eq <16 x i8> %v, zeroinitializer |
| %bitmask = bitcast <16 x i1> %cmp to i16 |
| ret i16 %bitmask |
| } |
| |
| ;; Compares each of the 8 i16 lanes of %v with zero and bitcasts the |
| ;; resulting <8 x i1> to an i8. The checks below expect i16x8.eq followed |
| ;; by a single i16x8.bitmask. |
| define i8 @bitmask_v8i16(<8 x i16> %v) { |
| ; CHECK-LABEL: bitmask_v8i16: |
| ; CHECK: .functype bitmask_v8i16 (v128) -> (i32) |
| ; CHECK-NEXT: # %bb.0: |
| ; CHECK-NEXT: local.get 0 |
| ; CHECK-NEXT: v128.const 0, 0, 0, 0, 0, 0, 0, 0 |
| ; CHECK-NEXT: i16x8.eq |
| ; CHECK-NEXT: i16x8.bitmask |
| ; CHECK-NEXT: # fallthrough-return |
| %cmp = icmp eq <8 x i16> %v, zeroinitializer |
| %bitmask = bitcast <8 x i1> %cmp to i8 |
| ret i8 %bitmask |
| } |
| |
| ;; Compares each of the 4 i32 lanes of %v with zero, bitcasts the |
| ;; resulting <4 x i1> to an i4, and zero-extends that to the i8 return |
| ;; type. The checks below expect i32x4.eq followed by i32x4.bitmask, with |
| ;; no further instruction emitted for the zext. |
| define i8 @bitmask_v4i32(<4 x i32> %v) { |
| ; CHECK-LABEL: bitmask_v4i32: |
| ; CHECK: .functype bitmask_v4i32 (v128) -> (i32) |
| ; CHECK-NEXT: # %bb.0: |
| ; CHECK-NEXT: local.get 0 |
| ; CHECK-NEXT: v128.const 0, 0, 0, 0 |
| ; CHECK-NEXT: i32x4.eq |
| ; CHECK-NEXT: i32x4.bitmask |
| ; CHECK-NEXT: # fallthrough-return |
| %cmp = icmp eq <4 x i32> %v, zeroinitializer |
| %bitmask = bitcast <4 x i1> %cmp to i4 |
| %ext = zext i4 %bitmask to i8 |
| ret i8 %ext |
| } |
| |
| ;; Compares both i64 lanes of %v with zero, bitcasts the resulting |
| ;; <2 x i1> to an i2, and zero-extends that to the i8 return type. The |
| ;; checks below expect i64x2.eq followed by i64x2.bitmask, with no further |
| ;; instruction emitted for the zext. |
| define i8 @bitmask_v2i64(<2 x i64> %v) { |
| ; CHECK-LABEL: bitmask_v2i64: |
| ; CHECK: .functype bitmask_v2i64 (v128) -> (i32) |
| ; CHECK-NEXT: # %bb.0: |
| ; CHECK-NEXT: local.get 0 |
| ; CHECK-NEXT: v128.const 0, 0 |
| ; CHECK-NEXT: i64x2.eq |
| ; CHECK-NEXT: i64x2.bitmask |
| ; CHECK-NEXT: # fallthrough-return |
| %cmp = icmp eq <2 x i64> %v, zeroinitializer |
| %bitmask = bitcast <2 x i1> %cmp to i2 |
| %ext = zext i2 %bitmask to i8 |
| ret i8 %ext |
| } |
| |
| ;; Test unusual vectors |
| |
| ;; Single-lane case: compares the one i8 element of %v with zero and |
| ;; bitcasts the <1 x i1> result to i1. The checks below expect no bitmask |
| ;; instruction here; lane 0 is extracted with i8x16.extract_lane_u and |
| ;; tested with i32.eqz. |
| define i1 @bitmask_v1i8(<1 x i8> %v) { |
| ; CHECK-LABEL: bitmask_v1i8: |
| ; CHECK: .functype bitmask_v1i8 (v128) -> (i32) |
| ; CHECK-NEXT: # %bb.0: |
| ; CHECK-NEXT: local.get 0 |
| ; CHECK-NEXT: i8x16.extract_lane_u 0 |
| ; CHECK-NEXT: i32.eqz |
| ; CHECK-NEXT: # fallthrough-return |
| %cmp = icmp eq <1 x i8> %v, zeroinitializer |
| %bitmask = bitcast <1 x i1> %cmp to i1 |
| ret i1 %bitmask |
| } |
| |
| ;; Seven-lane case: compares each i8 element of %v with zero and bitcasts |
| ;; the <7 x i1> result to i7. Per the .functype check, the vector arrives |
| ;; as seven separate i32 parameters. The expected output contains no |
| ;; bitmask instruction: the lanes are assembled with i8x16.splat and |
| ;; i8x16.replace_lane, compared with i8x16.eq, and the result is then built |
| ;; one bit at a time from lane extracts combined with i32.and / i32.shl / |
| ;; i32.or, with a final mask of 127. |
| define i7 @bitmask_v7i8(<7 x i8> %v) { |
| ; CHECK-LABEL: bitmask_v7i8: |
| ; CHECK: .functype bitmask_v7i8 (i32, i32, i32, i32, i32, i32, i32) -> (i32) |
| ; CHECK-NEXT: .local v128 |
| ; CHECK-NEXT: # %bb.0: |
| ; CHECK-NEXT: global.get __stack_pointer |
| ; CHECK-NEXT: i32.const 16 |
| ; CHECK-NEXT: i32.sub |
| ; CHECK-NEXT: drop |
| ; CHECK-NEXT: local.get 0 |
| ; CHECK-NEXT: i8x16.splat |
| ; CHECK-NEXT: local.get 1 |
| ; CHECK-NEXT: i8x16.replace_lane 1 |
| ; CHECK-NEXT: local.get 2 |
| ; CHECK-NEXT: i8x16.replace_lane 2 |
| ; CHECK-NEXT: local.get 3 |
| ; CHECK-NEXT: i8x16.replace_lane 3 |
| ; CHECK-NEXT: local.get 4 |
| ; CHECK-NEXT: i8x16.replace_lane 4 |
| ; CHECK-NEXT: local.get 5 |
| ; CHECK-NEXT: i8x16.replace_lane 5 |
| ; CHECK-NEXT: local.get 6 |
| ; CHECK-NEXT: i8x16.replace_lane 6 |
| ; CHECK-NEXT: v128.const 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 |
| ; CHECK-NEXT: i8x16.eq |
| ; CHECK-NEXT: local.tee 7 |
| ; CHECK-NEXT: i16x8.extract_lane_u 0 |
| ; CHECK-NEXT: i32.const 1 |
| ; CHECK-NEXT: i32.and |
| ; CHECK-NEXT: local.get 7 |
| ; CHECK-NEXT: i16x8.extend_low_i8x16_s |
| ; CHECK-NEXT: local.tee 7 |
| ; CHECK-NEXT: i16x8.extract_lane_u 1 |
| ; CHECK-NEXT: i32.const 1 |
| ; CHECK-NEXT: i32.and |
| ; CHECK-NEXT: i32.const 1 |
| ; CHECK-NEXT: i32.shl |
| ; CHECK-NEXT: i32.or |
| ; CHECK-NEXT: local.get 7 |
| ; CHECK-NEXT: i16x8.extract_lane_u 2 |
| ; CHECK-NEXT: i32.const 1 |
| ; CHECK-NEXT: i32.and |
| ; CHECK-NEXT: i32.const 2 |
| ; CHECK-NEXT: i32.shl |
| ; CHECK-NEXT: i32.or |
| ; CHECK-NEXT: local.get 7 |
| ; CHECK-NEXT: i16x8.extract_lane_u 3 |
| ; CHECK-NEXT: i32.const 1 |
| ; CHECK-NEXT: i32.and |
| ; CHECK-NEXT: i32.const 3 |
| ; CHECK-NEXT: i32.shl |
| ; CHECK-NEXT: i32.or |
| ; CHECK-NEXT: local.get 7 |
| ; CHECK-NEXT: i16x8.extract_lane_u 4 |
| ; CHECK-NEXT: i32.const 1 |
| ; CHECK-NEXT: i32.and |
| ; CHECK-NEXT: i32.const 4 |
| ; CHECK-NEXT: i32.shl |
| ; CHECK-NEXT: i32.or |
| ; CHECK-NEXT: local.get 7 |
| ; CHECK-NEXT: i16x8.extract_lane_u 5 |
| ; CHECK-NEXT: i32.const 1 |
| ; CHECK-NEXT: i32.and |
| ; CHECK-NEXT: i32.const 5 |
| ; CHECK-NEXT: i32.shl |
| ; CHECK-NEXT: i32.or |
| ; CHECK-NEXT: local.get 7 |
| ; CHECK-NEXT: i16x8.extract_lane_u 6 |
| ; CHECK-NEXT: i32.const 6 |
| ; CHECK-NEXT: i32.shl |
| ; CHECK-NEXT: i32.or |
| ; CHECK-NEXT: i32.const 127 |
| ; CHECK-NEXT: i32.and |
| ; CHECK-NEXT: # fallthrough-return |
| %cmp = icmp eq <7 x i8> %v, zeroinitializer |
| %bitmask = bitcast <7 x i1> %cmp to i7 |
| ret i7 %bitmask |
| } |
| |
| ;; Eight i8 lanes: compares each element of %v with zero and bitcasts the |
| ;; <8 x i1> result to i8. The checks below expect the compare to be done |
| ;; as i8x16.eq, the result widened with i16x8.extend_low_i8x16_s, and the |
| ;; mask then taken with i16x8.bitmask. |
| define i8 @bitmask_v8i8(<8 x i8> %v) { |
| ; CHECK-LABEL: bitmask_v8i8: |
| ; CHECK: .functype bitmask_v8i8 (v128) -> (i32) |
| ; CHECK-NEXT: # %bb.0: |
| ; CHECK-NEXT: local.get 0 |
| ; CHECK-NEXT: v128.const 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 |
| ; CHECK-NEXT: i8x16.eq |
| ; CHECK-NEXT: i16x8.extend_low_i8x16_s |
| ; CHECK-NEXT: i16x8.bitmask |
| ; CHECK-NEXT: # fallthrough-return |
| %cmp = icmp eq <8 x i8> %v, zeroinitializer |
| %bitmask = bitcast <8 x i1> %cmp to i8 |
| ret i8 %bitmask |
| } |
| |
| ;; 32-lane case: compares each i8 element of %v with zero and bitcasts the |
| ;; <32 x i1> result to i32. Per the .functype check, the vector arrives as |
| ;; two v128 parameters. The expected output contains no bitmask |
| ;; instruction: each half is compared with i8x16.eq and the result is built |
| ;; one bit at a time from i8x16.extract_lane_u combined with i32.and / |
| ;; i32.shl / i32.or. Lanes of the first parameter land in bits 0-15 (masked |
| ;; with 65535) and lanes of the second parameter in bits 16-31. |
| define i32 @bitmask_v32i8(<32 x i8> %v) { |
| ; CHECK-LABEL: bitmask_v32i8: |
| ; CHECK: .functype bitmask_v32i8 (v128, v128) -> (i32) |
| ; CHECK-NEXT: .local v128 |
| ; CHECK-NEXT: # %bb.0: |
| ; CHECK-NEXT: global.get __stack_pointer |
| ; CHECK-NEXT: i32.const 16 |
| ; CHECK-NEXT: i32.sub |
| ; CHECK-NEXT: drop |
| ; CHECK-NEXT: local.get 0 |
| ; CHECK-NEXT: v128.const 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 |
| ; CHECK-NEXT: local.tee 2 |
| ; CHECK-NEXT: i8x16.eq |
| ; CHECK-NEXT: local.tee 0 |
| ; CHECK-NEXT: i8x16.extract_lane_u 0 |
| ; CHECK-NEXT: i32.const 1 |
| ; CHECK-NEXT: i32.and |
| ; CHECK-NEXT: local.get 0 |
| ; CHECK-NEXT: i8x16.extract_lane_u 1 |
| ; CHECK-NEXT: i32.const 1 |
| ; CHECK-NEXT: i32.and |
| ; CHECK-NEXT: i32.const 1 |
| ; CHECK-NEXT: i32.shl |
| ; CHECK-NEXT: i32.or |
| ; CHECK-NEXT: local.get 0 |
| ; CHECK-NEXT: i8x16.extract_lane_u 2 |
| ; CHECK-NEXT: i32.const 1 |
| ; CHECK-NEXT: i32.and |
| ; CHECK-NEXT: i32.const 2 |
| ; CHECK-NEXT: i32.shl |
| ; CHECK-NEXT: i32.or |
| ; CHECK-NEXT: local.get 0 |
| ; CHECK-NEXT: i8x16.extract_lane_u 3 |
| ; CHECK-NEXT: i32.const 1 |
| ; CHECK-NEXT: i32.and |
| ; CHECK-NEXT: i32.const 3 |
| ; CHECK-NEXT: i32.shl |
| ; CHECK-NEXT: i32.or |
| ; CHECK-NEXT: local.get 0 |
| ; CHECK-NEXT: i8x16.extract_lane_u 4 |
| ; CHECK-NEXT: i32.const 1 |
| ; CHECK-NEXT: i32.and |
| ; CHECK-NEXT: i32.const 4 |
| ; CHECK-NEXT: i32.shl |
| ; CHECK-NEXT: i32.or |
| ; CHECK-NEXT: local.get 0 |
| ; CHECK-NEXT: i8x16.extract_lane_u 5 |
| ; CHECK-NEXT: i32.const 1 |
| ; CHECK-NEXT: i32.and |
| ; CHECK-NEXT: i32.const 5 |
| ; CHECK-NEXT: i32.shl |
| ; CHECK-NEXT: i32.or |
| ; CHECK-NEXT: local.get 0 |
| ; CHECK-NEXT: i8x16.extract_lane_u 6 |
| ; CHECK-NEXT: i32.const 1 |
| ; CHECK-NEXT: i32.and |
| ; CHECK-NEXT: i32.const 6 |
| ; CHECK-NEXT: i32.shl |
| ; CHECK-NEXT: i32.or |
| ; CHECK-NEXT: local.get 0 |
| ; CHECK-NEXT: i8x16.extract_lane_u 7 |
| ; CHECK-NEXT: i32.const 1 |
| ; CHECK-NEXT: i32.and |
| ; CHECK-NEXT: i32.const 7 |
| ; CHECK-NEXT: i32.shl |
| ; CHECK-NEXT: i32.or |
| ; CHECK-NEXT: local.get 0 |
| ; CHECK-NEXT: i8x16.extract_lane_u 8 |
| ; CHECK-NEXT: i32.const 1 |
| ; CHECK-NEXT: i32.and |
| ; CHECK-NEXT: i32.const 8 |
| ; CHECK-NEXT: i32.shl |
| ; CHECK-NEXT: i32.or |
| ; CHECK-NEXT: local.get 0 |
| ; CHECK-NEXT: i8x16.extract_lane_u 9 |
| ; CHECK-NEXT: i32.const 1 |
| ; CHECK-NEXT: i32.and |
| ; CHECK-NEXT: i32.const 9 |
| ; CHECK-NEXT: i32.shl |
| ; CHECK-NEXT: i32.or |
| ; CHECK-NEXT: local.get 0 |
| ; CHECK-NEXT: i8x16.extract_lane_u 10 |
| ; CHECK-NEXT: i32.const 1 |
| ; CHECK-NEXT: i32.and |
| ; CHECK-NEXT: i32.const 10 |
| ; CHECK-NEXT: i32.shl |
| ; CHECK-NEXT: i32.or |
| ; CHECK-NEXT: local.get 0 |
| ; CHECK-NEXT: i8x16.extract_lane_u 11 |
| ; CHECK-NEXT: i32.const 1 |
| ; CHECK-NEXT: i32.and |
| ; CHECK-NEXT: i32.const 11 |
| ; CHECK-NEXT: i32.shl |
| ; CHECK-NEXT: i32.or |
| ; CHECK-NEXT: local.get 0 |
| ; CHECK-NEXT: i8x16.extract_lane_u 12 |
| ; CHECK-NEXT: i32.const 1 |
| ; CHECK-NEXT: i32.and |
| ; CHECK-NEXT: i32.const 12 |
| ; CHECK-NEXT: i32.shl |
| ; CHECK-NEXT: i32.or |
| ; CHECK-NEXT: local.get 0 |
| ; CHECK-NEXT: i8x16.extract_lane_u 13 |
| ; CHECK-NEXT: i32.const 1 |
| ; CHECK-NEXT: i32.and |
| ; CHECK-NEXT: i32.const 13 |
| ; CHECK-NEXT: i32.shl |
| ; CHECK-NEXT: i32.or |
| ; CHECK-NEXT: local.get 0 |
| ; CHECK-NEXT: i8x16.extract_lane_u 14 |
| ; CHECK-NEXT: i32.const 1 |
| ; CHECK-NEXT: i32.and |
| ; CHECK-NEXT: i32.const 14 |
| ; CHECK-NEXT: i32.shl |
| ; CHECK-NEXT: i32.or |
| ; CHECK-NEXT: local.get 0 |
| ; CHECK-NEXT: i8x16.extract_lane_u 15 |
| ; CHECK-NEXT: i32.const 15 |
| ; CHECK-NEXT: i32.shl |
| ; CHECK-NEXT: i32.or |
| ; CHECK-NEXT: i32.const 65535 |
| ; CHECK-NEXT: i32.and |
| ; CHECK-NEXT: local.get 1 |
| ; CHECK-NEXT: local.get 2 |
| ; CHECK-NEXT: i8x16.eq |
| ; CHECK-NEXT: local.tee 0 |
| ; CHECK-NEXT: i8x16.extract_lane_u 15 |
| ; CHECK-NEXT: i32.const 31 |
| ; CHECK-NEXT: i32.shl |
| ; CHECK-NEXT: local.get 0 |
| ; CHECK-NEXT: i8x16.extract_lane_u 14 |
| ; CHECK-NEXT: i32.const 1 |
| ; CHECK-NEXT: i32.and |
| ; CHECK-NEXT: i32.const 30 |
| ; CHECK-NEXT: i32.shl |
| ; CHECK-NEXT: local.get 0 |
| ; CHECK-NEXT: i8x16.extract_lane_u 13 |
| ; CHECK-NEXT: i32.const 1 |
| ; CHECK-NEXT: i32.and |
| ; CHECK-NEXT: i32.const 29 |
| ; CHECK-NEXT: i32.shl |
| ; CHECK-NEXT: local.get 0 |
| ; CHECK-NEXT: i8x16.extract_lane_u 12 |
| ; CHECK-NEXT: i32.const 1 |
| ; CHECK-NEXT: i32.and |
| ; CHECK-NEXT: i32.const 28 |
| ; CHECK-NEXT: i32.shl |
| ; CHECK-NEXT: local.get 0 |
| ; CHECK-NEXT: i8x16.extract_lane_u 11 |
| ; CHECK-NEXT: i32.const 1 |
| ; CHECK-NEXT: i32.and |
| ; CHECK-NEXT: i32.const 27 |
| ; CHECK-NEXT: i32.shl |
| ; CHECK-NEXT: local.get 0 |
| ; CHECK-NEXT: i8x16.extract_lane_u 10 |
| ; CHECK-NEXT: i32.const 1 |
| ; CHECK-NEXT: i32.and |
| ; CHECK-NEXT: i32.const 26 |
| ; CHECK-NEXT: i32.shl |
| ; CHECK-NEXT: local.get 0 |
| ; CHECK-NEXT: i8x16.extract_lane_u 9 |
| ; CHECK-NEXT: i32.const 1 |
| ; CHECK-NEXT: i32.and |
| ; CHECK-NEXT: i32.const 25 |
| ; CHECK-NEXT: i32.shl |
| ; CHECK-NEXT: local.get 0 |
| ; CHECK-NEXT: i8x16.extract_lane_u 8 |
| ; CHECK-NEXT: i32.const 1 |
| ; CHECK-NEXT: i32.and |
| ; CHECK-NEXT: i32.const 24 |
| ; CHECK-NEXT: i32.shl |
| ; CHECK-NEXT: local.get 0 |
| ; CHECK-NEXT: i8x16.extract_lane_u 7 |
| ; CHECK-NEXT: i32.const 1 |
| ; CHECK-NEXT: i32.and |
| ; CHECK-NEXT: i32.const 23 |
| ; CHECK-NEXT: i32.shl |
| ; CHECK-NEXT: local.get 0 |
| ; CHECK-NEXT: i8x16.extract_lane_u 6 |
| ; CHECK-NEXT: i32.const 1 |
| ; CHECK-NEXT: i32.and |
| ; CHECK-NEXT: i32.const 22 |
| ; CHECK-NEXT: i32.shl |
| ; CHECK-NEXT: local.get 0 |
| ; CHECK-NEXT: i8x16.extract_lane_u 5 |
| ; CHECK-NEXT: i32.const 1 |
| ; CHECK-NEXT: i32.and |
| ; CHECK-NEXT: i32.const 21 |
| ; CHECK-NEXT: i32.shl |
| ; CHECK-NEXT: local.get 0 |
| ; CHECK-NEXT: i8x16.extract_lane_u 4 |
| ; CHECK-NEXT: i32.const 1 |
| ; CHECK-NEXT: i32.and |
| ; CHECK-NEXT: i32.const 20 |
| ; CHECK-NEXT: i32.shl |
| ; CHECK-NEXT: local.get 0 |
| ; CHECK-NEXT: i8x16.extract_lane_u 3 |
| ; CHECK-NEXT: i32.const 1 |
| ; CHECK-NEXT: i32.and |
| ; CHECK-NEXT: i32.const 19 |
| ; CHECK-NEXT: i32.shl |
| ; CHECK-NEXT: local.get 0 |
| ; CHECK-NEXT: i8x16.extract_lane_u 2 |
| ; CHECK-NEXT: i32.const 1 |
| ; CHECK-NEXT: i32.and |
| ; CHECK-NEXT: i32.const 18 |
| ; CHECK-NEXT: i32.shl |
| ; CHECK-NEXT: local.get 0 |
| ; CHECK-NEXT: i8x16.extract_lane_u 1 |
| ; CHECK-NEXT: i32.const 1 |
| ; CHECK-NEXT: i32.and |
| ; CHECK-NEXT: i32.const 17 |
| ; CHECK-NEXT: i32.shl |
| ; CHECK-NEXT: local.get 0 |
| ; CHECK-NEXT: i8x16.extract_lane_u 0 |
| ; CHECK-NEXT: i32.const 1 |
| ; CHECK-NEXT: i32.and |
| ; CHECK-NEXT: i32.const 16 |
| ; CHECK-NEXT: i32.shl |
| ; CHECK-NEXT: i32.or |
| ; CHECK-NEXT: i32.or |
| ; CHECK-NEXT: i32.or |
| ; CHECK-NEXT: i32.or |
| ; CHECK-NEXT: i32.or |
| ; CHECK-NEXT: i32.or |
| ; CHECK-NEXT: i32.or |
| ; CHECK-NEXT: i32.or |
| ; CHECK-NEXT: i32.or |
| ; CHECK-NEXT: i32.or |
| ; CHECK-NEXT: i32.or |
| ; CHECK-NEXT: i32.or |
| ; CHECK-NEXT: i32.or |
| ; CHECK-NEXT: i32.or |
| ; CHECK-NEXT: i32.or |
| ; CHECK-NEXT: i32.or |
| ; CHECK-NEXT: # fallthrough-return |
| %cmp = icmp eq <32 x i8> %v, zeroinitializer |
| %bitmask = bitcast <32 x i1> %cmp to i32 |
| ret i32 %bitmask |
| } |