[WebAssembly] Improve codegen for v128.bitselect

Add patterns selecting ((v1 ^ v2) & c) ^ v2 and ((v1 ^ v2) & ~c) ^ v2 to
v128.bitselect.

Resolves #56827.

Reviewed By: aheejin

Differential Revision: https://reviews.llvm.org/D131131

GitOrigin-RevId: b19de814add2c2d9496c0b2fcc8a47024a58e491
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
index ed3cc7e..14202a8 100644
--- a/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -811,6 +811,20 @@
             (and (vnot V128:$c), (vec.vt V128:$v2)))),
           (BITSELECT $v1, $v2, $c)>;
 
+// ((v1 ^ v2) & c) ^ v2 takes v1 where c is set, else v2: also a bitselect.
+foreach vec = IntVecs in
+def : Pat<(vec.vt (xor (and (xor (vec.vt V128:$v1), (vec.vt V128:$v2)),
+                            (vec.vt V128:$c)),
+                       (vec.vt V128:$v2))),
+          (BITSELECT $v1, $v2, $c)>;
+
+// ((v1 ^ v2) & ~c) ^ v2: `c` is negated, so `v1` and `v2` swap in BITSELECT.
+foreach vec = IntVecs in
+def : Pat<(vec.vt (xor (and (xor (vec.vt V128:$v1), (vec.vt V128:$v2)),
+                            (vnot (vec.vt V128:$c))),
+                       (vec.vt V128:$v2))),
+          (BITSELECT $v2, $v1, $c)>;
+
 // Also implement vselect in terms of bitselect
 foreach vec = AllVecs in
 def : Pat<(vec.vt (vselect
diff --git a/test/CodeGen/WebAssembly/simd-arith.ll b/test/CodeGen/WebAssembly/simd-arith.ll
index dc33f36..78fdccc 100644
--- a/test/CodeGen/WebAssembly/simd-arith.ll
+++ b/test/CodeGen/WebAssembly/simd-arith.ll
@@ -343,6 +343,39 @@
   ret <16 x i8> %a
 }
 
+; CHECK-LABEL: bitselect_xor_v16i8:
+; NO-SIMD128-NOT: v128
+; SIMD128-NEXT: .functype bitselect_xor_v16i8 (v128, v128, v128) -> (v128){{$}}
+; SIMD128-SLOW-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $0{{$}}
+; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}}
+; SIMD128-FAST-NEXT: v128.xor
+; SIMD128-FAST-NEXT: v128.and
+; SIMD128-FAST-NEXT: v128.xor
+define <16 x i8> @bitselect_xor_v16i8(<16 x i8> %c, <16 x i8> %v1, <16 x i8> %v2) {
+ %diff = xor <16 x i8> %v1, %v2  ; bits where v1 and v2 disagree
+ %masked = and <16 x i8> %diff, %c  ; keep them only where c is set
+ %sel = xor <16 x i8> %masked, %v2  ; v1 where c is set, v2 elsewhere
+ ret <16 x i8> %sel
+}
+
+; CHECK-LABEL: bitselect_xor_reversed_v16i8:
+; NO-SIMD128-NOT: v128
+; SIMD128-NEXT: .functype bitselect_xor_reversed_v16i8 (v128, v128, v128) -> (v128){{$}}
+; SIMD128-SLOW-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $2, $1, $0{{$}}
+; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}}
+; SIMD128-FAST-NEXT: v128.xor
+; SIMD128-FAST-NEXT: v128.not
+; SIMD128-FAST-NEXT: v128.and
+; SIMD128-FAST-NEXT: v128.xor
+define <16 x i8> @bitselect_xor_reversed_v16i8(<16 x i8> %c, <16 x i8> %v1, <16 x i8> %v2) {
+ %diff = xor <16 x i8> %v1, %v2  ; bits where v1 and v2 disagree
+ %cinv = xor <16 x i8> %c, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
+                            i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+ %masked = and <16 x i8> %diff, %cinv  ; keep them only where c is clear
+ %sel = xor <16 x i8> %masked, %v2  ; v2 where c is set, v1 elsewhere
+ ret <16 x i8> %sel
+}
+
 ; ==============================================================================
 ; 8 x i16
 ; ==============================================================================
@@ -659,6 +692,39 @@
   ret <8 x i16> %a
 }
 
+; CHECK-LABEL: bitselect_xor_v8i16:
+; NO-SIMD128-NOT: v128
+; SIMD128-NEXT: .functype bitselect_xor_v8i16 (v128, v128, v128) -> (v128){{$}}
+; SIMD128-SLOW-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $0{{$}}
+; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}}
+; SIMD128-FAST-NEXT: v128.xor
+; SIMD128-FAST-NEXT: v128.and
+; SIMD128-FAST-NEXT: v128.xor
+define <8 x i16> @bitselect_xor_v8i16(<8 x i16> %c, <8 x i16> %v1, <8 x i16> %v2) {
+ %diff = xor <8 x i16> %v1, %v2  ; bits where v1 and v2 disagree
+ %masked = and <8 x i16> %diff, %c  ; keep them only where c is set
+ %sel = xor <8 x i16> %masked, %v2  ; v1 where c is set, v2 elsewhere
+ ret <8 x i16> %sel
+}
+
+; CHECK-LABEL: bitselect_xor_reversed_v8i16:
+; NO-SIMD128-NOT: v128
+; SIMD128-NEXT: .functype bitselect_xor_reversed_v8i16 (v128, v128, v128) -> (v128){{$}}
+; SIMD128-SLOW-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $2, $1, $0{{$}}
+; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}}
+; SIMD128-FAST-NEXT: v128.xor
+; SIMD128-FAST-NEXT: v128.not
+; SIMD128-FAST-NEXT: v128.and
+; SIMD128-FAST-NEXT: v128.xor
+define <8 x i16> @bitselect_xor_reversed_v8i16(<8 x i16> %c, <8 x i16> %v1, <8 x i16> %v2) {
+ %diff = xor <8 x i16> %v1, %v2  ; bits where v1 and v2 disagree
+ %cinv = xor <8 x i16> %c, <i16 -1, i16 -1, i16 -1, i16 -1,
+                            i16 -1, i16 -1, i16 -1, i16 -1>
+ %masked = and <8 x i16> %diff, %cinv  ; keep them only where c is clear
+ %sel = xor <8 x i16> %masked, %v2  ; v2 where c is set, v1 elsewhere
+ ret <8 x i16> %sel
+}
+
 ; CHECK-LABEL: extmul_low_s_v8i16:
 ; NO-SIMD128-NOT: i16x8
 ; SIMD128-NEXT: .functype extmul_low_s_v8i16 (v128, v128) -> (v128){{$}}
@@ -998,6 +1064,38 @@
   ret <4 x i32> %a
 }
 
+; CHECK-LABEL: bitselect_xor_v4i32:
+; NO-SIMD128-NOT: v128
+; SIMD128-NEXT: .functype bitselect_xor_v4i32 (v128, v128, v128) -> (v128){{$}}
+; SIMD128-SLOW-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $0{{$}}
+; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}}
+; SIMD128-FAST-NEXT: v128.xor
+; SIMD128-FAST-NEXT: v128.and
+; SIMD128-FAST-NEXT: v128.xor
+define <4 x i32> @bitselect_xor_v4i32(<4 x i32> %c, <4 x i32> %v1, <4 x i32> %v2) {
+ %diff = xor <4 x i32> %v1, %v2  ; bits where v1 and v2 disagree
+ %masked = and <4 x i32> %diff, %c  ; keep them only where c is set
+ %sel = xor <4 x i32> %masked, %v2  ; v1 where c is set, v2 elsewhere
+ ret <4 x i32> %sel
+}
+
+; CHECK-LABEL: bitselect_xor_reversed_v4i32:
+; NO-SIMD128-NOT: v128
+; SIMD128-NEXT: .functype bitselect_xor_reversed_v4i32 (v128, v128, v128) -> (v128){{$}}
+; SIMD128-SLOW-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $2, $1, $0{{$}}
+; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}}
+; SIMD128-FAST-NEXT: v128.xor
+; SIMD128-FAST-NEXT: v128.not
+; SIMD128-FAST-NEXT: v128.and
+; SIMD128-FAST-NEXT: v128.xor
+define <4 x i32> @bitselect_xor_reversed_v4i32(<4 x i32> %c, <4 x i32> %v1, <4 x i32> %v2) {
+ %diff = xor <4 x i32> %v1, %v2  ; bits where v1 and v2 disagree
+ %cinv = xor <4 x i32> %c, <i32 -1, i32 -1, i32 -1, i32 -1>
+ %masked = and <4 x i32> %diff, %cinv  ; keep them only where c is clear
+ %sel = xor <4 x i32> %masked, %v2  ; v2 where c is set, v1 elsewhere
+ ret <4 x i32> %sel
+}
+
 ; CHECK-LABEL: extmul_low_s_v4i32:
 ; NO-SIMD128-NOT: i32x4
 ; SIMD128-NEXT: .functype extmul_low_s_v4i32 (v128, v128) -> (v128){{$}}
@@ -1390,6 +1488,38 @@
   ret <2 x i64> %a
 }
 
+; CHECK-LABEL: bitselect_xor_v2i64:
+; NO-SIMD128-NOT: v128
+; SIMD128-NEXT: .functype bitselect_xor_v2i64 (v128, v128, v128) -> (v128){{$}}
+; SIMD128-SLOW-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $0{{$}}
+; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}}
+; SIMD128-FAST-NEXT: v128.xor
+; SIMD128-FAST-NEXT: v128.and
+; SIMD128-FAST-NEXT: v128.xor
+define <2 x i64> @bitselect_xor_v2i64(<2 x i64> %c, <2 x i64> %v1, <2 x i64> %v2) {
+ %diff = xor <2 x i64> %v1, %v2  ; bits where v1 and v2 disagree
+ %masked = and <2 x i64> %diff, %c  ; keep them only where c is set
+ %sel = xor <2 x i64> %masked, %v2  ; v1 where c is set, v2 elsewhere
+ ret <2 x i64> %sel
+}
+
+; CHECK-LABEL: bitselect_xor_reversed_v2i64:
+; NO-SIMD128-NOT: v128
+; SIMD128-NEXT: .functype bitselect_xor_reversed_v2i64 (v128, v128, v128) -> (v128){{$}}
+; SIMD128-SLOW-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $2, $1, $0{{$}}
+; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}}
+; SIMD128-FAST-NEXT: v128.xor
+; SIMD128-FAST-NEXT: v128.not
+; SIMD128-FAST-NEXT: v128.and
+; SIMD128-FAST-NEXT: v128.xor
+define <2 x i64> @bitselect_xor_reversed_v2i64(<2 x i64> %c, <2 x i64> %v1, <2 x i64> %v2) {
+ %diff = xor <2 x i64> %v1, %v2  ; bits where v1 and v2 disagree
+ %cinv = xor <2 x i64> %c, <i64 -1, i64 -1>
+ %masked = and <2 x i64> %diff, %cinv  ; keep them only where c is clear
+ %sel = xor <2 x i64> %masked, %v2  ; v2 where c is set, v1 elsewhere
+ ret <2 x i64> %sel
+}
+
 ; CHECK-LABEL: extmul_low_s_v2i64:
 ; NO-SIMD128-NOT: i64x2
 ; SIMD128-NEXT: .functype extmul_low_s_v2i64 (v128, v128) -> (v128){{$}}