| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 |
| ; RUN: llc < %s -verify-machineinstrs -mattr=+simd128 | FileCheck %s |
| |
| ; Test that SIMD shifts can be lowered correctly even when shift |
| ; values are exported from outside blocks. |
| |
| target triple = "wasm32-unknown-unknown" |
| |
| ; shl_loop: the splat of %shift is built once in %entry and consumed by the |
| ; vector shl in %body, so the shift amount crosses a basic-block boundary. |
| ; The expected output below passes a scalar (local.get 1, the second |
| ; parameter) directly to i8x16.shl inside the loop. |
| define void @shl_loop(ptr %a, i8 %shift, i32 %count) { |
| ; CHECK-LABEL: shl_loop: |
| ; CHECK: .functype shl_loop (i32, i32, i32) -> () |
| ; CHECK-NEXT: # %bb.0: # %entry |
| ; CHECK-NEXT: .LBB0_1: # %body |
| ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: loop # label0: |
| ; CHECK-NEXT: local.get 0 |
| ; CHECK-NEXT: local.get 0 |
| ; CHECK-NEXT: v128.load 0:p2align=0 |
| ; CHECK-NEXT: local.get 1 |
| ; CHECK-NEXT: i8x16.shl |
| ; CHECK-NEXT: v128.store 16 |
| ; CHECK-NEXT: local.get 0 |
| ; CHECK-NEXT: i32.const 16 |
| ; CHECK-NEXT: i32.add |
| ; CHECK-NEXT: local.set 0 |
| ; CHECK-NEXT: local.get 2 |
| ; CHECK-NEXT: i32.const -1 |
| ; CHECK-NEXT: i32.add |
| ; CHECK-NEXT: local.tee 2 |
| ; CHECK-NEXT: i32.eqz |
| ; CHECK-NEXT: br_if 0 # 0: up to label0 |
| ; CHECK-NEXT: # %bb.2: # %exit |
| ; CHECK-NEXT: end_loop |
| ; CHECK-NEXT: # fallthrough-return |
| entry: |
| ; Splat %shift across all 16 lanes: insert it into lane 0, then shuffle with |
| ; an all-zero mask so that every result lane reads lane 0. |
| %t1 = insertelement <16 x i8> undef, i8 %shift, i32 0 |
| %vshift = shufflevector <16 x i8> %t1, <16 x i8> undef, <16 x i32> zeroinitializer |
| br label %body |
| body: |
| ; %out is the address loaded from in this iteration; %i starts at 0 and is |
| ; incremented once per iteration. |
| %out = phi ptr [%a, %entry], [%b, %body] |
| %i = phi i32 [0, %entry], [%next, %body] |
| ; 16-byte vector load with align 1. |
| %v = load <16 x i8>, ptr %out, align 1 |
| ; Vector shift whose amount operand was defined in %entry. |
| %r = shl <16 x i8> %v, %vshift |
| ; The result is stored 16 bytes past the load address; %b is also the next |
| ; iteration's %out. |
| %b = getelementptr inbounds i8, ptr %out, i32 16 |
| store <16 x i8> %r, ptr %b |
| %next = add i32 %i, 1 |
| ; The back edge is taken when %next equals %count; any other value exits. |
| ; NOTE(review): this is not a conventional counted-loop exit condition. |
| ; Presumably only the presence of a loop matters to this test - confirm |
| ; before changing it, since the autogenerated assertions above would then |
| ; need to be regenerated. |
| %i.cmp = icmp eq i32 %next, %count |
| br i1 %i.cmp, label %body, label %exit |
| exit: |
| ret void |
| } |
| |
| ; Test that SIMD shifts can be lowered correctly when the shift value |
| ; is a phi inside the loop body. |
| |
| ; shl_phi_loop: the scalar shift amount is a loop-carried phi (%t1) and the |
| ; splat is rebuilt from it inside %body on every iteration. The expected |
| ; output below keeps the amount as a scalar in local 1: it is passed to |
| ; i8x16.shl and then updated with i32.and 1 for the next iteration. |
| define void @shl_phi_loop(ptr %a, i8 %shift, i32 %count) { |
| ; CHECK-LABEL: shl_phi_loop: |
| ; CHECK: .functype shl_phi_loop (i32, i32, i32) -> () |
| ; CHECK-NEXT: # %bb.0: # %entry |
| ; CHECK-NEXT: .LBB1_1: # %body |
| ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: loop # label1: |
| ; CHECK-NEXT: local.get 0 |
| ; CHECK-NEXT: local.get 0 |
| ; CHECK-NEXT: v128.load 0:p2align=0 |
| ; CHECK-NEXT: local.get 1 |
| ; CHECK-NEXT: i8x16.shl |
| ; CHECK-NEXT: v128.store 16 |
| ; CHECK-NEXT: local.get 1 |
| ; CHECK-NEXT: i32.const 1 |
| ; CHECK-NEXT: i32.and |
| ; CHECK-NEXT: local.set 1 |
| ; CHECK-NEXT: local.get 0 |
| ; CHECK-NEXT: i32.const 16 |
| ; CHECK-NEXT: i32.add |
| ; CHECK-NEXT: local.set 0 |
| ; CHECK-NEXT: local.get 2 |
| ; CHECK-NEXT: i32.const -1 |
| ; CHECK-NEXT: i32.add |
| ; CHECK-NEXT: local.tee 2 |
| ; CHECK-NEXT: i32.eqz |
| ; CHECK-NEXT: br_if 0 # 0: up to label1 |
| ; CHECK-NEXT: # %bb.2: # %exit |
| ; CHECK-NEXT: end_loop |
| ; CHECK-NEXT: # fallthrough-return |
| entry: |
| br label %body |
| body: |
| ; %out is the address loaded from in this iteration; %i starts at 0 and is |
| ; incremented once per iteration. |
| %out = phi ptr [%a, %entry], [%b, %body] |
| %i = phi i32 [0, %entry], [%next, %body] |
| ; Shift amount: %shift on the first iteration, %sand (the previous amount |
| ; masked with 1) on every later one. |
| %t1 = phi i8 [%shift, %entry], [%sand, %body] |
| ; Splat %t1 across all 16 lanes: insert it into lane 0, then shuffle with an |
| ; all-zero mask so that every result lane reads lane 0. |
| %t2 = insertelement <16 x i8> undef, i8 %t1, i32 0 |
| %vshift = shufflevector <16 x i8> %t2, <16 x i8> undef, <16 x i32> zeroinitializer |
| ; 16-byte vector load with align 1. |
| %v = load <16 x i8>, ptr %out, align 1 |
| %r = shl <16 x i8> %v, %vshift |
| ; The result is stored 16 bytes past the load address; %b is also the next |
| ; iteration's %out. |
| %b = getelementptr inbounds i8, ptr %out, i32 16 |
| store <16 x i8> %r, ptr %b |
| ; Next iteration's shift amount, fed back through the %t1 phi. |
| %sand = and i8 %t1, 1 |
| %next = add i32 %i, 1 |
| ; The back edge is taken when %next equals %count; any other value exits. |
| ; NOTE(review): this is not a conventional counted-loop exit condition. |
| ; Presumably only the presence of a loop matters to this test - confirm |
| ; before changing it, since the autogenerated assertions above would then |
| ; need to be regenerated. |
| %i.cmp = icmp eq i32 %next, %count |
| br i1 %i.cmp, label %body, label %exit |
| exit: |
| ret void |
| } |