| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc -mtriple=aarch64-none-elf < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SD |
| ; RUN: llc -mtriple=aarch64-none-elf -global-isel < %s | FileCheck %s --check-prefixes=CHECK,CHECK-GI |
| |
| ; Adds the sign bit of each i64 lane (lshr by 63, narrowed to i32) to the same |
| ; lane arithmetically shifted right by 35 and narrowed to i32. |
| ; After the ashr by 35 every bit from 28 upwards is a copy of the sign bit, so |
| ; bit 31 of the narrowed lane equals the original sign bit. |
| ; The CHECK-SD lines expect sshr + xtn followed by a single usra #31 on the |
| ; narrowed value; the CHECK-GI lines expect separate ushr/sshr, two xtn and an add. |
| define <2 x i32> @test_v2i64(<2 x i64> %n) { |
| ; CHECK-SD-LABEL: test_v2i64: |
| ; CHECK-SD: // %bb.0: // %entry |
| ; CHECK-SD-NEXT: sshr v0.2d, v0.2d, #35 |
| ; CHECK-SD-NEXT: xtn v0.2s, v0.2d |
| ; CHECK-SD-NEXT: usra v0.2s, v0.2s, #31 |
| ; CHECK-SD-NEXT: ret |
| ; |
| ; CHECK-GI-LABEL: test_v2i64: |
| ; CHECK-GI: // %bb.0: // %entry |
| ; CHECK-GI-NEXT: ushr v1.2d, v0.2d, #63 |
| ; CHECK-GI-NEXT: sshr v0.2d, v0.2d, #35 |
| ; CHECK-GI-NEXT: xtn v1.2s, v1.2d |
| ; CHECK-GI-NEXT: xtn v0.2s, v0.2d |
| ; CHECK-GI-NEXT: add v0.2s, v1.2s, v0.2s |
| ; CHECK-GI-NEXT: ret |
| entry: |
| %shr = lshr <2 x i64> %n, splat (i64 63) |
| %vmovn.i4 = trunc nuw nsw <2 x i64> %shr to <2 x i32> |
| %shr1 = ashr <2 x i64> %n, splat (i64 35) |
| %vmovn.i = trunc nsw <2 x i64> %shr1 to <2 x i32> |
| %add = add nsw <2 x i32> %vmovn.i4, %vmovn.i |
| ret <2 x i32> %add |
| } |
| |
| ; i32 -> i16 variant: adds the sign bit of each i32 lane (lshr by 31, narrowed |
| ; to i16) to the same lane arithmetically shifted right by 17 and narrowed to i16. |
| ; After the ashr by 17 every bit from 14 upwards is a copy of the sign bit, so |
| ; bit 15 of the narrowed lane equals the original sign bit. |
| ; The CHECK-SD lines expect sshr + xtn + usra #15; the CHECK-GI lines expect |
| ; separate ushr/sshr, two xtn and an add. |
| define <4 x i16> @test_v4i32(<4 x i32> %n) { |
| ; CHECK-SD-LABEL: test_v4i32: |
| ; CHECK-SD: // %bb.0: // %entry |
| ; CHECK-SD-NEXT: sshr v0.4s, v0.4s, #17 |
| ; CHECK-SD-NEXT: xtn v0.4h, v0.4s |
| ; CHECK-SD-NEXT: usra v0.4h, v0.4h, #15 |
| ; CHECK-SD-NEXT: ret |
| ; |
| ; CHECK-GI-LABEL: test_v4i32: |
| ; CHECK-GI: // %bb.0: // %entry |
| ; CHECK-GI-NEXT: ushr v1.4s, v0.4s, #31 |
| ; CHECK-GI-NEXT: sshr v0.4s, v0.4s, #17 |
| ; CHECK-GI-NEXT: xtn v1.4h, v1.4s |
| ; CHECK-GI-NEXT: xtn v0.4h, v0.4s |
| ; CHECK-GI-NEXT: add v0.4h, v1.4h, v0.4h |
| ; CHECK-GI-NEXT: ret |
| entry: |
| %shr = lshr <4 x i32> %n, splat (i32 31) |
| %vmovn.i4 = trunc nuw nsw <4 x i32> %shr to <4 x i16> |
| %shr1 = ashr <4 x i32> %n, splat (i32 17) |
| %vmovn.i = trunc nsw <4 x i32> %shr1 to <4 x i16> |
| %add = add nsw <4 x i16> %vmovn.i4, %vmovn.i |
| ret <4 x i16> %add |
| } |
| |
| ; i16 -> i8 variant: adds the sign bit of each i16 lane (lshr by 15, narrowed |
| ; to i8) to the same lane arithmetically shifted right by 9 and narrowed to i8. |
| ; After the ashr by 9 every bit from 6 upwards is a copy of the sign bit, so |
| ; bit 7 of the narrowed lane equals the original sign bit. |
| ; The CHECK-SD lines expect sshr + xtn + usra #7; the CHECK-GI lines expect |
| ; separate ushr/sshr, two xtn and an add. |
| define <8 x i8> @test_v8i16(<8 x i16> %n) { |
| ; CHECK-SD-LABEL: test_v8i16: |
| ; CHECK-SD: // %bb.0: // %entry |
| ; CHECK-SD-NEXT: sshr v0.8h, v0.8h, #9 |
| ; CHECK-SD-NEXT: xtn v0.8b, v0.8h |
| ; CHECK-SD-NEXT: usra v0.8b, v0.8b, #7 |
| ; CHECK-SD-NEXT: ret |
| ; |
| ; CHECK-GI-LABEL: test_v8i16: |
| ; CHECK-GI: // %bb.0: // %entry |
| ; CHECK-GI-NEXT: ushr v1.8h, v0.8h, #15 |
| ; CHECK-GI-NEXT: sshr v0.8h, v0.8h, #9 |
| ; CHECK-GI-NEXT: xtn v1.8b, v1.8h |
| ; CHECK-GI-NEXT: xtn v0.8b, v0.8h |
| ; CHECK-GI-NEXT: add v0.8b, v1.8b, v0.8b |
| ; CHECK-GI-NEXT: ret |
| entry: |
| %shr = lshr <8 x i16> %n, splat (i16 15) |
| %vmovn.i4 = trunc nuw nsw <8 x i16> %shr to <8 x i8> |
| %shr1 = ashr <8 x i16> %n, splat (i16 9) |
| %vmovn.i = trunc nsw <8 x i16> %shr1 to <8 x i8> |
| %add = add nsw <8 x i8> %vmovn.i4, %vmovn.i |
| ret <8 x i8> %add |
| } |
| |
| ; Same shape as test_v2i64 but the logical shift is by 62 instead of 63, so the |
| ; first addend is the top two bits of each lane rather than only the sign bit. |
| ; Both run lines share the CHECK prefix here: the expected output keeps separate |
| ; ushr #62 / sshr #35, two xtn and an add, with no usra. |
| ; NOTE(review): this looks like a negative test for the usra fold - confirm |
| ; against the combine it was added with. |
| define <2 x i32> @test_v2i64_smallsrl(<2 x i64> %n) { |
| ; CHECK-LABEL: test_v2i64_smallsrl: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: ushr v1.2d, v0.2d, #62 |
| ; CHECK-NEXT: sshr v0.2d, v0.2d, #35 |
| ; CHECK-NEXT: xtn v1.2s, v1.2d |
| ; CHECK-NEXT: xtn v0.2s, v0.2d |
| ; CHECK-NEXT: add v0.2s, v1.2s, v0.2s |
| ; CHECK-NEXT: ret |
| entry: |
| %shr = lshr <2 x i64> %n, splat (i64 62) |
| %vmovn.i4 = trunc nuw nsw <2 x i64> %shr to <2 x i32> |
| %shr1 = ashr <2 x i64> %n, splat (i64 35) |
| %vmovn.i = trunc nsw <2 x i64> %shr1 to <2 x i32> |
| %add = add nsw <2 x i32> %vmovn.i4, %vmovn.i |
| ret <2 x i32> %add |
| } |
| |
| ; Same shape as test_v2i64 but the arithmetic shift is by 27 instead of 35, |
| ; which is less than the 32 bits dropped by the truncation to i32. |
| ; Both run lines share the CHECK prefix here: the expected output is ushr #63, |
| ; a narrowing shrn #27, an xtn of the sign-bit vector and an add, with no usra. |
| ; NOTE(review): presumably the usra fold is skipped because bit 31 of the |
| ; narrowed lane is bit 58 of the source rather than a guaranteed copy of the |
| ; sign bit - confirm against the combine's shift-amount condition. |
| define <2 x i32> @test_v2i64_smallsra(<2 x i64> %n) { |
| ; CHECK-LABEL: test_v2i64_smallsra: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: ushr v1.2d, v0.2d, #63 |
| ; CHECK-NEXT: shrn v0.2s, v0.2d, #27 |
| ; CHECK-NEXT: xtn v1.2s, v1.2d |
| ; CHECK-NEXT: add v0.2s, v1.2s, v0.2s |
| ; CHECK-NEXT: ret |
| entry: |
| %shr = lshr <2 x i64> %n, splat (i64 63) |
| %vmovn.i4 = trunc nuw nsw <2 x i64> %shr to <2 x i32> |
| %shr1 = ashr <2 x i64> %n, splat (i64 27) |
| %vmovn.i = trunc nsw <2 x i64> %shr1 to <2 x i32> |
| %add = add nsw <2 x i32> %vmovn.i4, %vmovn.i |
| ret <2 x i32> %add |
| } |
| |