|  | ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 | 
|  | ; RUN: llc -mtriple=aarch64 -mattr=+sve < %s -o - | FileCheck --check-prefixes=CHECK,SVE %s | 
|  | ; RUN: llc -mtriple=aarch64 -mattr=+sve2 < %s -o - | FileCheck --check-prefixes=CHECK,SVE2 %s | 
|  |  | 
|  | ; Wrong add/shift amount. Should be 32 for shift of 6. | 
|  | define <vscale x 2 x i64> @neg_urshr_1(<vscale x 2 x i64> %x) { | 
|  | ; CHECK-LABEL: neg_urshr_1: | 
|  | ; CHECK:       // %bb.0: | 
|  | ; CHECK-NEXT:    add z0.d, z0.d, #16 // =0x10 | 
|  | ; CHECK-NEXT:    lsr z0.d, z0.d, #6 | 
|  | ; CHECK-NEXT:    ret | 
|  | %add = add nuw nsw <vscale x 2 x i64> %x, splat (i64 16) | 
|  | %sh = lshr <vscale x 2 x i64> %add, splat (i64 6) | 
|  | ret <vscale x 2 x i64> %sh | 
|  | } | 
|  |  | 
|  | ; Vector Shift. | 
|  | define <vscale x 2 x i64> @neg_urshr_2(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y) { | 
|  | ; CHECK-LABEL: neg_urshr_2: | 
|  | ; CHECK:       // %bb.0: | 
|  | ; CHECK-NEXT:    add z0.d, z0.d, #32 // =0x20 | 
|  | ; CHECK-NEXT:    ptrue p0.d | 
|  | ; CHECK-NEXT:    lsr z0.d, p0/m, z0.d, z1.d | 
|  | ; CHECK-NEXT:    ret | 
|  | %add = add nuw nsw <vscale x 2 x i64> %x, splat (i64 32) | 
|  | %sh = lshr <vscale x 2 x i64> %add, %y | 
|  | ret <vscale x 2 x i64> %sh | 
|  | } | 
|  |  | 
|  | ; Vector Add. | 
|  | define <vscale x 2 x i64> @neg_urshr_3(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y) { | 
|  | ; CHECK-LABEL: neg_urshr_3: | 
|  | ; CHECK:       // %bb.0: | 
|  | ; CHECK-NEXT:    add z0.d, z0.d, z1.d | 
|  | ; CHECK-NEXT:    lsr z0.d, z0.d, #6 | 
|  | ; CHECK-NEXT:    ret | 
|  | %add = add nuw nsw <vscale x 2 x i64> %x, %y | 
|  | %sh = lshr <vscale x 2 x i64> %add, splat (i64 6) | 
|  | ret <vscale x 2 x i64> %sh | 
|  | } | 
|  |  | 
|  | ; Add has two uses. | 
|  | define <vscale x 2 x i64> @neg_urshr_4(<vscale x 2 x i64> %x, ptr %p) { | 
|  | ; CHECK-LABEL: neg_urshr_4: | 
|  | ; CHECK:       // %bb.0: | 
|  | ; CHECK-NEXT:    mov z1.d, z0.d | 
|  | ; CHECK-NEXT:    ptrue p0.d | 
|  | ; CHECK-NEXT:    add z1.d, z1.d, #32 // =0x20 | 
|  | ; CHECK-NEXT:    lsr z0.d, z1.d, #6 | 
|  | ; CHECK-NEXT:    st1d { z1.d }, p0, [x0] | 
|  | ; CHECK-NEXT:    ret | 
|  | %add = add nuw nsw <vscale x 2 x i64> %x, splat (i64 32) | 
|  | %sh = lshr <vscale x 2 x i64> %add, splat (i64 6) | 
|  | store <vscale x 2 x i64> %add, ptr %p | 
|  | ret <vscale x 2 x i64> %sh | 
|  | } | 
|  |  | 
|  | ; Add can overflow. | 
|  | define <vscale x 2 x i64> @neg_urshr_5(<vscale x 2 x i64> %x) { | 
|  | ; CHECK-LABEL: neg_urshr_5: | 
|  | ; CHECK:       // %bb.0: | 
|  | ; CHECK-NEXT:    add z0.d, z0.d, #32 // =0x20 | 
|  | ; CHECK-NEXT:    lsr z0.d, z0.d, #6 | 
|  | ; CHECK-NEXT:    ret | 
|  | %add = add <vscale x 2 x i64> %x, splat (i64 32) | 
|  | %sh = lshr <vscale x 2 x i64> %add, splat (i64 6) | 
|  | ret <vscale x 2 x i64> %sh | 
|  | } | 
|  |  | 
|  | define <vscale x 16 x i8> @urshr_i8(<vscale x 16 x i8> %x) { | 
|  | ; SVE-LABEL: urshr_i8: | 
|  | ; SVE:       // %bb.0: | 
|  | ; SVE-NEXT:    add z0.b, z0.b, #32 // =0x20 | 
|  | ; SVE-NEXT:    lsr z0.b, z0.b, #6 | 
|  | ; SVE-NEXT:    ret | 
|  | ; | 
|  | ; SVE2-LABEL: urshr_i8: | 
|  | ; SVE2:       // %bb.0: | 
|  | ; SVE2-NEXT:    ptrue p0.b | 
|  | ; SVE2-NEXT:    urshr z0.b, p0/m, z0.b, #6 | 
|  | ; SVE2-NEXT:    ret | 
|  | %add = add nuw nsw <vscale x 16 x i8> %x, splat (i8 32) | 
|  | %sh = lshr <vscale x 16 x i8> %add, splat (i8 6) | 
|  | ret <vscale x 16 x i8> %sh | 
|  | } | 
|  |  | 
|  | define <vscale x 16 x i8> @urshr_8_wide_trunc(<vscale x 16 x i8> %x) { | 
|  | ; SVE-LABEL: urshr_8_wide_trunc: | 
|  | ; SVE:       // %bb.0: | 
|  | ; SVE-NEXT:    uunpkhi z1.h, z0.b | 
|  | ; SVE-NEXT:    uunpklo z0.h, z0.b | 
|  | ; SVE-NEXT:    add z0.h, z0.h, #32 // =0x20 | 
|  | ; SVE-NEXT:    add z1.h, z1.h, #32 // =0x20 | 
|  | ; SVE-NEXT:    lsr z1.h, z1.h, #6 | 
|  | ; SVE-NEXT:    lsr z0.h, z0.h, #6 | 
|  | ; SVE-NEXT:    uzp1 z0.b, z0.b, z1.b | 
|  | ; SVE-NEXT:    ret | 
|  | ; | 
|  | ; SVE2-LABEL: urshr_8_wide_trunc: | 
|  | ; SVE2:       // %bb.0: | 
|  | ; SVE2-NEXT:    ptrue p0.b | 
|  | ; SVE2-NEXT:    urshr z0.b, p0/m, z0.b, #6 | 
|  | ; SVE2-NEXT:    ret | 
|  | %ext = zext <vscale x 16 x i8> %x to <vscale x 16 x i16> | 
|  | %add = add nuw nsw <vscale x 16 x i16> %ext, splat (i16 32) | 
|  | %sh = lshr <vscale x 16 x i16> %add, splat (i16 6) | 
|  | %sht = trunc <vscale x 16 x i16> %sh to <vscale x 16 x i8> | 
|  | ret <vscale x 16 x i8> %sht | 
|  | } | 
|  |  | 
|  | define <vscale x 16 x i8> @urshr_8_wide_trunc_nomerge(<vscale x 16 x i16> %ext) { | 
|  | ; SVE-LABEL: urshr_8_wide_trunc_nomerge: | 
|  | ; SVE:       // %bb.0: | 
|  | ; SVE-NEXT:    add z0.h, z0.h, #256 // =0x100 | 
|  | ; SVE-NEXT:    add z1.h, z1.h, #256 // =0x100 | 
|  | ; SVE-NEXT:    lsr z1.h, z1.h, #9 | 
|  | ; SVE-NEXT:    lsr z0.h, z0.h, #9 | 
|  | ; SVE-NEXT:    uzp1 z0.b, z0.b, z1.b | 
|  | ; SVE-NEXT:    ret | 
|  | ; | 
|  | ; SVE2-LABEL: urshr_8_wide_trunc_nomerge: | 
|  | ; SVE2:       // %bb.0: | 
|  | ; SVE2-NEXT:    ptrue p0.h | 
|  | ; SVE2-NEXT:    urshr z1.h, p0/m, z1.h, #9 | 
|  | ; SVE2-NEXT:    urshr z0.h, p0/m, z0.h, #9 | 
|  | ; SVE2-NEXT:    uzp1 z0.b, z0.b, z1.b | 
|  | ; SVE2-NEXT:    ret | 
|  | %add = add nuw nsw <vscale x 16 x i16> %ext, splat (i16 256) | 
|  | %sh = lshr <vscale x 16 x i16> %add, splat (i16 9) | 
|  | %sht = trunc <vscale x 16 x i16> %sh to <vscale x 16 x i8> | 
|  | ret <vscale x 16 x i8> %sht | 
|  | } | 
|  |  | 
|  | define <vscale x 8 x i16> @urshr_i16(<vscale x 8 x i16> %x) { | 
|  | ; SVE-LABEL: urshr_i16: | 
|  | ; SVE:       // %bb.0: | 
|  | ; SVE-NEXT:    add z0.h, z0.h, #32 // =0x20 | 
|  | ; SVE-NEXT:    lsr z0.h, z0.h, #6 | 
|  | ; SVE-NEXT:    ret | 
|  | ; | 
|  | ; SVE2-LABEL: urshr_i16: | 
|  | ; SVE2:       // %bb.0: | 
|  | ; SVE2-NEXT:    ptrue p0.h | 
|  | ; SVE2-NEXT:    urshr z0.h, p0/m, z0.h, #6 | 
|  | ; SVE2-NEXT:    ret | 
|  | %add = add nuw nsw <vscale x 8 x i16> %x, splat (i16 32) | 
|  | %sh = lshr <vscale x 8 x i16> %add, splat (i16 6) | 
|  | ret <vscale x 8 x i16> %sh | 
|  | } | 
|  |  | 
|  | define <vscale x 8 x i16> @urshr_16_wide_trunc(<vscale x 8 x i16> %x) { | 
|  | ; SVE-LABEL: urshr_16_wide_trunc: | 
|  | ; SVE:       // %bb.0: | 
|  | ; SVE-NEXT:    uunpkhi z1.s, z0.h | 
|  | ; SVE-NEXT:    uunpklo z0.s, z0.h | 
|  | ; SVE-NEXT:    add z0.s, z0.s, #32 // =0x20 | 
|  | ; SVE-NEXT:    add z1.s, z1.s, #32 // =0x20 | 
|  | ; SVE-NEXT:    lsr z1.s, z1.s, #6 | 
|  | ; SVE-NEXT:    lsr z0.s, z0.s, #6 | 
|  | ; SVE-NEXT:    uzp1 z0.h, z0.h, z1.h | 
|  | ; SVE-NEXT:    ret | 
|  | ; | 
|  | ; SVE2-LABEL: urshr_16_wide_trunc: | 
|  | ; SVE2:       // %bb.0: | 
|  | ; SVE2-NEXT:    ptrue p0.h | 
|  | ; SVE2-NEXT:    urshr z0.h, p0/m, z0.h, #6 | 
|  | ; SVE2-NEXT:    ret | 
|  | %ext = zext <vscale x 8 x i16> %x to <vscale x 8 x i32> | 
|  | %add = add nuw nsw <vscale x 8 x i32> %ext, splat (i32 32) | 
|  | %sh = lshr <vscale x 8 x i32> %add, splat (i32 6) | 
|  | %sht = trunc <vscale x 8 x i32> %sh to <vscale x 8 x i16> | 
|  | ret <vscale x 8 x i16> %sht | 
|  | } | 
|  |  | 
|  | define <vscale x 8 x i16> @urshr_16_wide_trunc_nomerge(<vscale x 8 x i32> %ext) { | 
|  | ; SVE-LABEL: urshr_16_wide_trunc_nomerge: | 
|  | ; SVE:       // %bb.0: | 
|  | ; SVE-NEXT:    mov z2.s, #0x10000 | 
|  | ; SVE-NEXT:    add z0.s, z0.s, z2.s | 
|  | ; SVE-NEXT:    add z1.s, z1.s, z2.s | 
|  | ; SVE-NEXT:    lsr z1.s, z1.s, #17 | 
|  | ; SVE-NEXT:    lsr z0.s, z0.s, #17 | 
|  | ; SVE-NEXT:    uzp1 z0.h, z0.h, z1.h | 
|  | ; SVE-NEXT:    ret | 
|  | ; | 
|  | ; SVE2-LABEL: urshr_16_wide_trunc_nomerge: | 
|  | ; SVE2:       // %bb.0: | 
|  | ; SVE2-NEXT:    ptrue p0.s | 
|  | ; SVE2-NEXT:    urshr z1.s, p0/m, z1.s, #17 | 
|  | ; SVE2-NEXT:    urshr z0.s, p0/m, z0.s, #17 | 
|  | ; SVE2-NEXT:    uzp1 z0.h, z0.h, z1.h | 
|  | ; SVE2-NEXT:    ret | 
|  | %add = add nuw nsw <vscale x 8 x i32> %ext, splat (i32 65536) | 
|  | %sh = lshr <vscale x 8 x i32> %add, splat (i32 17) | 
|  | %sht = trunc <vscale x 8 x i32> %sh to <vscale x 8 x i16> | 
|  | ret <vscale x 8 x i16> %sht | 
|  | } | 
|  |  | 
|  | define <vscale x 4 x i32> @urshr_i32(<vscale x 4 x i32> %x) { | 
|  | ; SVE-LABEL: urshr_i32: | 
|  | ; SVE:       // %bb.0: | 
|  | ; SVE-NEXT:    add z0.s, z0.s, #32 // =0x20 | 
|  | ; SVE-NEXT:    lsr z0.s, z0.s, #6 | 
|  | ; SVE-NEXT:    ret | 
|  | ; | 
|  | ; SVE2-LABEL: urshr_i32: | 
|  | ; SVE2:       // %bb.0: | 
|  | ; SVE2-NEXT:    ptrue p0.s | 
|  | ; SVE2-NEXT:    urshr z0.s, p0/m, z0.s, #6 | 
|  | ; SVE2-NEXT:    ret | 
|  | %add = add nuw nsw <vscale x 4 x i32> %x, splat (i32 32) | 
|  | %sh = lshr <vscale x 4 x i32> %add, splat (i32 6) | 
|  | ret <vscale x 4 x i32> %sh | 
|  | } | 
|  |  | 
|  | define <vscale x 4 x i32> @urshr_32_wide_trunc(<vscale x 4 x i32> %x) { | 
|  | ; SVE-LABEL: urshr_32_wide_trunc: | 
|  | ; SVE:       // %bb.0: | 
|  | ; SVE-NEXT:    uunpkhi z1.d, z0.s | 
|  | ; SVE-NEXT:    uunpklo z0.d, z0.s | 
|  | ; SVE-NEXT:    add z0.d, z0.d, #32 // =0x20 | 
|  | ; SVE-NEXT:    add z1.d, z1.d, #32 // =0x20 | 
|  | ; SVE-NEXT:    lsr z1.d, z1.d, #6 | 
|  | ; SVE-NEXT:    lsr z0.d, z0.d, #6 | 
|  | ; SVE-NEXT:    uzp1 z0.s, z0.s, z1.s | 
|  | ; SVE-NEXT:    ret | 
|  | ; | 
|  | ; SVE2-LABEL: urshr_32_wide_trunc: | 
|  | ; SVE2:       // %bb.0: | 
|  | ; SVE2-NEXT:    ptrue p0.s | 
|  | ; SVE2-NEXT:    urshr z0.s, p0/m, z0.s, #6 | 
|  | ; SVE2-NEXT:    ret | 
|  | %ext = zext <vscale x 4 x i32> %x to <vscale x 4 x i64> | 
|  | %add = add nuw nsw <vscale x 4 x i64> %ext, splat (i64 32) | 
|  | %sh = lshr <vscale x 4 x i64> %add, splat (i64 6) | 
|  | %sht = trunc <vscale x 4 x i64> %sh to <vscale x 4 x i32> | 
|  | ret <vscale x 4 x i32> %sht | 
|  | } | 
|  |  | 
|  | define <vscale x 4 x i32> @urshr_32_wide_trunc_nomerge(<vscale x 4 x i64> %ext) { | 
|  | ; SVE-LABEL: urshr_32_wide_trunc_nomerge: | 
|  | ; SVE:       // %bb.0: | 
|  | ; SVE-NEXT:    mov z2.d, #0x100000000 | 
|  | ; SVE-NEXT:    add z0.d, z0.d, z2.d | 
|  | ; SVE-NEXT:    add z1.d, z1.d, z2.d | 
|  | ; SVE-NEXT:    lsr z1.d, z1.d, #33 | 
|  | ; SVE-NEXT:    lsr z0.d, z0.d, #33 | 
|  | ; SVE-NEXT:    uzp1 z0.s, z0.s, z1.s | 
|  | ; SVE-NEXT:    ret | 
|  | ; | 
|  | ; SVE2-LABEL: urshr_32_wide_trunc_nomerge: | 
|  | ; SVE2:       // %bb.0: | 
|  | ; SVE2-NEXT:    ptrue p0.d | 
|  | ; SVE2-NEXT:    urshr z1.d, p0/m, z1.d, #33 | 
|  | ; SVE2-NEXT:    urshr z0.d, p0/m, z0.d, #33 | 
|  | ; SVE2-NEXT:    uzp1 z0.s, z0.s, z1.s | 
|  | ; SVE2-NEXT:    ret | 
|  | %add = add nuw nsw <vscale x 4 x i64> %ext, splat (i64 4294967296) | 
|  | %sh = lshr <vscale x 4 x i64> %add, splat (i64 33) | 
|  | %sht = trunc <vscale x 4 x i64> %sh to <vscale x 4 x i32> | 
|  | ret <vscale x 4 x i32> %sht | 
|  | } | 
|  |  | 
|  | define <vscale x 2 x i64> @urshr_i64(<vscale x 2 x i64> %x) { | 
|  | ; SVE-LABEL: urshr_i64: | 
|  | ; SVE:       // %bb.0: | 
|  | ; SVE-NEXT:    add z0.d, z0.d, #32 // =0x20 | 
|  | ; SVE-NEXT:    lsr z0.d, z0.d, #6 | 
|  | ; SVE-NEXT:    ret | 
|  | ; | 
|  | ; SVE2-LABEL: urshr_i64: | 
|  | ; SVE2:       // %bb.0: | 
|  | ; SVE2-NEXT:    ptrue p0.d | 
|  | ; SVE2-NEXT:    urshr z0.d, p0/m, z0.d, #6 | 
|  | ; SVE2-NEXT:    ret | 
|  | %add = add nuw nsw <vscale x 2 x i64> %x, splat (i64 32) | 
|  | %sh = lshr <vscale x 2 x i64> %add, splat (i64 6) | 
|  | ret <vscale x 2 x i64> %sh | 
|  | } |