| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 |
| ; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD |
| ; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI |
| |
| ; CHECK-GI: warning: Instruction selection used fallback path for extract_v4i32_vector_insert |
| ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for extract_v4i32_vector_insert_const |
| ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for extract_v4i32_vector_extract |
| ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for extract_v4i32_vector_extract_const |
| |
| ; Extract from %a with an undef index. |
| ; SelectionDAG is expected to read lane 0 directly (fmov x0, d0); GlobalISel is |
| ; expected to store the vector to the stack and reload the first 8 bytes. |
| define i64 @extract_v2i64_undef_index(<2 x i64> %a, i32 %c) { |
| ; CHECK-SD-LABEL: extract_v2i64_undef_index: |
| ; CHECK-SD: // %bb.0: // %entry |
| ; CHECK-SD-NEXT: fmov x0, d0 |
| ; CHECK-SD-NEXT: ret |
| ; |
| ; CHECK-GI-LABEL: extract_v2i64_undef_index: |
| ; CHECK-GI: // %bb.0: // %entry |
| ; CHECK-GI-NEXT: str q0, [sp, #-16]! |
| ; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-GI-NEXT: ldr x0, [sp], #16 |
| ; CHECK-GI-NEXT: ret |
| entry: |
| %d = extractelement <2 x i64> %a, i32 undef |
| ret i64 %d |
| } |
| |
| ; Extract from an undef vector with the runtime index %c. |
| ; Both selectors are expected to emit nothing but the return. |
| define i64 @extract_v2i64_undef_vector(<2 x i64> %a, i32 %c) { |
| ; CHECK-LABEL: extract_v2i64_undef_vector: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: ret |
| entry: |
| %d = extractelement <2 x i64> undef, i32 %c |
| ret i64 %d |
| } |
| |
| ; Baseline case: extract from the argument vector %a with the runtime index %c. |
| ; Both selectors are expected to spill the vector to a 16-byte stack slot and |
| ; load the element from it, using only the low bit of the index (bfi ..., #3, #1 |
| ; for SelectionDAG, and ..., #0x1 plus a scaled load for GlobalISel). |
| define i64 @extract_v2i64_opaque(<2 x i64> %a, i32 %c) { |
| ; CHECK-SD-LABEL: extract_v2i64_opaque: |
| ; CHECK-SD: // %bb.0: // %entry |
| ; CHECK-SD-NEXT: sub sp, sp, #16 |
| ; CHECK-SD-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-SD-NEXT: mov x8, sp |
| ; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0 |
| ; CHECK-SD-NEXT: str q0, [sp] |
| ; CHECK-SD-NEXT: bfi x8, x0, #3, #1 |
| ; CHECK-SD-NEXT: ldr x0, [x8] |
| ; CHECK-SD-NEXT: add sp, sp, #16 |
| ; CHECK-SD-NEXT: ret |
| ; |
| ; CHECK-GI-LABEL: extract_v2i64_opaque: |
| ; CHECK-GI: // %bb.0: // %entry |
| ; CHECK-GI-NEXT: sub sp, sp, #16 |
| ; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-GI-NEXT: mov w9, w0 |
| ; CHECK-GI-NEXT: mov x8, sp |
| ; CHECK-GI-NEXT: str q0, [sp] |
| ; CHECK-GI-NEXT: and x9, x9, #0x1 |
| ; CHECK-GI-NEXT: ldr x0, [x8, x9, lsl #3] |
| ; CHECK-GI-NEXT: add sp, sp, #16 |
| ; CHECK-GI-NEXT: ret |
| entry: |
| %d = extractelement <2 x i64> %a, i32 %c |
| ret i64 %d |
| } |
| |
| ; Extract with the constant index 5, which is past the end of the 2-element |
| ; vector. Both selectors are expected to emit nothing but the return. |
| define i64 @extract_v2i64_oob(<2 x i64> %a, i32 %c) { |
| ; CHECK-LABEL: extract_v2i64_oob: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: ret |
| entry: |
| %d = extractelement <2 x i64> %a, i32 5 |
| ret i64 %d |
| } |
| |
| ; Extract with the runtime index %c from a frozen copy of %a. |
| ; The expected code contains no instruction for the freeze itself: both |
| ; selectors spill q0 to the stack and load the element selected by the low bit |
| ; of the index. |
| define i64 @extract_v2i64_freeze(<2 x i64> %a, i32 %c) { |
| ; CHECK-SD-LABEL: extract_v2i64_freeze: |
| ; CHECK-SD: // %bb.0: // %entry |
| ; CHECK-SD-NEXT: sub sp, sp, #16 |
| ; CHECK-SD-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-SD-NEXT: mov x8, sp |
| ; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0 |
| ; CHECK-SD-NEXT: str q0, [sp] |
| ; CHECK-SD-NEXT: bfi x8, x0, #3, #1 |
| ; CHECK-SD-NEXT: ldr x0, [x8] |
| ; CHECK-SD-NEXT: add sp, sp, #16 |
| ; CHECK-SD-NEXT: ret |
| ; |
| ; CHECK-GI-LABEL: extract_v2i64_freeze: |
| ; CHECK-GI: // %bb.0: // %entry |
| ; CHECK-GI-NEXT: sub sp, sp, #16 |
| ; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-GI-NEXT: mov w9, w0 |
| ; CHECK-GI-NEXT: mov x8, sp |
| ; CHECK-GI-NEXT: str q0, [sp] |
| ; CHECK-GI-NEXT: and x9, x9, #0x1 |
| ; CHECK-GI-NEXT: ldr x0, [x8, x9, lsl #3] |
| ; CHECK-GI-NEXT: add sp, sp, #16 |
| ; CHECK-GI-NEXT: ret |
| entry: |
| %fvector = freeze <2 x i64> %a |
| %d = extractelement <2 x i64> %fvector, i32 %c |
| ret i64 %d |
| } |
| |
| ; Insert %element at the runtime index %c and extract again at the same index. |
| ; Both selectors are expected to emit only the return, i.e. no vector |
| ; instructions at all (presumably because %element already arrives in x0 as the |
| ; first integer argument -- confirm against the calling convention). |
| define i64 @extract_v2i64_extract_of_insert(<2 x i64> %a, i64 %element, i64 %c) { |
| ; CHECK-LABEL: extract_v2i64_extract_of_insert: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: ret |
| entry: |
| %vector = insertelement <2 x i64> %a, i64 %element, i64 %c |
| %d = extractelement <2 x i64> %vector, i64 %c |
| ret i64 %d |
| } |
| |
| ; Insert %element into lane 0, then extract the untouched lane 1. |
| ; The expected code reads lane 1 of the original v0 and contains no insert: |
| ; one lane move for SelectionDAG, a lane move plus fmov for GlobalISel. |
| define i64 @extract_v2i64_extract_of_insert_different_const(<2 x i64> %a, i64 %element) { |
| ; CHECK-SD-LABEL: extract_v2i64_extract_of_insert_different_const: |
| ; CHECK-SD: // %bb.0: // %entry |
| ; CHECK-SD-NEXT: mov x0, v0.d[1] |
| ; CHECK-SD-NEXT: ret |
| ; |
| ; CHECK-GI-LABEL: extract_v2i64_extract_of_insert_different_const: |
| ; CHECK-GI: // %bb.0: // %entry |
| ; CHECK-GI-NEXT: mov d0, v0.d[1] |
| ; CHECK-GI-NEXT: fmov x0, d0 |
| ; CHECK-GI-NEXT: ret |
| entry: |
| %vector = insertelement <2 x i64> %a, i64 %element, i64 0 |
| %d = extractelement <2 x i64> %vector, i64 1 |
| ret i64 %d |
| } |
| |
| ; Extract lane 1 of the constant vector <42, 11>. |
| ; Both selectors are expected to materialise the immediate 11 directly. |
| define i64 @extract_v2i64_extract_build_vector_const(<2 x i64> %a, i32 %c) { |
| ; CHECK-LABEL: extract_v2i64_extract_build_vector_const: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: mov w0, #11 // =0xb |
| ; CHECK-NEXT: ret |
| entry: |
| %d = extractelement <2 x i64> <i64 42, i64 11>, i32 1 |
| ret i64 %d |
| } |
| |
| ; Extract from the constant vector <42, 11> with the runtime index %c. |
| ; Both selectors are expected to load the constant from the constant pool, |
| ; spill it to the stack and load the element selected by the low bit of the |
| ; index. |
| define i64 @extract_v2i64_extract_build_vector_opaque(<2 x i64> %a, i32 %c) { |
| ; CHECK-SD-LABEL: extract_v2i64_extract_build_vector_opaque: |
| ; CHECK-SD: // %bb.0: // %entry |
| ; CHECK-SD-NEXT: sub sp, sp, #16 |
| ; CHECK-SD-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-SD-NEXT: adrp x8, .LCPI8_0 |
| ; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0 |
| ; CHECK-SD-NEXT: ldr q0, [x8, :lo12:.LCPI8_0] |
| ; CHECK-SD-NEXT: mov x8, sp |
| ; CHECK-SD-NEXT: bfi x8, x0, #3, #1 |
| ; CHECK-SD-NEXT: str q0, [sp] |
| ; CHECK-SD-NEXT: ldr x0, [x8] |
| ; CHECK-SD-NEXT: add sp, sp, #16 |
| ; CHECK-SD-NEXT: ret |
| ; |
| ; CHECK-GI-LABEL: extract_v2i64_extract_build_vector_opaque: |
| ; CHECK-GI: // %bb.0: // %entry |
| ; CHECK-GI-NEXT: sub sp, sp, #16 |
| ; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-GI-NEXT: adrp x8, .LCPI8_0 |
| ; CHECK-GI-NEXT: mov x9, sp |
| ; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI8_0] |
| ; CHECK-GI-NEXT: mov w8, w0 |
| ; CHECK-GI-NEXT: and x8, x8, #0x1 |
| ; CHECK-GI-NEXT: str q0, [sp] |
| ; CHECK-GI-NEXT: ldr x0, [x9, x8, lsl #3] |
| ; CHECK-GI-NEXT: add sp, sp, #16 |
| ; CHECK-GI-NEXT: ret |
| entry: |
| %d = extractelement <2 x i64> <i64 42, i64 11>, i32 %c |
| ret i64 %d |
| } |
| |
| |
| ; Zero-extend <2 x i32> to <2 x i64>, then extract with the runtime index %c. |
| ; Both selectors are expected to widen the whole vector (ushll) before spilling |
| ; it and loading the element selected by the low bit of the index. |
| define i64 @extract_v2i32_zext(<2 x i32> %a, i32 %c) { |
| ; CHECK-SD-LABEL: extract_v2i32_zext: |
| ; CHECK-SD: // %bb.0: // %entry |
| ; CHECK-SD-NEXT: sub sp, sp, #16 |
| ; CHECK-SD-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-SD-NEXT: ushll v0.2d, v0.2s, #0 |
| ; CHECK-SD-NEXT: mov x8, sp |
| ; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0 |
| ; CHECK-SD-NEXT: bfi x8, x0, #3, #1 |
| ; CHECK-SD-NEXT: str q0, [sp] |
| ; CHECK-SD-NEXT: ldr x0, [x8] |
| ; CHECK-SD-NEXT: add sp, sp, #16 |
| ; CHECK-SD-NEXT: ret |
| ; |
| ; CHECK-GI-LABEL: extract_v2i32_zext: |
| ; CHECK-GI: // %bb.0: // %entry |
| ; CHECK-GI-NEXT: sub sp, sp, #16 |
| ; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0 |
| ; CHECK-GI-NEXT: mov w9, w0 |
| ; CHECK-GI-NEXT: mov x8, sp |
| ; CHECK-GI-NEXT: and x9, x9, #0x1 |
| ; CHECK-GI-NEXT: str q0, [sp] |
| ; CHECK-GI-NEXT: ldr x0, [x8, x9, lsl #3] |
| ; CHECK-GI-NEXT: add sp, sp, #16 |
| ; CHECK-GI-NEXT: ret |
| entry: |
| %zvector = zext <2 x i32> %a to <2 x i64> |
| %d = extractelement <2 x i64> %zvector, i32 %c |
| ret i64 %d |
| } |
| |
| ; Convert <2 x double> to <2 x i64> with fptosi, then extract with the runtime |
| ; index %c. Both selectors are expected to convert the whole vector (fcvtzs) |
| ; before spilling it and loading the selected element. |
| define i64 @extract_v2double_fptosi(<2 x double> %a, i32 %c) { |
| ; CHECK-SD-LABEL: extract_v2double_fptosi: |
| ; CHECK-SD: // %bb.0: // %entry |
| ; CHECK-SD-NEXT: sub sp, sp, #16 |
| ; CHECK-SD-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-SD-NEXT: fcvtzs v0.2d, v0.2d |
| ; CHECK-SD-NEXT: mov x8, sp |
| ; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0 |
| ; CHECK-SD-NEXT: bfi x8, x0, #3, #1 |
| ; CHECK-SD-NEXT: str q0, [sp] |
| ; CHECK-SD-NEXT: ldr x0, [x8] |
| ; CHECK-SD-NEXT: add sp, sp, #16 |
| ; CHECK-SD-NEXT: ret |
| ; |
| ; CHECK-GI-LABEL: extract_v2double_fptosi: |
| ; CHECK-GI: // %bb.0: // %entry |
| ; CHECK-GI-NEXT: sub sp, sp, #16 |
| ; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-GI-NEXT: fcvtzs v0.2d, v0.2d |
| ; CHECK-GI-NEXT: mov w9, w0 |
| ; CHECK-GI-NEXT: mov x8, sp |
| ; CHECK-GI-NEXT: and x9, x9, #0x1 |
| ; CHECK-GI-NEXT: str q0, [sp] |
| ; CHECK-GI-NEXT: ldr x0, [x8, x9, lsl #3] |
| ; CHECK-GI-NEXT: add sp, sp, #16 |
| ; CHECK-GI-NEXT: ret |
| entry: |
| %vector = fptosi <2 x double> %a to <2 x i64> |
| %d = extractelement <2 x i64> %vector, i32 %c |
| ret i64 %d |
| } |
| |
| ; Negate <2 x double>, then extract a double with the runtime index %c. |
| ; Both selectors are expected to negate the whole vector (fneg) before spilling |
| ; it; the selected element is loaded straight into d0. |
| define double @extract_v2double_fneg(<2 x double> %a, i32 %c) { |
| ; CHECK-SD-LABEL: extract_v2double_fneg: |
| ; CHECK-SD: // %bb.0: // %entry |
| ; CHECK-SD-NEXT: sub sp, sp, #16 |
| ; CHECK-SD-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-SD-NEXT: fneg v0.2d, v0.2d |
| ; CHECK-SD-NEXT: mov x8, sp |
| ; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0 |
| ; CHECK-SD-NEXT: bfi x8, x0, #3, #1 |
| ; CHECK-SD-NEXT: str q0, [sp] |
| ; CHECK-SD-NEXT: ldr d0, [x8] |
| ; CHECK-SD-NEXT: add sp, sp, #16 |
| ; CHECK-SD-NEXT: ret |
| ; |
| ; CHECK-GI-LABEL: extract_v2double_fneg: |
| ; CHECK-GI: // %bb.0: // %entry |
| ; CHECK-GI-NEXT: sub sp, sp, #16 |
| ; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-GI-NEXT: fneg v0.2d, v0.2d |
| ; CHECK-GI-NEXT: mov w9, w0 |
| ; CHECK-GI-NEXT: mov x8, sp |
| ; CHECK-GI-NEXT: and x9, x9, #0x1 |
| ; CHECK-GI-NEXT: str q0, [sp] |
| ; CHECK-GI-NEXT: ldr d0, [x8, x9, lsl #3] |
| ; CHECK-GI-NEXT: add sp, sp, #16 |
| ; CHECK-GI-NEXT: ret |
| entry: |
| %vector = fneg <2 x double> %a |
| %d = extractelement <2 x double> %vector, i32 %c |
| ret double %d |
| } |
| |
| ; Add the constant vector <42, 11, 17, 6> to %a, then extract with the runtime |
| ; index %c (%b is unused). Both selectors are expected to perform the full |
| ; vector add, spill the result and load the element selected by the low two |
| ; bits of the index (four lanes). |
| define i32 @extract_v4i32_add(<4 x i32> %a, <4 x i32> %b, i32 %c) { |
| ; CHECK-SD-LABEL: extract_v4i32_add: |
| ; CHECK-SD: // %bb.0: // %entry |
| ; CHECK-SD-NEXT: sub sp, sp, #16 |
| ; CHECK-SD-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-SD-NEXT: adrp x8, .LCPI12_0 |
| ; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0 |
| ; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI12_0] |
| ; CHECK-SD-NEXT: mov x8, sp |
| ; CHECK-SD-NEXT: bfi x8, x0, #2, #2 |
| ; CHECK-SD-NEXT: add v0.4s, v0.4s, v1.4s |
| ; CHECK-SD-NEXT: str q0, [sp] |
| ; CHECK-SD-NEXT: ldr w0, [x8] |
| ; CHECK-SD-NEXT: add sp, sp, #16 |
| ; CHECK-SD-NEXT: ret |
| ; |
| ; CHECK-GI-LABEL: extract_v4i32_add: |
| ; CHECK-GI: // %bb.0: // %entry |
| ; CHECK-GI-NEXT: sub sp, sp, #16 |
| ; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-GI-NEXT: adrp x8, .LCPI12_0 |
| ; CHECK-GI-NEXT: mov x9, sp |
| ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI12_0] |
| ; CHECK-GI-NEXT: mov w8, w0 |
| ; CHECK-GI-NEXT: and x8, x8, #0x3 |
| ; CHECK-GI-NEXT: add v0.4s, v0.4s, v1.4s |
| ; CHECK-GI-NEXT: str q0, [sp] |
| ; CHECK-GI-NEXT: ldr w0, [x9, x8, lsl #2] |
| ; CHECK-GI-NEXT: add sp, sp, #16 |
| ; CHECK-GI-NEXT: ret |
| entry: |
| %vector = add <4 x i32> %a, <i32 42, i32 11, i32 17, i32 6> |
| %d = extractelement <4 x i32> %vector, i32 %c |
| ret i32 %d |
| } |
| |
| ; Take llvm.minimum of %a and %b, then extract a float with the runtime index |
| ; %c. Both selectors are expected to emit a full-vector fmin, spill the result |
| ; and load the element selected by the low two bits of the index. |
| ; NOTE(review): the function name says v4i32 but the operands are <4 x float>. |
| define float @extract_v4i32_minimum(<4 x float> %a, <4 x float> %b, i32 %c) { |
| ; CHECK-SD-LABEL: extract_v4i32_minimum: |
| ; CHECK-SD: // %bb.0: // %entry |
| ; CHECK-SD-NEXT: sub sp, sp, #16 |
| ; CHECK-SD-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-SD-NEXT: fmin v0.4s, v0.4s, v1.4s |
| ; CHECK-SD-NEXT: mov x8, sp |
| ; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0 |
| ; CHECK-SD-NEXT: bfi x8, x0, #2, #2 |
| ; CHECK-SD-NEXT: str q0, [sp] |
| ; CHECK-SD-NEXT: ldr s0, [x8] |
| ; CHECK-SD-NEXT: add sp, sp, #16 |
| ; CHECK-SD-NEXT: ret |
| ; |
| ; CHECK-GI-LABEL: extract_v4i32_minimum: |
| ; CHECK-GI: // %bb.0: // %entry |
| ; CHECK-GI-NEXT: sub sp, sp, #16 |
| ; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-GI-NEXT: fmin v0.4s, v0.4s, v1.4s |
| ; CHECK-GI-NEXT: mov w8, w0 |
| ; CHECK-GI-NEXT: mov x9, sp |
| ; CHECK-GI-NEXT: and x8, x8, #0x3 |
| ; CHECK-GI-NEXT: str q0, [sp] |
| ; CHECK-GI-NEXT: ldr s0, [x9, x8, lsl #2] |
| ; CHECK-GI-NEXT: add sp, sp, #16 |
| ; CHECK-GI-NEXT: ret |
| entry: |
| %vector = call <4 x float> @llvm.minimum.v4float(<4 x float> %a, <4 x float> %b) |
| %d = extractelement <4 x float> %vector, i32 %c |
| ret float %d |
| } |
| |
| ; Take llvm.minimum of %a and the constant vector <42.0, 11.0, 17.0, 6.0>, then |
| ; extract with the runtime index %c (%b is unused). Both selectors are expected |
| ; to load the constant from the constant pool, emit a full-vector fmin, spill |
| ; the result and load the selected element. |
| define float @extract_v4i32_minimum_build_vector(<4 x float> %a, <4 x float> %b, i32 %c) { |
| ; CHECK-SD-LABEL: extract_v4i32_minimum_build_vector: |
| ; CHECK-SD: // %bb.0: // %entry |
| ; CHECK-SD-NEXT: sub sp, sp, #16 |
| ; CHECK-SD-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-SD-NEXT: adrp x8, .LCPI14_0 |
| ; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0 |
| ; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI14_0] |
| ; CHECK-SD-NEXT: mov x8, sp |
| ; CHECK-SD-NEXT: bfi x8, x0, #2, #2 |
| ; CHECK-SD-NEXT: fmin v0.4s, v0.4s, v1.4s |
| ; CHECK-SD-NEXT: str q0, [sp] |
| ; CHECK-SD-NEXT: ldr s0, [x8] |
| ; CHECK-SD-NEXT: add sp, sp, #16 |
| ; CHECK-SD-NEXT: ret |
| ; |
| ; CHECK-GI-LABEL: extract_v4i32_minimum_build_vector: |
| ; CHECK-GI: // %bb.0: // %entry |
| ; CHECK-GI-NEXT: sub sp, sp, #16 |
| ; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-GI-NEXT: adrp x8, .LCPI14_0 |
| ; CHECK-GI-NEXT: mov x9, sp |
| ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI14_0] |
| ; CHECK-GI-NEXT: mov w8, w0 |
| ; CHECK-GI-NEXT: and x8, x8, #0x3 |
| ; CHECK-GI-NEXT: fmin v0.4s, v0.4s, v1.4s |
| ; CHECK-GI-NEXT: str q0, [sp] |
| ; CHECK-GI-NEXT: ldr s0, [x9, x8, lsl #2] |
| ; CHECK-GI-NEXT: add sp, sp, #16 |
| ; CHECK-GI-NEXT: ret |
| entry: |
| %vector = call <4 x float> @llvm.minimum.v4float(<4 x float> %a, <4 x float> <float 42.0, float 11.0, float 17.0, float 6.0>) |
| %d = extractelement <4 x float> %vector, i32 %c |
| ret float %d |
| } |
| |
| ; Take llvm.minimum of %a and a constant vector, then extract constant lane 1. |
| ; Both selectors are expected to still emit the full-vector fmin and then move |
| ; lane 1 into s0; no stack slot is used. |
| define float @extract_v4i32_minimum_build_vector_const(<4 x float> %a, <4 x float> %b, i32 %c) { |
| ; CHECK-LABEL: extract_v4i32_minimum_build_vector_const: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: adrp x8, .LCPI15_0 |
| ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI15_0] |
| ; CHECK-NEXT: fmin v0.4s, v0.4s, v1.4s |
| ; CHECK-NEXT: mov s0, v0.s[1] |
| ; CHECK-NEXT: ret |
| entry: |
| %vector = call <4 x float> @llvm.minimum.v4float(<4 x float> %a, <4 x float> <float 42.0, float 11.0, float 17.0, float 6.0>) |
| %d = extractelement <4 x float> %vector, i32 1 |
| ret float %d |
| } |
| |
| ; Apply llvm.copysign to %a with the constant sign source |
| ; <42.0, 11.0, 17.0, 6.0>, then extract with the runtime index %c. |
| ; SelectionDAG is expected to load the constant and blend with bif under the |
| ; mvni mask; GlobalISel is expected to emit only an AND of %a with that mask. |
| ; Both then spill the result and load the selected element. |
| define float @extract_v4i32_copysign_build_vector(<4 x float> %a, <4 x float> %b, i32 %c) { |
| ; CHECK-SD-LABEL: extract_v4i32_copysign_build_vector: |
| ; CHECK-SD: // %bb.0: // %entry |
| ; CHECK-SD-NEXT: sub sp, sp, #16 |
| ; CHECK-SD-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-SD-NEXT: adrp x8, .LCPI16_0 |
| ; CHECK-SD-NEXT: mvni v1.4s, #128, lsl #24 |
| ; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0 |
| ; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI16_0] |
| ; CHECK-SD-NEXT: mov x8, sp |
| ; CHECK-SD-NEXT: bfi x8, x0, #2, #2 |
| ; CHECK-SD-NEXT: bif v0.16b, v2.16b, v1.16b |
| ; CHECK-SD-NEXT: str q0, [sp] |
| ; CHECK-SD-NEXT: ldr s0, [x8] |
| ; CHECK-SD-NEXT: add sp, sp, #16 |
| ; CHECK-SD-NEXT: ret |
| ; |
| ; CHECK-GI-LABEL: extract_v4i32_copysign_build_vector: |
| ; CHECK-GI: // %bb.0: // %entry |
| ; CHECK-GI-NEXT: sub sp, sp, #16 |
| ; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-GI-NEXT: mvni v1.4s, #128, lsl #24 |
| ; CHECK-GI-NEXT: mov w8, w0 |
| ; CHECK-GI-NEXT: mov x9, sp |
| ; CHECK-GI-NEXT: and x8, x8, #0x3 |
| ; CHECK-GI-NEXT: and v0.16b, v0.16b, v1.16b |
| ; CHECK-GI-NEXT: str q0, [sp] |
| ; CHECK-GI-NEXT: ldr s0, [x9, x8, lsl #2] |
| ; CHECK-GI-NEXT: add sp, sp, #16 |
| ; CHECK-GI-NEXT: ret |
| entry: |
| %vector = call <4 x float> @llvm.copysign.v4float(<4 x float> %a, <4 x float> <float 42.0, float 11.0, float 17.0, float 6.0>) |
| %d = extractelement <4 x float> %vector, i32 %c |
| ret float %d |
| } |
| |
| ; Apply llvm.copysign to %a with a constant sign source, then extract constant |
| ; lane 2. Both selectors are expected to do the full-vector operation (bif with |
| ; a loaded constant for SelectionDAG, a plain AND for GlobalISel) and then move |
| ; lane 2 into s0 without touching the stack. |
| define float @extract_v4i32_copysign_build_vector_const(<4 x float> %a, <4 x float> %b, i32 %c) { |
| ; CHECK-SD-LABEL: extract_v4i32_copysign_build_vector_const: |
| ; CHECK-SD: // %bb.0: // %entry |
| ; CHECK-SD-NEXT: adrp x8, .LCPI17_0 |
| ; CHECK-SD-NEXT: mvni v1.4s, #128, lsl #24 |
| ; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI17_0] |
| ; CHECK-SD-NEXT: bif v0.16b, v2.16b, v1.16b |
| ; CHECK-SD-NEXT: mov s0, v0.s[2] |
| ; CHECK-SD-NEXT: ret |
| ; |
| ; CHECK-GI-LABEL: extract_v4i32_copysign_build_vector_const: |
| ; CHECK-GI: // %bb.0: // %entry |
| ; CHECK-GI-NEXT: mvni v1.4s, #128, lsl #24 |
| ; CHECK-GI-NEXT: and v0.16b, v0.16b, v1.16b |
| ; CHECK-GI-NEXT: mov s0, v0.s[2] |
| ; CHECK-GI-NEXT: ret |
| entry: |
| %vector = call <4 x float> @llvm.copysign.v4float(<4 x float> %a, <4 x float> <float 42.0, float 11.0, float 17.0, float 6.0>) |
| %d = extractelement <4 x float> %vector, i32 2 |
| ret float %d |
| } |
| |
| |
| ; Compare %a sle the constant vector <42, 11, 17, 6>, zero-extend the i1 result |
| ; to i32 and extract with the runtime index %c. Both selectors are expected to |
| ; emit cmge with the constant as first operand, mask each lane down to 0/1 with |
| ; a splat of 1, spill the result and load the selected element. |
| define i32 @extract_v4i32_icmp(<4 x i32> %a, <4 x i32> %b, i32 %c) { |
| ; CHECK-SD-LABEL: extract_v4i32_icmp: |
| ; CHECK-SD: // %bb.0: // %entry |
| ; CHECK-SD-NEXT: sub sp, sp, #16 |
| ; CHECK-SD-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-SD-NEXT: adrp x8, .LCPI18_0 |
| ; CHECK-SD-NEXT: movi v2.4s, #1 |
| ; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0 |
| ; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI18_0] |
| ; CHECK-SD-NEXT: mov x8, sp |
| ; CHECK-SD-NEXT: bfi x8, x0, #2, #2 |
| ; CHECK-SD-NEXT: cmge v0.4s, v1.4s, v0.4s |
| ; CHECK-SD-NEXT: and v0.16b, v0.16b, v2.16b |
| ; CHECK-SD-NEXT: str q0, [sp] |
| ; CHECK-SD-NEXT: ldr w0, [x8] |
| ; CHECK-SD-NEXT: add sp, sp, #16 |
| ; CHECK-SD-NEXT: ret |
| ; |
| ; CHECK-GI-LABEL: extract_v4i32_icmp: |
| ; CHECK-GI: // %bb.0: // %entry |
| ; CHECK-GI-NEXT: sub sp, sp, #16 |
| ; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-GI-NEXT: adrp x8, .LCPI18_0 |
| ; CHECK-GI-NEXT: movi v2.4s, #1 |
| ; CHECK-GI-NEXT: mov x9, sp |
| ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI18_0] |
| ; CHECK-GI-NEXT: mov w8, w0 |
| ; CHECK-GI-NEXT: and x8, x8, #0x3 |
| ; CHECK-GI-NEXT: cmge v0.4s, v1.4s, v0.4s |
| ; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b |
| ; CHECK-GI-NEXT: str q0, [sp] |
| ; CHECK-GI-NEXT: ldr w0, [x9, x8, lsl #2] |
| ; CHECK-GI-NEXT: add sp, sp, #16 |
| ; CHECK-GI-NEXT: ret |
| entry: |
| %vector = icmp sle <4 x i32> %a, <i32 42, i32 11, i32 17, i32 6> |
| %zvector = zext <4 x i1> %vector to <4 x i32> |
| %d = extractelement <4 x i32> %zvector, i32 %c |
| ret i32 %d |
| } |
| |
| ; Compare %a sle a constant vector, zero-extend to i32 and extract constant |
| ; lane 2. Both selectors are expected to do the full-vector compare and mask, |
| ; then move lane 2 to w0 (one lane move for SelectionDAG, a lane move plus fmov |
| ; for GlobalISel) without using the stack. |
| define i32 @extract_v4i32_icmp_const(<4 x i32> %a, <4 x i32> %b, i32 %c) { |
| ; CHECK-SD-LABEL: extract_v4i32_icmp_const: |
| ; CHECK-SD: // %bb.0: // %entry |
| ; CHECK-SD-NEXT: adrp x8, .LCPI19_0 |
| ; CHECK-SD-NEXT: movi v2.4s, #1 |
| ; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI19_0] |
| ; CHECK-SD-NEXT: cmge v0.4s, v1.4s, v0.4s |
| ; CHECK-SD-NEXT: and v0.16b, v0.16b, v2.16b |
| ; CHECK-SD-NEXT: mov w0, v0.s[2] |
| ; CHECK-SD-NEXT: ret |
| ; |
| ; CHECK-GI-LABEL: extract_v4i32_icmp_const: |
| ; CHECK-GI: // %bb.0: // %entry |
| ; CHECK-GI-NEXT: adrp x8, .LCPI19_0 |
| ; CHECK-GI-NEXT: movi v2.4s, #1 |
| ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI19_0] |
| ; CHECK-GI-NEXT: cmge v0.4s, v1.4s, v0.4s |
| ; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b |
| ; CHECK-GI-NEXT: mov s0, v0.s[2] |
| ; CHECK-GI-NEXT: fmov w0, s0 |
| ; CHECK-GI-NEXT: ret |
| entry: |
| %vector = icmp sle <4 x i32> %a, <i32 42, i32 11, i32 17, i32 6> |
| %zvector = zext <4 x i1> %vector to <4 x i32> |
| %d = extractelement <4 x i32> %zvector, i32 2 |
| ret i32 %d |
| } |
| |
| ; Compare %a "uno" against a splat of 1.0, zero-extend the i1 result to i32 and |
| ; extract with the runtime index %c. SelectionDAG is expected to test %a against |
| ; itself (fcmeq v0, v0, v0) and invert via bic; GlobalISel is expected to |
| ; compare against the 1.0 splat with fcmge/fcmgt, OR the results and invert via |
| ; bic. Both then spill the result and load the selected element. |
| define i32 @extract_v4float_fcmp(<4 x float> %a, <4 x float> %b, i32 %c) { |
| ; CHECK-SD-LABEL: extract_v4float_fcmp: |
| ; CHECK-SD: // %bb.0: // %entry |
| ; CHECK-SD-NEXT: sub sp, sp, #16 |
| ; CHECK-SD-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-SD-NEXT: movi v1.4s, #1 |
| ; CHECK-SD-NEXT: fcmeq v0.4s, v0.4s, v0.4s |
| ; CHECK-SD-NEXT: mov x8, sp |
| ; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0 |
| ; CHECK-SD-NEXT: bfi x8, x0, #2, #2 |
| ; CHECK-SD-NEXT: bic v0.16b, v1.16b, v0.16b |
| ; CHECK-SD-NEXT: str q0, [sp] |
| ; CHECK-SD-NEXT: ldr w0, [x8] |
| ; CHECK-SD-NEXT: add sp, sp, #16 |
| ; CHECK-SD-NEXT: ret |
| ; |
| ; CHECK-GI-LABEL: extract_v4float_fcmp: |
| ; CHECK-GI: // %bb.0: // %entry |
| ; CHECK-GI-NEXT: sub sp, sp, #16 |
| ; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-GI-NEXT: fmov v1.4s, #1.00000000 |
| ; CHECK-GI-NEXT: mov w8, w0 |
| ; CHECK-GI-NEXT: mov x9, sp |
| ; CHECK-GI-NEXT: and x8, x8, #0x3 |
| ; CHECK-GI-NEXT: fcmge v2.4s, v0.4s, v1.4s |
| ; CHECK-GI-NEXT: fcmgt v0.4s, v1.4s, v0.4s |
| ; CHECK-GI-NEXT: movi v1.4s, #1 |
| ; CHECK-GI-NEXT: orr v0.16b, v0.16b, v2.16b |
| ; CHECK-GI-NEXT: bic v0.16b, v1.16b, v0.16b |
| ; CHECK-GI-NEXT: str q0, [sp] |
| ; CHECK-GI-NEXT: ldr w0, [x9, x8, lsl #2] |
| ; CHECK-GI-NEXT: add sp, sp, #16 |
| ; CHECK-GI-NEXT: ret |
| entry: |
| %vector = fcmp uno <4 x float> %a, <float 1.0, float 1.0, float 1.0, float 1.0> |
| %zvector = zext <4 x i1> %vector to <4 x i32> |
| %d = extractelement <4 x i32> %zvector, i32 %c |
| ret i32 %d |
| } |
| |
| ; Compare %a "uno" against a splat of 1.0, zero-extend to i32 and extract |
| ; constant lane 1. Both selectors are expected to do the full-vector compare |
| ; (fcmeq + bic for SelectionDAG; fcmge/fcmgt/orr + bic for GlobalISel) and then |
| ; move lane 1 to w0 without using the stack. |
| define i32 @extract_v4float_fcmp_const(<4 x float> %a, <4 x float> %b, i32 %c) { |
| ; CHECK-SD-LABEL: extract_v4float_fcmp_const: |
| ; CHECK-SD: // %bb.0: // %entry |
| ; CHECK-SD-NEXT: movi v1.4s, #1 |
| ; CHECK-SD-NEXT: fcmeq v0.4s, v0.4s, v0.4s |
| ; CHECK-SD-NEXT: bic v0.16b, v1.16b, v0.16b |
| ; CHECK-SD-NEXT: mov w0, v0.s[1] |
| ; CHECK-SD-NEXT: ret |
| ; |
| ; CHECK-GI-LABEL: extract_v4float_fcmp_const: |
| ; CHECK-GI: // %bb.0: // %entry |
| ; CHECK-GI-NEXT: fmov v1.4s, #1.00000000 |
| ; CHECK-GI-NEXT: fcmge v2.4s, v0.4s, v1.4s |
| ; CHECK-GI-NEXT: fcmgt v0.4s, v1.4s, v0.4s |
| ; CHECK-GI-NEXT: movi v1.4s, #1 |
| ; CHECK-GI-NEXT: orr v0.16b, v0.16b, v2.16b |
| ; CHECK-GI-NEXT: bic v0.16b, v1.16b, v0.16b |
| ; CHECK-GI-NEXT: mov s0, v0.s[1] |
| ; CHECK-GI-NEXT: fmov w0, s0 |
| ; CHECK-GI-NEXT: ret |
| entry: |
| %vector = fcmp uno <4 x float> %a, <float 1.0, float 1.0, float 1.0, float 1.0> |
| %zvector = zext <4 x i1> %vector to <4 x i32> |
| %d = extractelement <4 x i32> %zvector, i32 1 |
| ret i32 %d |
| } |
| |
| ; Select per lane between %a and the constant vector <42, 11, 17, 6> under the |
| ; <4 x i1> mask %cond, then extract with the runtime index %c. Both selectors |
| ; are expected to widen the mask (ushll), turn bit 0 into a full-lane mask |
| ; (shl #31 then cmlt #0 for SelectionDAG, shl #31 then sshr #31 for GlobalISel), |
| ; blend with bif, spill the result and load the selected element. |
| define i32 @extract_v4i32_select(<4 x i32> %a, <4 x i32> %b, i32 %c, <4 x i1> %cond) { |
| ; CHECK-SD-LABEL: extract_v4i32_select: |
| ; CHECK-SD: // %bb.0: // %entry |
| ; CHECK-SD-NEXT: sub sp, sp, #16 |
| ; CHECK-SD-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-SD-NEXT: ushll v1.4s, v2.4h, #0 |
| ; CHECK-SD-NEXT: adrp x8, .LCPI22_0 |
| ; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0 |
| ; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI22_0] |
| ; CHECK-SD-NEXT: mov x8, sp |
| ; CHECK-SD-NEXT: bfi x8, x0, #2, #2 |
| ; CHECK-SD-NEXT: shl v1.4s, v1.4s, #31 |
| ; CHECK-SD-NEXT: cmlt v1.4s, v1.4s, #0 |
| ; CHECK-SD-NEXT: bif v0.16b, v2.16b, v1.16b |
| ; CHECK-SD-NEXT: str q0, [sp] |
| ; CHECK-SD-NEXT: ldr w0, [x8] |
| ; CHECK-SD-NEXT: add sp, sp, #16 |
| ; CHECK-SD-NEXT: ret |
| ; |
| ; CHECK-GI-LABEL: extract_v4i32_select: |
| ; CHECK-GI: // %bb.0: // %entry |
| ; CHECK-GI-NEXT: sub sp, sp, #16 |
| ; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-GI-NEXT: ushll v1.4s, v2.4h, #0 |
| ; CHECK-GI-NEXT: adrp x8, .LCPI22_0 |
| ; CHECK-GI-NEXT: mov x9, sp |
| ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI22_0] |
| ; CHECK-GI-NEXT: mov w8, w0 |
| ; CHECK-GI-NEXT: and x8, x8, #0x3 |
| ; CHECK-GI-NEXT: shl v1.4s, v1.4s, #31 |
| ; CHECK-GI-NEXT: sshr v1.4s, v1.4s, #31 |
| ; CHECK-GI-NEXT: bif v0.16b, v2.16b, v1.16b |
| ; CHECK-GI-NEXT: str q0, [sp] |
| ; CHECK-GI-NEXT: ldr w0, [x9, x8, lsl #2] |
| ; CHECK-GI-NEXT: add sp, sp, #16 |
| ; CHECK-GI-NEXT: ret |
| entry: |
| %vector = select <4 x i1> %cond, <4 x i32> %a, <4 x i32> <i32 42, i32 11, i32 17, i32 6> |
| %d = extractelement <4 x i32> %vector, i32 %c |
| ret i32 %d |
| } |
| |
| ; Select per lane between %a and the constant vector <42, 11, 17, 6>, then |
| ; extract constant lane 2. SelectionDAG is expected to replace the constant |
| ; operand with a splat of 17 (the lane-2 value, via movi); GlobalISel is |
| ; expected to load the full constant from the constant pool. Both blend with |
| ; bif and move lane 2 to w0 without using the stack. |
| define i32 @extract_v4i32_select_const(<4 x i32> %a, <4 x i32> %b, i32 %c, <4 x i1> %cond) { |
| ; CHECK-SD-LABEL: extract_v4i32_select_const: |
| ; CHECK-SD: // %bb.0: // %entry |
| ; CHECK-SD-NEXT: ushll v1.4s, v2.4h, #0 |
| ; CHECK-SD-NEXT: movi v2.4s, #17 |
| ; CHECK-SD-NEXT: shl v1.4s, v1.4s, #31 |
| ; CHECK-SD-NEXT: cmlt v1.4s, v1.4s, #0 |
| ; CHECK-SD-NEXT: bif v0.16b, v2.16b, v1.16b |
| ; CHECK-SD-NEXT: mov w0, v0.s[2] |
| ; CHECK-SD-NEXT: ret |
| ; |
| ; CHECK-GI-LABEL: extract_v4i32_select_const: |
| ; CHECK-GI: // %bb.0: // %entry |
| ; CHECK-GI-NEXT: ushll v1.4s, v2.4h, #0 |
| ; CHECK-GI-NEXT: adrp x8, .LCPI23_0 |
| ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI23_0] |
| ; CHECK-GI-NEXT: shl v1.4s, v1.4s, #31 |
| ; CHECK-GI-NEXT: sshr v1.4s, v1.4s, #31 |
| ; CHECK-GI-NEXT: bif v0.16b, v2.16b, v1.16b |
| ; CHECK-GI-NEXT: mov s0, v0.s[2] |
| ; CHECK-GI-NEXT: fmov w0, s0 |
| ; CHECK-GI-NEXT: ret |
| entry: |
| %vector = select <4 x i1> %cond, <4 x i32> %a, <4 x i32> <i32 42, i32 11, i32 17, i32 6> |
| %d = extractelement <4 x i32> %vector, i32 2 |
| ret i32 %d |
| } |
| |
| ; Run %a through ceil -> floor -> fabs -> fptosi -> integer abs, then extract |
| ; with the runtime index %c. Both selectors are expected to emit every step on |
| ; the full vector (frintp, frintm, fabs, fcvtzs, abs) before spilling the |
| ; result and loading the element selected by the low two bits of the index. |
| define i32 @extract_v4i32_abs(<4 x float> %a, i32 %c) { |
| ; CHECK-SD-LABEL: extract_v4i32_abs: |
| ; CHECK-SD: // %bb.0: // %entry |
| ; CHECK-SD-NEXT: sub sp, sp, #16 |
| ; CHECK-SD-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-SD-NEXT: frintp v0.4s, v0.4s |
| ; CHECK-SD-NEXT: mov x8, sp |
| ; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0 |
| ; CHECK-SD-NEXT: bfi x8, x0, #2, #2 |
| ; CHECK-SD-NEXT: frintm v0.4s, v0.4s |
| ; CHECK-SD-NEXT: fabs v0.4s, v0.4s |
| ; CHECK-SD-NEXT: fcvtzs v0.4s, v0.4s |
| ; CHECK-SD-NEXT: abs v0.4s, v0.4s |
| ; CHECK-SD-NEXT: str q0, [sp] |
| ; CHECK-SD-NEXT: ldr w0, [x8] |
| ; CHECK-SD-NEXT: add sp, sp, #16 |
| ; CHECK-SD-NEXT: ret |
| ; |
| ; CHECK-GI-LABEL: extract_v4i32_abs: |
| ; CHECK-GI: // %bb.0: // %entry |
| ; CHECK-GI-NEXT: sub sp, sp, #16 |
| ; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-GI-NEXT: frintp v0.4s, v0.4s |
| ; CHECK-GI-NEXT: mov w9, w0 |
| ; CHECK-GI-NEXT: mov x8, sp |
| ; CHECK-GI-NEXT: and x9, x9, #0x3 |
| ; CHECK-GI-NEXT: frintm v0.4s, v0.4s |
| ; CHECK-GI-NEXT: fabs v0.4s, v0.4s |
| ; CHECK-GI-NEXT: fcvtzs v0.4s, v0.4s |
| ; CHECK-GI-NEXT: abs v0.4s, v0.4s |
| ; CHECK-GI-NEXT: str q0, [sp] |
| ; CHECK-GI-NEXT: ldr w0, [x8, x9, lsl #2] |
| ; CHECK-GI-NEXT: add sp, sp, #16 |
| ; CHECK-GI-NEXT: ret |
| entry: |
| %ceil = call <4 x float> @llvm.ceil.v4float(<4 x float> %a) |
| %floor = call <4 x float> @llvm.floor.v4float(<4 x float> %ceil) |
| %fabs = call <4 x float> @llvm.fabs.v4float(<4 x float> %floor) |
| %abs = fptosi <4 x float> %fabs to <4 x i32> |
| %vector = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %abs, i1 0) |
| %d = extractelement <4 x i32> %vector, i32 %c |
| ret i32 %d |
| } |
| |
| ; Run the constant vector <1.0, 4.0, 3.0, 2.0> through ceil -> floor -> fabs -> |
| ; fptosi -> integer abs and extract constant lane 1 (%a and %c are unused). |
| ; SelectionDAG is expected to fold everything to the immediate 4; GlobalISel is |
| ; expected to load the constant and execute the whole chain at run time before |
| ; moving lane 1 to w0. |
| define i32 @extract_v4i32_abs_const(<4 x float> %a, i32 %c) { |
| ; CHECK-SD-LABEL: extract_v4i32_abs_const: |
| ; CHECK-SD: // %bb.0: // %entry |
| ; CHECK-SD-NEXT: mov w0, #4 // =0x4 |
| ; CHECK-SD-NEXT: ret |
| ; |
| ; CHECK-GI-LABEL: extract_v4i32_abs_const: |
| ; CHECK-GI: // %bb.0: // %entry |
| ; CHECK-GI-NEXT: adrp x8, .LCPI25_0 |
| ; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI25_0] |
| ; CHECK-GI-NEXT: frintp v0.4s, v0.4s |
| ; CHECK-GI-NEXT: frintm v0.4s, v0.4s |
| ; CHECK-GI-NEXT: fabs v0.4s, v0.4s |
| ; CHECK-GI-NEXT: fcvtzs v0.4s, v0.4s |
| ; CHECK-GI-NEXT: abs v0.4s, v0.4s |
| ; CHECK-GI-NEXT: mov s0, v0.s[1] |
| ; CHECK-GI-NEXT: fmov w0, s0 |
| ; CHECK-GI-NEXT: ret |
| entry: |
| %ceil = call <4 x float> @llvm.ceil.v4float(<4 x float> <float 1.0, float 4.0, float 3.0, float 2.0>) |
| %floor = call <4 x float> @llvm.floor.v4float(<4 x float> %ceil) |
| %fabs = call <4 x float> @llvm.fabs.v4float(<4 x float> %floor) |
| %abs = fptosi <4 x float> %fabs to <4 x i32> |
| %vector = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %abs, i1 0) |
| %d = extractelement <4 x i32> %vector, i32 1 |
| ret i32 %d |
| } |
| |
| ; Run the constant vector <1.0, 4.0, 3.0, 2.0> through ceil -> floor -> fabs -> |
| ; fptosi -> integer abs, but extract with the runtime index %c ("half const": |
| ; constant data, variable index). SelectionDAG is expected to emit no |
| ; arithmetic, only a constant-pool load that is spilled and indexed; GlobalISel |
| ; is expected to load the constant and execute the whole chain at run time |
| ; before spilling and indexing. |
| define i32 @extract_v4i32_abs_half_const(<4 x float> %a, i32 %c) { |
| ; CHECK-SD-LABEL: extract_v4i32_abs_half_const: |
| ; CHECK-SD: // %bb.0: // %entry |
| ; CHECK-SD-NEXT: sub sp, sp, #16 |
| ; CHECK-SD-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-SD-NEXT: adrp x8, .LCPI26_0 |
| ; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0 |
| ; CHECK-SD-NEXT: ldr q0, [x8, :lo12:.LCPI26_0] |
| ; CHECK-SD-NEXT: mov x8, sp |
| ; CHECK-SD-NEXT: bfi x8, x0, #2, #2 |
| ; CHECK-SD-NEXT: str q0, [sp] |
| ; CHECK-SD-NEXT: ldr w0, [x8] |
| ; CHECK-SD-NEXT: add sp, sp, #16 |
| ; CHECK-SD-NEXT: ret |
| ; |
| ; CHECK-GI-LABEL: extract_v4i32_abs_half_const: |
| ; CHECK-GI: // %bb.0: // %entry |
| ; CHECK-GI-NEXT: sub sp, sp, #16 |
| ; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-GI-NEXT: adrp x8, .LCPI26_0 |
| ; CHECK-GI-NEXT: mov x9, sp |
| ; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI26_0] |
| ; CHECK-GI-NEXT: mov w8, w0 |
| ; CHECK-GI-NEXT: and x8, x8, #0x3 |
| ; CHECK-GI-NEXT: frintp v0.4s, v0.4s |
| ; CHECK-GI-NEXT: frintm v0.4s, v0.4s |
| ; CHECK-GI-NEXT: fabs v0.4s, v0.4s |
| ; CHECK-GI-NEXT: fcvtzs v0.4s, v0.4s |
| ; CHECK-GI-NEXT: abs v0.4s, v0.4s |
| ; CHECK-GI-NEXT: str q0, [sp] |
| ; CHECK-GI-NEXT: ldr w0, [x9, x8, lsl #2] |
| ; CHECK-GI-NEXT: add sp, sp, #16 |
| ; CHECK-GI-NEXT: ret |
| entry: |
| %ceil = call <4 x float> @llvm.ceil.v4float(<4 x float> <float 1.0, float 4.0, float 3.0, float 2.0>) |
| %floor = call <4 x float> @llvm.floor.v4float(<4 x float> %ceil) |
| %fabs = call <4 x float> @llvm.fabs.v4float(<4 x float> %floor) |
| %abs = fptosi <4 x float> %fabs to <4 x i32> |
| %vector = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %abs, i1 0) |
| %d = extractelement <4 x i32> %vector, i32 %c |
| ret i32 %d |
| } |
| |
| ; Insert the <2 x i32> subvector %b at element 0 of %a with llvm.vector.insert, |
| ; then extract with the runtime index %c. This function is listed in the |
| ; GlobalISel fallback warnings at the top of the file, and a single set of |
| ; assertions covers both RUN lines: %b is combined with the upper half of %a |
| ; (ext + lane move), spilled, and the selected element is loaded. |
| define i32 @extract_v4i32_vector_insert(<4 x i32> %a, <2 x i32> %b, i32 %c) { |
| ; CHECK-LABEL: extract_v4i32_vector_insert: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: sub sp, sp, #16 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 |
| ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 |
| ; CHECK-NEXT: mov x8, sp |
| ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 |
| ; CHECK-NEXT: bfi x8, x0, #2, #2 |
| ; CHECK-NEXT: mov v1.d[1], v0.d[0] |
| ; CHECK-NEXT: str q1, [sp] |
| ; CHECK-NEXT: ldr w0, [x8] |
| ; CHECK-NEXT: add sp, sp, #16 |
| ; CHECK-NEXT: ret |
| entry: |
| %vector = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> %a, <2 x i32> %b, i64 0) |
| %d = extractelement <4 x i32> %vector, i32 %c |
| ret i32 %d |
| } |
| |
| ; Insert the <2 x i32> subvector %b at element 0 of %a, then extract constant |
| ; lane 1, which lies inside the inserted part. The expected code reads lane 1 |
| ; of %b (v1) directly and never touches %a. This function is listed in the |
| ; GlobalISel fallback warnings at the top of the file. |
| define i32 @extract_v4i32_vector_insert_const(<4 x i32> %a, <2 x i32> %b, i32 %c) { |
| ; CHECK-LABEL: extract_v4i32_vector_insert_const: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 |
| ; CHECK-NEXT: mov w0, v1.s[1] |
| ; CHECK-NEXT: ret |
| entry: |
| %vector = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> %a, <2 x i32> %b, i64 0) |
| %d = extractelement <4 x i32> %vector, i32 1 |
| ret i32 %d |
| } |
| |
| ; Pass %a through llvm.vector.extract at offset 0, then extract with the |
| ; runtime index %c. The expected code has no instruction for the intrinsic: %a |
| ; is spilled as-is and the selected element is loaded. This function is listed |
| ; in the GlobalISel fallback warnings at the top of the file. |
| ; NOTE(review): the intrinsic name is mangled ".v2i32.v4i32" while the call is |
| ; typed as returning <4 x i32> -- confirm the mismatch is intentional. |
| define i32 @extract_v4i32_vector_extract(<4 x i32> %a, <2 x i32> %b, i32 %c) { |
| ; CHECK-LABEL: extract_v4i32_vector_extract: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: sub sp, sp, #16 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-NEXT: mov x8, sp |
| ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 |
| ; CHECK-NEXT: str q0, [sp] |
| ; CHECK-NEXT: bfi x8, x0, #2, #2 |
| ; CHECK-NEXT: ldr w0, [x8] |
| ; CHECK-NEXT: add sp, sp, #16 |
| ; CHECK-NEXT: ret |
| entry: |
| %vector = call <4 x i32> @llvm.vector.extract.v2i32.v4i32(<4 x i32> %a, i64 0) |
| %d = extractelement <4 x i32> %vector, i32 %c |
| ret i32 %d |
| } |
| |
| ; Pass %a through llvm.vector.extract at offset 0, then extract constant lane 0. |
| ; The expected code is a single fmov of lane 0 of %a into w0. This function is |
| ; listed in the GlobalISel fallback warnings at the top of the file. |
| ; NOTE(review): the intrinsic name is mangled ".v2i32.v4i32" while the call is |
| ; typed as returning <4 x i32> -- confirm the mismatch is intentional. |
| define i32 @extract_v4i32_vector_extract_const(<4 x i32> %a, <2 x i32> %b, i32 %c) { |
| ; CHECK-LABEL: extract_v4i32_vector_extract_const: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: fmov w0, s0 |
| ; CHECK-NEXT: ret |
| entry: |
| %vector = call <4 x i32> @llvm.vector.extract.v2i32.v4i32(<4 x i32> %a, i64 0) |
| %d = extractelement <4 x i32> %vector, i32 0 |
| ret i32 %d |
| } |
| |
| ; Load a <4 x i32> from %arg and extract with the runtime index %c. |
| ; Both selectors are expected to skip the vector load and the stack slot and |
| ; emit one scalar load from %arg (x1), indexed by the low two bits of %c and |
| ; scaled by 4. |
| define i32 @extract_v4i32_load(<4 x i32> %a, <2 x i32> %b, i32 %c, ptr %arg) { |
| ; CHECK-SD-LABEL: extract_v4i32_load: |
| ; CHECK-SD: // %bb.0: // %entry |
| ; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0 |
| ; CHECK-SD-NEXT: and x8, x0, #0x3 |
| ; CHECK-SD-NEXT: ldr w0, [x1, x8, lsl #2] |
| ; CHECK-SD-NEXT: ret |
| ; |
| ; CHECK-GI-LABEL: extract_v4i32_load: |
| ; CHECK-GI: // %bb.0: // %entry |
| ; CHECK-GI-NEXT: mov w8, w0 |
| ; CHECK-GI-NEXT: and x8, x8, #0x3 |
| ; CHECK-GI-NEXT: ldr w0, [x1, x8, lsl #2] |
| ; CHECK-GI-NEXT: ret |
| entry: |
| %vector = load <4 x i32>, ptr %arg |
| %d = extractelement <4 x i32> %vector, i32 %c |
| ret i32 %d |
| } |
| |
| ; Constant-index (lane 0) extractelement from a loaded <4 x i32>: the expected |
| ; code is a single 32-bit scalar load from the pointer, identical for both |
| ; selectors. %a, %b and %c are unused. |
| define i32 @extract_v4i32_load_const(<4 x i32> %a, <2 x i32> %b, i32 %c, ptr %arg) { |
| ; CHECK-LABEL: extract_v4i32_load_const: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: ldr w0, [x1] |
| ; CHECK-NEXT: ret |
| entry: |
| %vector = load <4 x i32>, ptr %arg |
| %d = extractelement <4 x i32> %vector, i32 0 |
| ret i32 %d |
| } |
| |
| ; Variable-index extractelement from a <4 x i32> bitcast to <2 x double>. |
| ; Both selectors spill q0 to a 16-byte stack slot and reload one 64-bit lane. |
| ; SelectionDAG forms the address by inserting one index bit at bit 3 of the |
| ; slot address; GlobalISel masks the index with 0x1 and uses a scaled (lsl #3) |
| ; register offset. |
| define double @extract_v4i32_bitcast(<4 x i32> %a, i32 %c) { |
| ; CHECK-SD-LABEL: extract_v4i32_bitcast: |
| ; CHECK-SD: // %bb.0: // %entry |
| ; CHECK-SD-NEXT: sub sp, sp, #16 |
| ; CHECK-SD-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-SD-NEXT: mov x8, sp |
| ; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0 |
| ; CHECK-SD-NEXT: str q0, [sp] |
| ; CHECK-SD-NEXT: bfi x8, x0, #3, #1 |
| ; CHECK-SD-NEXT: ldr d0, [x8] |
| ; CHECK-SD-NEXT: add sp, sp, #16 |
| ; CHECK-SD-NEXT: ret |
| ; |
| ; CHECK-GI-LABEL: extract_v4i32_bitcast: |
| ; CHECK-GI: // %bb.0: // %entry |
| ; CHECK-GI-NEXT: sub sp, sp, #16 |
| ; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-GI-NEXT: mov w9, w0 |
| ; CHECK-GI-NEXT: mov x8, sp |
| ; CHECK-GI-NEXT: str q0, [sp] |
| ; CHECK-GI-NEXT: and x9, x9, #0x1 |
| ; CHECK-GI-NEXT: ldr d0, [x8, x9, lsl #3] |
| ; CHECK-GI-NEXT: add sp, sp, #16 |
| ; CHECK-GI-NEXT: ret |
| entry: |
| %vector = bitcast <4 x i32> %a to <2 x double> |
| %d = extractelement <2 x double> %vector, i32 %c |
| ret double %d |
| } |
| |
| ; Constant-index (lane 0) extractelement from a <4 x i32> bitcast to |
| ; <2 x double>. The expected output contains only a register kill annotation |
| ; before the return, i.e. no instruction is emitted for the extract. %c is |
| ; unused. |
| define double @extract_v4i32_bitcast_const(<4 x i32> %a, i32 %c) { |
| ; CHECK-LABEL: extract_v4i32_bitcast_const: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 |
| ; CHECK-NEXT: ret |
| entry: |
| %vector = bitcast <4 x i32> %a to <2 x double> |
| %d = extractelement <2 x double> %vector, i32 0 |
| ret double %d |
| } |
| |
| ; Variable-index extractelement from a two-input shufflevector with mask |
| ; <0, 2, 4, 3>. Both selectors materialize the shuffled vector, spill it to a |
| ; 16-byte stack slot and load the selected 32-bit lane. SelectionDAG builds the |
| ; shuffle with uzp1 plus a lane move; GlobalISel uses tbl with a mask loaded |
| ; from the constant pool. |
| define i32 @extract_v4i32_shuffle(<4 x i32> %a, <4 x i32> %b, i32 %c) { |
| ; CHECK-SD-LABEL: extract_v4i32_shuffle: |
| ; CHECK-SD: // %bb.0: // %entry |
| ; CHECK-SD-NEXT: sub sp, sp, #16 |
| ; CHECK-SD-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-SD-NEXT: uzp1 v1.4s, v0.4s, v1.4s |
| ; CHECK-SD-NEXT: mov x8, sp |
| ; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0 |
| ; CHECK-SD-NEXT: bfi x8, x0, #2, #2 |
| ; CHECK-SD-NEXT: mov v1.s[3], v0.s[3] |
| ; CHECK-SD-NEXT: str q1, [sp] |
| ; CHECK-SD-NEXT: ldr w0, [x8] |
| ; CHECK-SD-NEXT: add sp, sp, #16 |
| ; CHECK-SD-NEXT: ret |
| ; |
| ; CHECK-GI-LABEL: extract_v4i32_shuffle: |
| ; CHECK-GI: // %bb.0: // %entry |
| ; CHECK-GI-NEXT: sub sp, sp, #16 |
| ; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-GI-NEXT: adrp x8, .LCPI35_0 |
| ; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 |
| ; CHECK-GI-NEXT: mov x9, sp |
| ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI35_0] |
| ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 |
| ; CHECK-GI-NEXT: mov w8, w0 |
| ; CHECK-GI-NEXT: and x8, x8, #0x3 |
| ; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b |
| ; CHECK-GI-NEXT: str q0, [sp] |
| ; CHECK-GI-NEXT: ldr w0, [x9, x8, lsl #2] |
| ; CHECK-GI-NEXT: add sp, sp, #16 |
| ; CHECK-GI-NEXT: ret |
| entry: |
| %vector = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 3> |
| %d = extractelement <4 x i32> %vector, i32 %c |
| ret i32 %d |
| } |
| |
| ; Constant-index (lane 2) extractelement from the <0, 2, 4, 3> shuffle. Mask |
| ; entry 2 is index 4, and the expected code reads the result straight from s1 |
| ; (the low lane of the register holding %b) without performing the shuffle. |
| ; %c is unused. |
| define i32 @extract_v4i32_shuffle_const(<4 x i32> %a, <4 x i32> %b, i32 %c) { |
| ; CHECK-LABEL: extract_v4i32_shuffle_const: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: fmov w0, s1 |
| ; CHECK-NEXT: ret |
| entry: |
| %vector = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 3> |
| %d = extractelement <4 x i32> %vector, i32 2 |
| ret i32 %d |
| } |
| |
| ; Variable-index extractelement from the constant vector splat (i32 11). The |
| ; expected code for both selectors still materializes the splat with movi, |
| ; spills it to a 16-byte stack slot and loads the indexed lane; it is not |
| ; reduced to the scalar constant. %a and %b are unused. |
| define i32 @extract_v4i32_splat(<4 x i32> %a, <2 x i32> %b, i32 %c) { |
| ; CHECK-SD-LABEL: extract_v4i32_splat: |
| ; CHECK-SD: // %bb.0: // %entry |
| ; CHECK-SD-NEXT: sub sp, sp, #16 |
| ; CHECK-SD-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-SD-NEXT: movi v0.4s, #11 |
| ; CHECK-SD-NEXT: mov x8, sp |
| ; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0 |
| ; CHECK-SD-NEXT: bfi x8, x0, #2, #2 |
| ; CHECK-SD-NEXT: str q0, [sp] |
| ; CHECK-SD-NEXT: ldr w0, [x8] |
| ; CHECK-SD-NEXT: add sp, sp, #16 |
| ; CHECK-SD-NEXT: ret |
| ; |
| ; CHECK-GI-LABEL: extract_v4i32_splat: |
| ; CHECK-GI: // %bb.0: // %entry |
| ; CHECK-GI-NEXT: sub sp, sp, #16 |
| ; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-GI-NEXT: movi v0.4s, #11 |
| ; CHECK-GI-NEXT: mov w8, w0 |
| ; CHECK-GI-NEXT: mov x9, sp |
| ; CHECK-GI-NEXT: and x8, x8, #0x3 |
| ; CHECK-GI-NEXT: str q0, [sp] |
| ; CHECK-GI-NEXT: ldr w0, [x9, x8, lsl #2] |
| ; CHECK-GI-NEXT: add sp, sp, #16 |
| ; CHECK-GI-NEXT: ret |
| entry: |
| %d = extractelement <4 x i32> splat (i32 11), i32 %c |
| ret i32 %d |
| } |
| |
| ; Constant-index (lane 0) extractelement from splat (i32 11): the expected |
| ; code for both selectors is a single immediate move of 11 into w0. All three |
| ; arguments are unused. |
| define i32 @extract_v4i32_splat_const(<4 x i32> %a, <2 x i32> %b, i32 %c) { |
| ; CHECK-LABEL: extract_v4i32_splat_const: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: mov w0, #11 // =0xb |
| ; CHECK-NEXT: ret |
| entry: |
| %d = extractelement <4 x i32> splat (i32 11), i32 0 |
| ret i32 %d |
| } |
| |
| ; Variable-index extractelement from the result of llvm.vp.add. The expected |
| ; code for both selectors is a plain 4 x 32-bit vector add (no instruction in |
| ; the expected output refers to %mask or %evl), followed by a spill to a |
| ; 16-byte stack slot and a load of the indexed 32-bit lane. |
| define i32 @extract_v4i32_vp_add(<4 x i32> %a, <4 x i32> %b, i32 %c, <4 x i1> %mask, i32 %evl) { |
| ; CHECK-SD-LABEL: extract_v4i32_vp_add: |
| ; CHECK-SD: // %bb.0: // %entry |
| ; CHECK-SD-NEXT: sub sp, sp, #16 |
| ; CHECK-SD-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-SD-NEXT: add v0.4s, v0.4s, v1.4s |
| ; CHECK-SD-NEXT: mov x8, sp |
| ; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0 |
| ; CHECK-SD-NEXT: bfi x8, x0, #2, #2 |
| ; CHECK-SD-NEXT: str q0, [sp] |
| ; CHECK-SD-NEXT: ldr w0, [x8] |
| ; CHECK-SD-NEXT: add sp, sp, #16 |
| ; CHECK-SD-NEXT: ret |
| ; |
| ; CHECK-GI-LABEL: extract_v4i32_vp_add: |
| ; CHECK-GI: // %bb.0: // %entry |
| ; CHECK-GI-NEXT: sub sp, sp, #16 |
| ; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-GI-NEXT: add v0.4s, v0.4s, v1.4s |
| ; CHECK-GI-NEXT: mov w8, w0 |
| ; CHECK-GI-NEXT: mov x9, sp |
| ; CHECK-GI-NEXT: and x8, x8, #0x3 |
| ; CHECK-GI-NEXT: str q0, [sp] |
| ; CHECK-GI-NEXT: ldr w0, [x9, x8, lsl #2] |
| ; CHECK-GI-NEXT: add sp, sp, #16 |
| ; CHECK-GI-NEXT: ret |
| entry: |
| %vector = call <4 x i32> @llvm.vp.add.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i1> %mask, i32 %evl) |
| %d = extractelement <4 x i32> %vector, i32 %c |
| ret i32 %d |
| } |
| |
| ; Constant-index (lane 3) extractelement from the result of llvm.vp.add. Both |
| ; selectors emit a full vector add and then read lane 3: SelectionDAG moves it |
| ; directly into w0, while GlobalISel first copies the lane to s0 and then |
| ; moves s0 to w0. %c is unused. |
| define i32 @extract_v4i32_vp_add_const(<4 x i32> %a, <4 x i32> %b, i32 %c, <4 x i1> %mask, i32 %evl) { |
| ; CHECK-SD-LABEL: extract_v4i32_vp_add_const: |
| ; CHECK-SD: // %bb.0: // %entry |
| ; CHECK-SD-NEXT: add v0.4s, v0.4s, v1.4s |
| ; CHECK-SD-NEXT: mov w0, v0.s[3] |
| ; CHECK-SD-NEXT: ret |
| ; |
| ; CHECK-GI-LABEL: extract_v4i32_vp_add_const: |
| ; CHECK-GI: // %bb.0: // %entry |
| ; CHECK-GI-NEXT: add v0.4s, v0.4s, v1.4s |
| ; CHECK-GI-NEXT: mov s0, v0.s[3] |
| ; CHECK-GI-NEXT: fmov w0, s0 |
| ; CHECK-GI-NEXT: ret |
| entry: |
| %vector = call <4 x i32> @llvm.vp.add.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i1> %mask, i32 %evl) |
| %d = extractelement <4 x i32> %vector, i32 3 |
| ret i32 %d |
| } |
| |
| ; Constant-index (lane 0) extractelement from a <2 x i32> loop-carried phi. |
| ; The entry block splats %val, adds <1, 2> and truncates to <2 x i32>; each |
| ; loop iteration extracts lane 0, stores lane0 + 10 to %ptr[sext(lane0)], adds |
| ; 16 to every lane, and repeats while lane0 is unsigned-less-than %limit. The |
| ; function returns the last stored value. |
| ; In the expected code SelectionDAG performs the entry arithmetic directly on |
| ; 2 x 32-bit lanes, whereas GlobalISel adds in 2 x 64-bit lanes and narrows |
| ; with xtn; GlobalISel also reads lane 0 into two separate registers inside |
| ; the loop. |
| define i32 @extract_v4i32_phi(i64 %val, i32 %limit, ptr %ptr) { |
| ; CHECK-SD-LABEL: extract_v4i32_phi: |
| ; CHECK-SD: // %bb.0: // %entry |
| ; CHECK-SD-NEXT: dup v1.2s, w0 |
| ; CHECK-SD-NEXT: adrp x8, .LCPI41_0 |
| ; CHECK-SD-NEXT: movi v0.2s, #16 |
| ; CHECK-SD-NEXT: ldr d2, [x8, :lo12:.LCPI41_0] |
| ; CHECK-SD-NEXT: add v1.2s, v1.2s, v2.2s |
| ; CHECK-SD-NEXT: .LBB41_1: // %loop |
| ; CHECK-SD-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-SD-NEXT: fmov w8, s1 |
| ; CHECK-SD-NEXT: add v1.2s, v1.2s, v0.2s |
| ; CHECK-SD-NEXT: cmp w8, w1 |
| ; CHECK-SD-NEXT: add w0, w8, #10 |
| ; CHECK-SD-NEXT: str w0, [x2, w8, sxtw #2] |
| ; CHECK-SD-NEXT: b.lo .LBB41_1 |
| ; CHECK-SD-NEXT: // %bb.2: // %ret |
| ; CHECK-SD-NEXT: ret |
| ; |
| ; CHECK-GI-LABEL: extract_v4i32_phi: |
| ; CHECK-GI: // %bb.0: // %entry |
| ; CHECK-GI-NEXT: adrp x8, .LCPI41_0 |
| ; CHECK-GI-NEXT: dup v0.2d, x0 |
| ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI41_0] |
| ; CHECK-GI-NEXT: add v1.2d, v0.2d, v1.2d |
| ; CHECK-GI-NEXT: movi v0.2s, #16 |
| ; CHECK-GI-NEXT: xtn v1.2s, v1.2d |
| ; CHECK-GI-NEXT: .LBB41_1: // %loop |
| ; CHECK-GI-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-GI-NEXT: fmov w8, s1 |
| ; CHECK-GI-NEXT: fmov w9, s1 |
| ; CHECK-GI-NEXT: add v1.2s, v1.2s, v0.2s |
| ; CHECK-GI-NEXT: cmp w8, w1 |
| ; CHECK-GI-NEXT: add w0, w9, #10 |
| ; CHECK-GI-NEXT: str w0, [x2, w8, sxtw #2] |
| ; CHECK-GI-NEXT: b.lo .LBB41_1 |
| ; CHECK-GI-NEXT: // %bb.2: // %ret |
| ; CHECK-GI-NEXT: ret |
| entry: |
| ; Splat %val across both lanes, offset by <1, 2>, then narrow to 32 bits. |
| %tempvector = insertelement <2 x i64> undef, i64 %val, i32 0 |
| %vector = shufflevector <2 x i64> %tempvector, <2 x i64> undef, <2 x i32> zeroinitializer |
| %0 = add <2 x i64> %vector, <i64 1, i64 2> |
| %1 = trunc <2 x i64> %0 to <2 x i32> |
| br label %loop |
| |
| loop: |
| ; Loop-carried vector; only lane 0 is read as a scalar. |
| %2 = phi <2 x i32> [ %1, %entry ], [ %inc, %loop ] |
| %elt = extractelement <2 x i32> %2, i32 0 |
| %end = icmp ult i32 %elt, %limit |
| ; Store lane0 + 10 at element index sext(lane0) of %ptr. |
| %3 = add i32 10, %elt |
| %4 = sext i32 %elt to i64 |
| %5 = getelementptr i32, ptr %ptr, i64 %4 |
| store i32 %3, ptr %5 |
| ; Advance every lane by 16 for the next iteration. |
| %inc = add <2 x i32> %2, <i32 16, i32 16> |
| br i1 %end, label %loop, label %ret |
| |
| ret: |
| ret i32 %3 |
| } |