blob: b63e739f577d2767d3f447acbb831d7971d2b465 [file] [log] [blame]
; RUN: llc -mtriple=arm64-apple-ios7.0 -disable-post-ra -o - %s | FileCheck %s
@ptr = global i8* null
define <8 x i8> @test_v8i8_pre_load(<8 x i8>* %addr) {
; CHECK-LABEL: test_v8i8_pre_load:
; CHECK: ldr d0, [x0, #40]!
%newaddr = getelementptr <8 x i8>, <8 x i8>* %addr, i32 5
%val = load <8 x i8>, <8 x i8>* %newaddr, align 8
store <8 x i8>* %newaddr, <8 x i8>** bitcast(i8** @ptr to <8 x i8>**)
ret <8 x i8> %val
}
define <8 x i8> @test_v8i8_post_load(<8 x i8>* %addr) {
; CHECK-LABEL: test_v8i8_post_load:
; CHECK: ldr d0, [x0], #40
%newaddr = getelementptr <8 x i8>, <8 x i8>* %addr, i32 5
%val = load <8 x i8>, <8 x i8>* %addr, align 8
store <8 x i8>* %newaddr, <8 x i8>** bitcast(i8** @ptr to <8 x i8>**)
ret <8 x i8> %val
}
define void @test_v8i8_pre_store(<8 x i8> %in, <8 x i8>* %addr) {
; CHECK-LABEL: test_v8i8_pre_store:
; CHECK: str d0, [x0, #40]!
%newaddr = getelementptr <8 x i8>, <8 x i8>* %addr, i32 5
store <8 x i8> %in, <8 x i8>* %newaddr, align 8
store <8 x i8>* %newaddr, <8 x i8>** bitcast(i8** @ptr to <8 x i8>**)
ret void
}
define void @test_v8i8_post_store(<8 x i8> %in, <8 x i8>* %addr) {
; CHECK-LABEL: test_v8i8_post_store:
; CHECK: str d0, [x0], #40
%newaddr = getelementptr <8 x i8>, <8 x i8>* %addr, i32 5
store <8 x i8> %in, <8 x i8>* %addr, align 8
store <8 x i8>* %newaddr, <8 x i8>** bitcast(i8** @ptr to <8 x i8>**)
ret void
}
define <4 x i16> @test_v4i16_pre_load(<4 x i16>* %addr) {
; CHECK-LABEL: test_v4i16_pre_load:
; CHECK: ldr d0, [x0, #40]!
%newaddr = getelementptr <4 x i16>, <4 x i16>* %addr, i32 5
%val = load <4 x i16>, <4 x i16>* %newaddr, align 8
store <4 x i16>* %newaddr, <4 x i16>** bitcast(i8** @ptr to <4 x i16>**)
ret <4 x i16> %val
}
define <4 x i16> @test_v4i16_post_load(<4 x i16>* %addr) {
; CHECK-LABEL: test_v4i16_post_load:
; CHECK: ldr d0, [x0], #40
%newaddr = getelementptr <4 x i16>, <4 x i16>* %addr, i32 5
%val = load <4 x i16>, <4 x i16>* %addr, align 8
store <4 x i16>* %newaddr, <4 x i16>** bitcast(i8** @ptr to <4 x i16>**)
ret <4 x i16> %val
}
define void @test_v4i16_pre_store(<4 x i16> %in, <4 x i16>* %addr) {
; CHECK-LABEL: test_v4i16_pre_store:
; CHECK: str d0, [x0, #40]!
%newaddr = getelementptr <4 x i16>, <4 x i16>* %addr, i32 5
store <4 x i16> %in, <4 x i16>* %newaddr, align 8
store <4 x i16>* %newaddr, <4 x i16>** bitcast(i8** @ptr to <4 x i16>**)
ret void
}
define void @test_v4i16_post_store(<4 x i16> %in, <4 x i16>* %addr) {
; CHECK-LABEL: test_v4i16_post_store:
; CHECK: str d0, [x0], #40
%newaddr = getelementptr <4 x i16>, <4 x i16>* %addr, i32 5
store <4 x i16> %in, <4 x i16>* %addr, align 8
store <4 x i16>* %newaddr, <4 x i16>** bitcast(i8** @ptr to <4 x i16>**)
ret void
}
define <2 x i32> @test_v2i32_pre_load(<2 x i32>* %addr) {
; CHECK-LABEL: test_v2i32_pre_load:
; CHECK: ldr d0, [x0, #40]!
%newaddr = getelementptr <2 x i32>, <2 x i32>* %addr, i32 5
%val = load <2 x i32>, <2 x i32>* %newaddr, align 8
store <2 x i32>* %newaddr, <2 x i32>** bitcast(i8** @ptr to <2 x i32>**)
ret <2 x i32> %val
}
define <2 x i32> @test_v2i32_post_load(<2 x i32>* %addr) {
; CHECK-LABEL: test_v2i32_post_load:
; CHECK: ldr d0, [x0], #40
%newaddr = getelementptr <2 x i32>, <2 x i32>* %addr, i32 5
%val = load <2 x i32>, <2 x i32>* %addr, align 8
store <2 x i32>* %newaddr, <2 x i32>** bitcast(i8** @ptr to <2 x i32>**)
ret <2 x i32> %val
}
define void @test_v2i32_pre_store(<2 x i32> %in, <2 x i32>* %addr) {
; CHECK-LABEL: test_v2i32_pre_store:
; CHECK: str d0, [x0, #40]!
%newaddr = getelementptr <2 x i32>, <2 x i32>* %addr, i32 5
store <2 x i32> %in, <2 x i32>* %newaddr, align 8
store <2 x i32>* %newaddr, <2 x i32>** bitcast(i8** @ptr to <2 x i32>**)
ret void
}
define void @test_v2i32_post_store(<2 x i32> %in, <2 x i32>* %addr) {
; CHECK-LABEL: test_v2i32_post_store:
; CHECK: str d0, [x0], #40
%newaddr = getelementptr <2 x i32>, <2 x i32>* %addr, i32 5
store <2 x i32> %in, <2 x i32>* %addr, align 8
store <2 x i32>* %newaddr, <2 x i32>** bitcast(i8** @ptr to <2 x i32>**)
ret void
}
define <2 x float> @test_v2f32_pre_load(<2 x float>* %addr) {
; CHECK-LABEL: test_v2f32_pre_load:
; CHECK: ldr d0, [x0, #40]!
%newaddr = getelementptr <2 x float>, <2 x float>* %addr, i32 5
%val = load <2 x float>, <2 x float>* %newaddr, align 8
store <2 x float>* %newaddr, <2 x float>** bitcast(i8** @ptr to <2 x float>**)
ret <2 x float> %val
}
define <2 x float> @test_v2f32_post_load(<2 x float>* %addr) {
; CHECK-LABEL: test_v2f32_post_load:
; CHECK: ldr d0, [x0], #40
%newaddr = getelementptr <2 x float>, <2 x float>* %addr, i32 5
%val = load <2 x float>, <2 x float>* %addr, align 8
store <2 x float>* %newaddr, <2 x float>** bitcast(i8** @ptr to <2 x float>**)
ret <2 x float> %val
}
define void @test_v2f32_pre_store(<2 x float> %in, <2 x float>* %addr) {
; CHECK-LABEL: test_v2f32_pre_store:
; CHECK: str d0, [x0, #40]!
%newaddr = getelementptr <2 x float>, <2 x float>* %addr, i32 5
store <2 x float> %in, <2 x float>* %newaddr, align 8
store <2 x float>* %newaddr, <2 x float>** bitcast(i8** @ptr to <2 x float>**)
ret void
}
define void @test_v2f32_post_store(<2 x float> %in, <2 x float>* %addr) {
; CHECK-LABEL: test_v2f32_post_store:
; CHECK: str d0, [x0], #40
%newaddr = getelementptr <2 x float>, <2 x float>* %addr, i32 5
store <2 x float> %in, <2 x float>* %addr, align 8
store <2 x float>* %newaddr, <2 x float>** bitcast(i8** @ptr to <2 x float>**)
ret void
}
define <1 x i64> @test_v1i64_pre_load(<1 x i64>* %addr) {
; CHECK-LABEL: test_v1i64_pre_load:
; CHECK: ldr d0, [x0, #40]!
%newaddr = getelementptr <1 x i64>, <1 x i64>* %addr, i32 5
%val = load <1 x i64>, <1 x i64>* %newaddr, align 8
store <1 x i64>* %newaddr, <1 x i64>** bitcast(i8** @ptr to <1 x i64>**)
ret <1 x i64> %val
}
define <1 x i64> @test_v1i64_post_load(<1 x i64>* %addr) {
; CHECK-LABEL: test_v1i64_post_load:
; CHECK: ldr d0, [x0], #40
%newaddr = getelementptr <1 x i64>, <1 x i64>* %addr, i32 5
%val = load <1 x i64>, <1 x i64>* %addr, align 8
store <1 x i64>* %newaddr, <1 x i64>** bitcast(i8** @ptr to <1 x i64>**)
ret <1 x i64> %val
}
define void @test_v1i64_pre_store(<1 x i64> %in, <1 x i64>* %addr) {
; CHECK-LABEL: test_v1i64_pre_store:
; CHECK: str d0, [x0, #40]!
%newaddr = getelementptr <1 x i64>, <1 x i64>* %addr, i32 5
store <1 x i64> %in, <1 x i64>* %newaddr, align 8
store <1 x i64>* %newaddr, <1 x i64>** bitcast(i8** @ptr to <1 x i64>**)
ret void
}
define void @test_v1i64_post_store(<1 x i64> %in, <1 x i64>* %addr) {
; CHECK-LABEL: test_v1i64_post_store:
; CHECK: str d0, [x0], #40
%newaddr = getelementptr <1 x i64>, <1 x i64>* %addr, i32 5
store <1 x i64> %in, <1 x i64>* %addr, align 8
store <1 x i64>* %newaddr, <1 x i64>** bitcast(i8** @ptr to <1 x i64>**)
ret void
}
define <16 x i8> @test_v16i8_pre_load(<16 x i8>* %addr) {
; CHECK-LABEL: test_v16i8_pre_load:
; CHECK: ldr q0, [x0, #80]!
%newaddr = getelementptr <16 x i8>, <16 x i8>* %addr, i32 5
%val = load <16 x i8>, <16 x i8>* %newaddr, align 8
store <16 x i8>* %newaddr, <16 x i8>** bitcast(i8** @ptr to <16 x i8>**)
ret <16 x i8> %val
}
define <16 x i8> @test_v16i8_post_load(<16 x i8>* %addr) {
; CHECK-LABEL: test_v16i8_post_load:
; CHECK: ldr q0, [x0], #80
%newaddr = getelementptr <16 x i8>, <16 x i8>* %addr, i32 5
%val = load <16 x i8>, <16 x i8>* %addr, align 8
store <16 x i8>* %newaddr, <16 x i8>** bitcast(i8** @ptr to <16 x i8>**)
ret <16 x i8> %val
}
define void @test_v16i8_pre_store(<16 x i8> %in, <16 x i8>* %addr) {
; CHECK-LABEL: test_v16i8_pre_store:
; CHECK: str q0, [x0, #80]!
%newaddr = getelementptr <16 x i8>, <16 x i8>* %addr, i32 5
store <16 x i8> %in, <16 x i8>* %newaddr, align 8
store <16 x i8>* %newaddr, <16 x i8>** bitcast(i8** @ptr to <16 x i8>**)
ret void
}
define void @test_v16i8_post_store(<16 x i8> %in, <16 x i8>* %addr) {
; CHECK-LABEL: test_v16i8_post_store:
; CHECK: str q0, [x0], #80
%newaddr = getelementptr <16 x i8>, <16 x i8>* %addr, i32 5
store <16 x i8> %in, <16 x i8>* %addr, align 8
store <16 x i8>* %newaddr, <16 x i8>** bitcast(i8** @ptr to <16 x i8>**)
ret void
}
define <8 x i16> @test_v8i16_pre_load(<8 x i16>* %addr) {
; CHECK-LABEL: test_v8i16_pre_load:
; CHECK: ldr q0, [x0, #80]!
%newaddr = getelementptr <8 x i16>, <8 x i16>* %addr, i32 5
%val = load <8 x i16>, <8 x i16>* %newaddr, align 8
store <8 x i16>* %newaddr, <8 x i16>** bitcast(i8** @ptr to <8 x i16>**)
ret <8 x i16> %val
}
define <8 x i16> @test_v8i16_post_load(<8 x i16>* %addr) {
; CHECK-LABEL: test_v8i16_post_load:
; CHECK: ldr q0, [x0], #80
%newaddr = getelementptr <8 x i16>, <8 x i16>* %addr, i32 5
%val = load <8 x i16>, <8 x i16>* %addr, align 8
store <8 x i16>* %newaddr, <8 x i16>** bitcast(i8** @ptr to <8 x i16>**)
ret <8 x i16> %val
}
define void @test_v8i16_pre_store(<8 x i16> %in, <8 x i16>* %addr) {
; CHECK-LABEL: test_v8i16_pre_store:
; CHECK: str q0, [x0, #80]!
%newaddr = getelementptr <8 x i16>, <8 x i16>* %addr, i32 5
store <8 x i16> %in, <8 x i16>* %newaddr, align 8
store <8 x i16>* %newaddr, <8 x i16>** bitcast(i8** @ptr to <8 x i16>**)
ret void
}
define void @test_v8i16_post_store(<8 x i16> %in, <8 x i16>* %addr) {
; CHECK-LABEL: test_v8i16_post_store:
; CHECK: str q0, [x0], #80
%newaddr = getelementptr <8 x i16>, <8 x i16>* %addr, i32 5
store <8 x i16> %in, <8 x i16>* %addr, align 8
store <8 x i16>* %newaddr, <8 x i16>** bitcast(i8** @ptr to <8 x i16>**)
ret void
}
define <4 x i32> @test_v4i32_pre_load(<4 x i32>* %addr) {
; CHECK-LABEL: test_v4i32_pre_load:
; CHECK: ldr q0, [x0, #80]!
%newaddr = getelementptr <4 x i32>, <4 x i32>* %addr, i32 5
%val = load <4 x i32>, <4 x i32>* %newaddr, align 8
store <4 x i32>* %newaddr, <4 x i32>** bitcast(i8** @ptr to <4 x i32>**)
ret <4 x i32> %val
}
define <4 x i32> @test_v4i32_post_load(<4 x i32>* %addr) {
; CHECK-LABEL: test_v4i32_post_load:
; CHECK: ldr q0, [x0], #80
%newaddr = getelementptr <4 x i32>, <4 x i32>* %addr, i32 5
%val = load <4 x i32>, <4 x i32>* %addr, align 8
store <4 x i32>* %newaddr, <4 x i32>** bitcast(i8** @ptr to <4 x i32>**)
ret <4 x i32> %val
}
define void @test_v4i32_pre_store(<4 x i32> %in, <4 x i32>* %addr) {
; CHECK-LABEL: test_v4i32_pre_store:
; CHECK: str q0, [x0, #80]!
%newaddr = getelementptr <4 x i32>, <4 x i32>* %addr, i32 5
store <4 x i32> %in, <4 x i32>* %newaddr, align 8
store <4 x i32>* %newaddr, <4 x i32>** bitcast(i8** @ptr to <4 x i32>**)
ret void
}
define void @test_v4i32_post_store(<4 x i32> %in, <4 x i32>* %addr) {
; CHECK-LABEL: test_v4i32_post_store:
; CHECK: str q0, [x0], #80
%newaddr = getelementptr <4 x i32>, <4 x i32>* %addr, i32 5
store <4 x i32> %in, <4 x i32>* %addr, align 8
store <4 x i32>* %newaddr, <4 x i32>** bitcast(i8** @ptr to <4 x i32>**)
ret void
}
define <4 x float> @test_v4f32_pre_load(<4 x float>* %addr) {
; CHECK-LABEL: test_v4f32_pre_load:
; CHECK: ldr q0, [x0, #80]!
%newaddr = getelementptr <4 x float>, <4 x float>* %addr, i32 5
%val = load <4 x float>, <4 x float>* %newaddr, align 8
store <4 x float>* %newaddr, <4 x float>** bitcast(i8** @ptr to <4 x float>**)
ret <4 x float> %val
}
define <4 x float> @test_v4f32_post_load(<4 x float>* %addr) {
; CHECK-LABEL: test_v4f32_post_load:
; CHECK: ldr q0, [x0], #80
%newaddr = getelementptr <4 x float>, <4 x float>* %addr, i32 5
%val = load <4 x float>, <4 x float>* %addr, align 8
store <4 x float>* %newaddr, <4 x float>** bitcast(i8** @ptr to <4 x float>**)
ret <4 x float> %val
}
define void @test_v4f32_pre_store(<4 x float> %in, <4 x float>* %addr) {
; CHECK-LABEL: test_v4f32_pre_store:
; CHECK: str q0, [x0, #80]!
%newaddr = getelementptr <4 x float>, <4 x float>* %addr, i32 5
store <4 x float> %in, <4 x float>* %newaddr, align 8
store <4 x float>* %newaddr, <4 x float>** bitcast(i8** @ptr to <4 x float>**)
ret void
}
define void @test_v4f32_post_store(<4 x float> %in, <4 x float>* %addr) {
; CHECK-LABEL: test_v4f32_post_store:
; CHECK: str q0, [x0], #80
%newaddr = getelementptr <4 x float>, <4 x float>* %addr, i32 5
store <4 x float> %in, <4 x float>* %addr, align 8
store <4 x float>* %newaddr, <4 x float>** bitcast(i8** @ptr to <4 x float>**)
ret void
}
define <2 x i64> @test_v2i64_pre_load(<2 x i64>* %addr) {
; CHECK-LABEL: test_v2i64_pre_load:
; CHECK: ldr q0, [x0, #80]!
%newaddr = getelementptr <2 x i64>, <2 x i64>* %addr, i32 5
%val = load <2 x i64>, <2 x i64>* %newaddr, align 8
store <2 x i64>* %newaddr, <2 x i64>** bitcast(i8** @ptr to <2 x i64>**)
ret <2 x i64> %val
}
define <2 x i64> @test_v2i64_post_load(<2 x i64>* %addr) {
; CHECK-LABEL: test_v2i64_post_load:
; CHECK: ldr q0, [x0], #80
%newaddr = getelementptr <2 x i64>, <2 x i64>* %addr, i32 5
%val = load <2 x i64>, <2 x i64>* %addr, align 8
store <2 x i64>* %newaddr, <2 x i64>** bitcast(i8** @ptr to <2 x i64>**)
ret <2 x i64> %val
}
define void @test_v2i64_pre_store(<2 x i64> %in, <2 x i64>* %addr) {
; CHECK-LABEL: test_v2i64_pre_store:
; CHECK: str q0, [x0, #80]!
%newaddr = getelementptr <2 x i64>, <2 x i64>* %addr, i32 5
store <2 x i64> %in, <2 x i64>* %newaddr, align 8
store <2 x i64>* %newaddr, <2 x i64>** bitcast(i8** @ptr to <2 x i64>**)
ret void
}
define void @test_v2i64_post_store(<2 x i64> %in, <2 x i64>* %addr) {
; CHECK-LABEL: test_v2i64_post_store:
; CHECK: str q0, [x0], #80
%newaddr = getelementptr <2 x i64>, <2 x i64>* %addr, i32 5
store <2 x i64> %in, <2 x i64>* %addr, align 8
store <2 x i64>* %newaddr, <2 x i64>** bitcast(i8** @ptr to <2 x i64>**)
ret void
}
define <2 x double> @test_v2f64_pre_load(<2 x double>* %addr) {
; CHECK-LABEL: test_v2f64_pre_load:
; CHECK: ldr q0, [x0, #80]!
%newaddr = getelementptr <2 x double>, <2 x double>* %addr, i32 5
%val = load <2 x double>, <2 x double>* %newaddr, align 8
store <2 x double>* %newaddr, <2 x double>** bitcast(i8** @ptr to <2 x double>**)
ret <2 x double> %val
}
define <2 x double> @test_v2f64_post_load(<2 x double>* %addr) {
; CHECK-LABEL: test_v2f64_post_load:
; CHECK: ldr q0, [x0], #80
%newaddr = getelementptr <2 x double>, <2 x double>* %addr, i32 5
%val = load <2 x double>, <2 x double>* %addr, align 8
store <2 x double>* %newaddr, <2 x double>** bitcast(i8** @ptr to <2 x double>**)
ret <2 x double> %val
}
define void @test_v2f64_pre_store(<2 x double> %in, <2 x double>* %addr) {
; CHECK-LABEL: test_v2f64_pre_store:
; CHECK: str q0, [x0, #80]!
%newaddr = getelementptr <2 x double>, <2 x double>* %addr, i32 5
store <2 x double> %in, <2 x double>* %newaddr, align 8
store <2 x double>* %newaddr, <2 x double>** bitcast(i8** @ptr to <2 x double>**)
ret void
}
define void @test_v2f64_post_store(<2 x double> %in, <2 x double>* %addr) {
; CHECK-LABEL: test_v2f64_post_store:
; CHECK: str q0, [x0], #80
%newaddr = getelementptr <2 x double>, <2 x double>* %addr, i32 5
store <2 x double> %in, <2 x double>* %addr, align 8
store <2 x double>* %newaddr, <2 x double>** bitcast(i8** @ptr to <2 x double>**)
ret void
}
define i8* @test_v16i8_post_imm_st1_lane(<16 x i8> %in, i8* %addr) {
; CHECK-LABEL: test_v16i8_post_imm_st1_lane:
; CHECK: st1.b { v0 }[3], [x0], #1
%elt = extractelement <16 x i8> %in, i32 3
store i8 %elt, i8* %addr
%newaddr = getelementptr i8, i8* %addr, i32 1
ret i8* %newaddr
}
define i8* @test_v16i8_post_reg_st1_lane(<16 x i8> %in, i8* %addr) {
; CHECK-LABEL: test_v16i8_post_reg_st1_lane:
; CHECK: orr w[[OFFSET:[0-9]+]], wzr, #0x2
; CHECK: st1.b { v0 }[3], [x0], x[[OFFSET]]
%elt = extractelement <16 x i8> %in, i32 3
store i8 %elt, i8* %addr
%newaddr = getelementptr i8, i8* %addr, i32 2
ret i8* %newaddr
}
define i16* @test_v8i16_post_imm_st1_lane(<8 x i16> %in, i16* %addr) {
; CHECK-LABEL: test_v8i16_post_imm_st1_lane:
; CHECK: st1.h { v0 }[3], [x0], #2
%elt = extractelement <8 x i16> %in, i32 3
store i16 %elt, i16* %addr
%newaddr = getelementptr i16, i16* %addr, i32 1
ret i16* %newaddr
}
define i16* @test_v8i16_post_reg_st1_lane(<8 x i16> %in, i16* %addr) {
; CHECK-LABEL: test_v8i16_post_reg_st1_lane:
; CHECK: orr w[[OFFSET:[0-9]+]], wzr, #0x4
; CHECK: st1.h { v0 }[3], [x0], x[[OFFSET]]
%elt = extractelement <8 x i16> %in, i32 3
store i16 %elt, i16* %addr
%newaddr = getelementptr i16, i16* %addr, i32 2
ret i16* %newaddr
}
define i32* @test_v4i32_post_imm_st1_lane(<4 x i32> %in, i32* %addr) {
; CHECK-LABEL: test_v4i32_post_imm_st1_lane:
; CHECK: st1.s { v0 }[3], [x0], #4
%elt = extractelement <4 x i32> %in, i32 3
store i32 %elt, i32* %addr
%newaddr = getelementptr i32, i32* %addr, i32 1
ret i32* %newaddr
}
define i32* @test_v4i32_post_reg_st1_lane(<4 x i32> %in, i32* %addr) {
; CHECK-LABEL: test_v4i32_post_reg_st1_lane:
; CHECK: orr w[[OFFSET:[0-9]+]], wzr, #0x8
; CHECK: st1.s { v0 }[3], [x0], x[[OFFSET]]
%elt = extractelement <4 x i32> %in, i32 3
store i32 %elt, i32* %addr
%newaddr = getelementptr i32, i32* %addr, i32 2
ret i32* %newaddr
}
define float* @test_v4f32_post_imm_st1_lane(<4 x float> %in, float* %addr) {
; CHECK-LABEL: test_v4f32_post_imm_st1_lane:
; CHECK: st1.s { v0 }[3], [x0], #4
%elt = extractelement <4 x float> %in, i32 3
store float %elt, float* %addr
%newaddr = getelementptr float, float* %addr, i32 1
ret float* %newaddr
}
define float* @test_v4f32_post_reg_st1_lane(<4 x float> %in, float* %addr) {
; CHECK-LABEL: test_v4f32_post_reg_st1_lane:
; CHECK: orr w[[OFFSET:[0-9]+]], wzr, #0x8
; CHECK: st1.s { v0 }[3], [x0], x[[OFFSET]]
%elt = extractelement <4 x float> %in, i32 3
store float %elt, float* %addr
%newaddr = getelementptr float, float* %addr, i32 2
ret float* %newaddr
}
define i64* @test_v2i64_post_imm_st1_lane(<2 x i64> %in, i64* %addr) {
; CHECK-LABEL: test_v2i64_post_imm_st1_lane:
; CHECK: st1.d { v0 }[1], [x0], #8
%elt = extractelement <2 x i64> %in, i64 1
store i64 %elt, i64* %addr
%newaddr = getelementptr i64, i64* %addr, i64 1
ret i64* %newaddr
}
define i64* @test_v2i64_post_reg_st1_lane(<2 x i64> %in, i64* %addr) {
; CHECK-LABEL: test_v2i64_post_reg_st1_lane:
; CHECK: orr w[[OFFSET:[0-9]+]], wzr, #0x10
; CHECK: st1.d { v0 }[1], [x0], x[[OFFSET]]
%elt = extractelement <2 x i64> %in, i64 1
store i64 %elt, i64* %addr
%newaddr = getelementptr i64, i64* %addr, i64 2
ret i64* %newaddr
}
define double* @test_v2f64_post_imm_st1_lane(<2 x double> %in, double* %addr) {
; CHECK-LABEL: test_v2f64_post_imm_st1_lane:
; CHECK: st1.d { v0 }[1], [x0], #8
%elt = extractelement <2 x double> %in, i32 1
store double %elt, double* %addr
%newaddr = getelementptr double, double* %addr, i32 1
ret double* %newaddr
}
define double* @test_v2f64_post_reg_st1_lane(<2 x double> %in, double* %addr) {
; CHECK-LABEL: test_v2f64_post_reg_st1_lane:
; CHECK: orr w[[OFFSET:[0-9]+]], wzr, #0x10
; CHECK: st1.d { v0 }[1], [x0], x[[OFFSET]]
%elt = extractelement <2 x double> %in, i32 1
store double %elt, double* %addr
%newaddr = getelementptr double, double* %addr, i32 2
ret double* %newaddr
}
define i8* @test_v8i8_post_imm_st1_lane(<8 x i8> %in, i8* %addr) {
; CHECK-LABEL: test_v8i8_post_imm_st1_lane:
; CHECK: st1.b { v0 }[3], [x0], #1
%elt = extractelement <8 x i8> %in, i32 3
store i8 %elt, i8* %addr
%newaddr = getelementptr i8, i8* %addr, i32 1
ret i8* %newaddr
}
define i8* @test_v8i8_post_reg_st1_lane(<8 x i8> %in, i8* %addr) {
; CHECK-LABEL: test_v8i8_post_reg_st1_lane:
; CHECK: orr w[[OFFSET:[0-9]+]], wzr, #0x2
; CHECK: st1.b { v0 }[3], [x0], x[[OFFSET]]
%elt = extractelement <8 x i8> %in, i32 3
store i8 %elt, i8* %addr
%newaddr = getelementptr i8, i8* %addr, i32 2
ret i8* %newaddr
}
define i16* @test_v4i16_post_imm_st1_lane(<4 x i16> %in, i16* %addr) {
; CHECK-LABEL: test_v4i16_post_imm_st1_lane:
; CHECK: st1.h { v0 }[3], [x0], #2
%elt = extractelement <4 x i16> %in, i32 3
store i16 %elt, i16* %addr
%newaddr = getelementptr i16, i16* %addr, i32 1
ret i16* %newaddr
}
define i16* @test_v4i16_post_reg_st1_lane(<4 x i16> %in, i16* %addr) {
; CHECK-LABEL: test_v4i16_post_reg_st1_lane:
; CHECK: orr w[[OFFSET:[0-9]+]], wzr, #0x4
; CHECK: st1.h { v0 }[3], [x0], x[[OFFSET]]
%elt = extractelement <4 x i16> %in, i32 3
store i16 %elt, i16* %addr
%newaddr = getelementptr i16, i16* %addr, i32 2
ret i16* %newaddr
}
define i32* @test_v2i32_post_imm_st1_lane(<2 x i32> %in, i32* %addr) {
; CHECK-LABEL: test_v2i32_post_imm_st1_lane:
; CHECK: st1.s { v0 }[1], [x0], #4
%elt = extractelement <2 x i32> %in, i32 1
store i32 %elt, i32* %addr
%newaddr = getelementptr i32, i32* %addr, i32 1
ret i32* %newaddr
}
define i32* @test_v2i32_post_reg_st1_lane(<2 x i32> %in, i32* %addr) {
; CHECK-LABEL: test_v2i32_post_reg_st1_lane:
; CHECK: orr w[[OFFSET:[0-9]+]], wzr, #0x8
; CHECK: st1.s { v0 }[1], [x0], x[[OFFSET]]
%elt = extractelement <2 x i32> %in, i32 1
store i32 %elt, i32* %addr
%newaddr = getelementptr i32, i32* %addr, i32 2
ret i32* %newaddr
}
define float* @test_v2f32_post_imm_st1_lane(<2 x float> %in, float* %addr) {
; CHECK-LABEL: test_v2f32_post_imm_st1_lane:
; CHECK: st1.s { v0 }[1], [x0], #4
%elt = extractelement <2 x float> %in, i32 1
store float %elt, float* %addr
%newaddr = getelementptr float, float* %addr, i32 1
ret float* %newaddr
}
define float* @test_v2f32_post_reg_st1_lane(<2 x float> %in, float* %addr) {
; CHECK-LABEL: test_v2f32_post_reg_st1_lane:
; CHECK: orr w[[OFFSET:[0-9]+]], wzr, #0x8
; CHECK: st1.s { v0 }[1], [x0], x[[OFFSET]]
%elt = extractelement <2 x float> %in, i32 1
store float %elt, float* %addr
%newaddr = getelementptr float, float* %addr, i32 2
ret float* %newaddr
}
define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld2(i8* %A, i8** %ptr) {
;CHECK-LABEL: test_v16i8_post_imm_ld2:
;CHECK: ld2.16b { v0, v1 }, [x0], #32
%ld2 = tail call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0i8(i8* %A)
%tmp = getelementptr i8, i8* %A, i32 32
store i8* %tmp, i8** %ptr
ret { <16 x i8>, <16 x i8> } %ld2
}
define { <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld2(i8* %A, i8** %ptr, i64 %inc) {
;CHECK-LABEL: test_v16i8_post_reg_ld2:
;CHECK: ld2.16b { v0, v1 }, [x0], x{{[0-9]+}}
%ld2 = tail call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0i8(i8* %A)
%tmp = getelementptr i8, i8* %A, i64 %inc
store i8* %tmp, i8** %ptr
ret { <16 x i8>, <16 x i8> } %ld2
}
declare { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0i8(i8*)
define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld2(i8* %A, i8** %ptr) {
;CHECK-LABEL: test_v8i8_post_imm_ld2:
;CHECK: ld2.8b { v0, v1 }, [x0], #16
%ld2 = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0i8(i8* %A)
%tmp = getelementptr i8, i8* %A, i32 16
store i8* %tmp, i8** %ptr
ret { <8 x i8>, <8 x i8> } %ld2
}
define { <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld2(i8* %A, i8** %ptr, i64 %inc) {
;CHECK-LABEL: test_v8i8_post_reg_ld2:
;CHECK: ld2.8b { v0, v1 }, [x0], x{{[0-9]+}}
%ld2 = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0i8(i8* %A)
%tmp = getelementptr i8, i8* %A, i64 %inc
store i8* %tmp, i8** %ptr
ret { <8 x i8>, <8 x i8> } %ld2
}
declare { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0i8(i8*)
define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld2(i16* %A, i16** %ptr) {
;CHECK-LABEL: test_v8i16_post_imm_ld2:
;CHECK: ld2.8h { v0, v1 }, [x0], #32
%ld2 = tail call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0i16(i16* %A)
%tmp = getelementptr i16, i16* %A, i32 16
store i16* %tmp, i16** %ptr
ret { <8 x i16>, <8 x i16> } %ld2
}
define { <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld2(i16* %A, i16** %ptr, i64 %inc) {
;CHECK-LABEL: test_v8i16_post_reg_ld2:
;CHECK: ld2.8h { v0, v1 }, [x0], x{{[0-9]+}}
%ld2 = tail call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0i16(i16* %A)
%tmp = getelementptr i16, i16* %A, i64 %inc
store i16* %tmp, i16** %ptr
ret { <8 x i16>, <8 x i16> } %ld2
}
declare { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0i16(i16*)
define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld2(i16* %A, i16** %ptr) {
;CHECK-LABEL: test_v4i16_post_imm_ld2:
;CHECK: ld2.4h { v0, v1 }, [x0], #16
%ld2 = tail call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0i16(i16* %A)
%tmp = getelementptr i16, i16* %A, i32 8
store i16* %tmp, i16** %ptr
ret { <4 x i16>, <4 x i16> } %ld2
}
define { <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld2(i16* %A, i16** %ptr, i64 %inc) {
;CHECK-LABEL: test_v4i16_post_reg_ld2:
;CHECK: ld2.4h { v0, v1 }, [x0], x{{[0-9]+}}
%ld2 = tail call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0i16(i16* %A)
%tmp = getelementptr i16, i16* %A, i64 %inc
store i16* %tmp, i16** %ptr
ret { <4 x i16>, <4 x i16> } %ld2
}
declare { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0i16(i16*)
define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld2(i32* %A, i32** %ptr) {
;CHECK-LABEL: test_v4i32_post_imm_ld2:
;CHECK: ld2.4s { v0, v1 }, [x0], #32
%ld2 = tail call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0i32(i32* %A)
%tmp = getelementptr i32, i32* %A, i32 8
store i32* %tmp, i32** %ptr
ret { <4 x i32>, <4 x i32> } %ld2
}
define { <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld2(i32* %A, i32** %ptr, i64 %inc) {
;CHECK-LABEL: test_v4i32_post_reg_ld2:
;CHECK: ld2.4s { v0, v1 }, [x0], x{{[0-9]+}}
%ld2 = tail call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0i32(i32* %A)
%tmp = getelementptr i32, i32* %A, i64 %inc
store i32* %tmp, i32** %ptr
ret { <4 x i32>, <4 x i32> } %ld2
}
declare { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0i32(i32*)
define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld2(i32* %A, i32** %ptr) {
;CHECK-LABEL: test_v2i32_post_imm_ld2:
;CHECK: ld2.2s { v0, v1 }, [x0], #16
%ld2 = tail call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0i32(i32* %A)
%tmp = getelementptr i32, i32* %A, i32 4
store i32* %tmp, i32** %ptr
ret { <2 x i32>, <2 x i32> } %ld2
}
define { <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld2(i32* %A, i32** %ptr, i64 %inc) {
;CHECK-LABEL: test_v2i32_post_reg_ld2:
;CHECK: ld2.2s { v0, v1 }, [x0], x{{[0-9]+}}
%ld2 = tail call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0i32(i32* %A)
%tmp = getelementptr i32, i32* %A, i64 %inc
store i32* %tmp, i32** %ptr
ret { <2 x i32>, <2 x i32> } %ld2
}
declare { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0i32(i32*)
define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld2(i64* %A, i64** %ptr) {
;CHECK-LABEL: test_v2i64_post_imm_ld2:
;CHECK: ld2.2d { v0, v1 }, [x0], #32
%ld2 = tail call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0i64(i64* %A)
%tmp = getelementptr i64, i64* %A, i32 4
store i64* %tmp, i64** %ptr
ret { <2 x i64>, <2 x i64> } %ld2
}
define { <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld2(i64* %A, i64** %ptr, i64 %inc) {
;CHECK-LABEL: test_v2i64_post_reg_ld2:
;CHECK: ld2.2d { v0, v1 }, [x0], x{{[0-9]+}}
%ld2 = tail call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0i64(i64* %A)
%tmp = getelementptr i64, i64* %A, i64 %inc
store i64* %tmp, i64** %ptr
ret { <2 x i64>, <2 x i64> } %ld2
}
declare { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0i64(i64*)
define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld2(i64* %A, i64** %ptr) {
;CHECK-LABEL: test_v1i64_post_imm_ld2:
;CHECK: ld1.1d { v0, v1 }, [x0], #16
%ld2 = tail call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0i64(i64* %A)
%tmp = getelementptr i64, i64* %A, i32 2
store i64* %tmp, i64** %ptr
ret { <1 x i64>, <1 x i64> } %ld2
}
define { <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld2(i64* %A, i64** %ptr, i64 %inc) {
;CHECK-LABEL: test_v1i64_post_reg_ld2:
;CHECK: ld1.1d { v0, v1 }, [x0], x{{[0-9]+}}
%ld2 = tail call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0i64(i64* %A)
%tmp = getelementptr i64, i64* %A, i64 %inc
store i64* %tmp, i64** %ptr
ret { <1 x i64>, <1 x i64> } %ld2
}
declare { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0i64(i64*)
define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld2(float* %A, float** %ptr) {
;CHECK-LABEL: test_v4f32_post_imm_ld2:
;CHECK: ld2.4s { v0, v1 }, [x0], #32
%ld2 = tail call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0f32(float* %A)
%tmp = getelementptr float, float* %A, i32 8
store float* %tmp, float** %ptr
ret { <4 x float>, <4 x float> } %ld2
}
define { <4 x float>, <4 x float> } @test_v4f32_post_reg_ld2(float* %A, float** %ptr, i64 %inc) {
;CHECK-LABEL: test_v4f32_post_reg_ld2:
;CHECK: ld2.4s { v0, v1 }, [x0], x{{[0-9]+}}
%ld2 = tail call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0f32(float* %A)
%tmp = getelementptr float, float* %A, i64 %inc
store float* %tmp, float** %ptr
ret { <4 x float>, <4 x float> } %ld2
}
declare { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0f32(float*)
define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld2(float* %A, float** %ptr) {
;CHECK-LABEL: test_v2f32_post_imm_ld2:
;CHECK: ld2.2s { v0, v1 }, [x0], #16
%ld2 = tail call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2.v2f32.p0f32(float* %A)
%tmp = getelementptr float, float* %A, i32 4
store float* %tmp, float** %ptr
ret { <2 x float>, <2 x float> } %ld2
}
define { <2 x float>, <2 x float> } @test_v2f32_post_reg_ld2(float* %A, float** %ptr, i64 %inc) {
;CHECK-LABEL: test_v2f32_post_reg_ld2:
;CHECK: ld2.2s { v0, v1 }, [x0], x{{[0-9]+}}
%ld2 = tail call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2.v2f32.p0f32(float* %A)
%tmp = getelementptr float, float* %A, i64 %inc
store float* %tmp, float** %ptr
ret { <2 x float>, <2 x float> } %ld2
}
declare { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2.v2f32.p0f32(float*)
define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld2(double* %A, double** %ptr) {
;CHECK-LABEL: test_v2f64_post_imm_ld2:
;CHECK: ld2.2d { v0, v1 }, [x0], #32
%ld2 = tail call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2.v2f64.p0f64(double* %A)
%tmp = getelementptr double, double* %A, i32 4
store double* %tmp, double** %ptr
ret { <2 x double>, <2 x double> } %ld2
}
define { <2 x double>, <2 x double> } @test_v2f64_post_reg_ld2(double* %A, double** %ptr, i64 %inc) {
;CHECK-LABEL: test_v2f64_post_reg_ld2:
;CHECK: ld2.2d { v0, v1 }, [x0], x{{[0-9]+}}
%ld2 = tail call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2.v2f64.p0f64(double* %A)
%tmp = getelementptr double, double* %A, i64 %inc
store double* %tmp, double** %ptr
ret { <2 x double>, <2 x double> } %ld2
}
declare { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2.v2f64.p0f64(double*)
define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld2(double* %A, double** %ptr) {
;CHECK-LABEL: test_v1f64_post_imm_ld2:
;CHECK: ld1.1d { v0, v1 }, [x0], #16
%ld2 = tail call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2.v1f64.p0f64(double* %A)
%tmp = getelementptr double, double* %A, i32 2
store double* %tmp, double** %ptr
ret { <1 x double>, <1 x double> } %ld2
}
define { <1 x double>, <1 x double> } @test_v1f64_post_reg_ld2(double* %A, double** %ptr, i64 %inc) {
;CHECK-LABEL: test_v1f64_post_reg_ld2:
;CHECK: ld1.1d { v0, v1 }, [x0], x{{[0-9]+}}
%ld2 = tail call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2.v1f64.p0f64(double* %A)
%tmp = getelementptr double, double* %A, i64 %inc
store double* %tmp, double** %ptr
ret { <1 x double>, <1 x double> } %ld2
}
declare { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2.v1f64.p0f64(double*)
define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld3(i8* %A, i8** %ptr) {
;CHECK-LABEL: test_v16i8_post_imm_ld3:
;CHECK: ld3.16b { v0, v1, v2 }, [x0], #48
%ld3 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0i8(i8* %A)
%tmp = getelementptr i8, i8* %A, i32 48
store i8* %tmp, i8** %ptr
ret { <16 x i8>, <16 x i8>, <16 x i8> } %ld3
}
define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld3(i8* %A, i8** %ptr, i64 %inc) {
;CHECK-LABEL: test_v16i8_post_reg_ld3:
;CHECK: ld3.16b { v0, v1, v2 }, [x0], x{{[0-9]+}}
%ld3 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0i8(i8* %A)
%tmp = getelementptr i8, i8* %A, i64 %inc
store i8* %tmp, i8** %ptr
ret { <16 x i8>, <16 x i8>, <16 x i8> } %ld3
}
declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0i8(i8*)
define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld3(i8* %A, i8** %ptr) {
;CHECK-LABEL: test_v8i8_post_imm_ld3:
;CHECK: ld3.8b { v0, v1, v2 }, [x0], #24
%ld3 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0i8(i8* %A)
%tmp = getelementptr i8, i8* %A, i32 24
store i8* %tmp, i8** %ptr
ret { <8 x i8>, <8 x i8>, <8 x i8> } %ld3
}
define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld3(i8* %A, i8** %ptr, i64 %inc) {
;CHECK-LABEL: test_v8i8_post_reg_ld3:
;CHECK: ld3.8b { v0, v1, v2 }, [x0], x{{[0-9]+}}
%ld3 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0i8(i8* %A)
%tmp = getelementptr i8, i8* %A, i64 %inc
store i8* %tmp, i8** %ptr
ret { <8 x i8>, <8 x i8>, <8 x i8> } %ld3
}
declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0i8(i8*)
define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld3(i16* %A, i16** %ptr) {
;CHECK-LABEL: test_v8i16_post_imm_ld3:
;CHECK: ld3.8h { v0, v1, v2 }, [x0], #48
%ld3 = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0i16(i16* %A)
%tmp = getelementptr i16, i16* %A, i32 24
store i16* %tmp, i16** %ptr
ret { <8 x i16>, <8 x i16>, <8 x i16> } %ld3
}
define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld3(i16* %A, i16** %ptr, i64 %inc) {
;CHECK-LABEL: test_v8i16_post_reg_ld3:
;CHECK: ld3.8h { v0, v1, v2 }, [x0], x{{[0-9]+}}
%ld3 = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0i16(i16* %A)
%tmp = getelementptr i16, i16* %A, i64 %inc
store i16* %tmp, i16** %ptr
ret { <8 x i16>, <8 x i16>, <8 x i16> } %ld3
}
declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0i16(i16*)
define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld3(i16* %A, i16** %ptr) {
;CHECK-LABEL: test_v4i16_post_imm_ld3:
;CHECK: ld3.4h { v0, v1, v2 }, [x0], #24
%ld3 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0i16(i16* %A)
%tmp = getelementptr i16, i16* %A, i32 12
store i16* %tmp, i16** %ptr
ret { <4 x i16>, <4 x i16>, <4 x i16> } %ld3
}
define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld3(i16* %A, i16** %ptr, i64 %inc) {
;CHECK-LABEL: test_v4i16_post_reg_ld3:
;CHECK: ld3.4h { v0, v1, v2 }, [x0], x{{[0-9]+}}
%ld3 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0i16(i16* %A)
%tmp = getelementptr i16, i16* %A, i64 %inc
store i16* %tmp, i16** %ptr
ret { <4 x i16>, <4 x i16>, <4 x i16> } %ld3
}
declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0i16(i16*)
define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld3(i32* %A, i32** %ptr) {
;CHECK-LABEL: test_v4i32_post_imm_ld3:
;CHECK: ld3.4s { v0, v1, v2 }, [x0], #48
%ld3 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0i32(i32* %A)
%tmp = getelementptr i32, i32* %A, i32 12
store i32* %tmp, i32** %ptr
ret { <4 x i32>, <4 x i32>, <4 x i32> } %ld3
}
define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld3(i32* %A, i32** %ptr, i64 %inc) {
;CHECK-LABEL: test_v4i32_post_reg_ld3:
;CHECK: ld3.4s { v0, v1, v2 }, [x0], x{{[0-9]+}}
%ld3 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0i32(i32* %A)
%tmp = getelementptr i32, i32* %A, i64 %inc
store i32* %tmp, i32** %ptr
ret { <4 x i32>, <4 x i32>, <4 x i32> } %ld3
}
declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0i32(i32*)
define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld3(i32* %A, i32** %ptr) {
;CHECK-LABEL: test_v2i32_post_imm_ld3:
;CHECK: ld3.2s { v0, v1, v2 }, [x0], #24
%ld3 = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3.v2i32.p0i32(i32* %A)
%tmp = getelementptr i32, i32* %A, i32 6
store i32* %tmp, i32** %ptr
ret { <2 x i32>, <2 x i32>, <2 x i32> } %ld3
}
define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld3(i32* %A, i32** %ptr, i64 %inc) {
;CHECK-LABEL: test_v2i32_post_reg_ld3:
;CHECK: ld3.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
%ld3 = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3.v2i32.p0i32(i32* %A)
%tmp = getelementptr i32, i32* %A, i64 %inc
store i32* %tmp, i32** %ptr
ret { <2 x i32>, <2 x i32>, <2 x i32> } %ld3
}
declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3.v2i32.p0i32(i32*)
define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld3(i64* %A, i64** %ptr) {
;CHECK-LABEL: test_v2i64_post_imm_ld3:
;CHECK: ld3.2d { v0, v1, v2 }, [x0], #48
%ld3 = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0i64(i64* %A)
%tmp = getelementptr i64, i64* %A, i32 6
store i64* %tmp, i64** %ptr
ret { <2 x i64>, <2 x i64>, <2 x i64> } %ld3
}
define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld3(i64* %A, i64** %ptr, i64 %inc) {
;CHECK-LABEL: test_v2i64_post_reg_ld3:
;CHECK: ld3.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
%ld3 = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0i64(i64* %A)
%tmp = getelementptr i64, i64* %A, i64 %inc
store i64* %tmp, i64** %ptr
ret { <2 x i64>, <2 x i64>, <2 x i64> } %ld3
}
declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0i64(i64*)
define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld3(i64* %A, i64** %ptr) {
;CHECK-LABEL: test_v1i64_post_imm_ld3:
;CHECK: ld1.1d { v0, v1, v2 }, [x0], #24
%ld3 = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0i64(i64* %A)
%tmp = getelementptr i64, i64* %A, i32 3
store i64* %tmp, i64** %ptr
ret { <1 x i64>, <1 x i64>, <1 x i64> } %ld3
}
define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld3(i64* %A, i64** %ptr, i64 %inc) {
;CHECK-LABEL: test_v1i64_post_reg_ld3:
;CHECK: ld1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
%ld3 = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0i64(i64* %A)
%tmp = getelementptr i64, i64* %A, i64 %inc
store i64* %tmp, i64** %ptr
ret { <1 x i64>, <1 x i64>, <1 x i64> } %ld3
}
declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0i64(i64*)
define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld3(float* %A, float** %ptr) {
;CHECK-LABEL: test_v4f32_post_imm_ld3:
;CHECK: ld3.4s { v0, v1, v2 }, [x0], #48
%ld3 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3.v4f32.p0f32(float* %A)
%tmp = getelementptr float, float* %A, i32 12
store float* %tmp, float** %ptr
ret { <4 x float>, <4 x float>, <4 x float> } %ld3
}
define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld3(float* %A, float** %ptr, i64 %inc) {
;CHECK-LABEL: test_v4f32_post_reg_ld3:
;CHECK: ld3.4s { v0, v1, v2 }, [x0], x{{[0-9]+}}
%ld3 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3.v4f32.p0f32(float* %A)
%tmp = getelementptr float, float* %A, i64 %inc
store float* %tmp, float** %ptr
ret { <4 x float>, <4 x float>, <4 x float> } %ld3
}
declare { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3.v4f32.p0f32(float*)
define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld3(float* %A, float** %ptr) {
;CHECK-LABEL: test_v2f32_post_imm_ld3:
;CHECK: ld3.2s { v0, v1, v2 }, [x0], #24
%ld3 = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3.v2f32.p0f32(float* %A)
%tmp = getelementptr float, float* %A, i32 6
store float* %tmp, float** %ptr
ret { <2 x float>, <2 x float>, <2 x float> } %ld3
}
define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld3(float* %A, float** %ptr, i64 %inc) {
;CHECK-LABEL: test_v2f32_post_reg_ld3:
;CHECK: ld3.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
%ld3 = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3.v2f32.p0f32(float* %A)
%tmp = getelementptr float, float* %A, i64 %inc
store float* %tmp, float** %ptr
ret { <2 x float>, <2 x float>, <2 x float> } %ld3
}
declare { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3.v2f32.p0f32(float*)
define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld3(double* %A, double** %ptr) {
;CHECK-LABEL: test_v2f64_post_imm_ld3:
;CHECK: ld3.2d { v0, v1, v2 }, [x0], #48
%ld3 = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3.v2f64.p0f64(double* %A)
%tmp = getelementptr double, double* %A, i32 6
store double* %tmp, double** %ptr
ret { <2 x double>, <2 x double>, <2 x double> } %ld3
}
define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld3(double* %A, double** %ptr, i64 %inc) {
;CHECK-LABEL: test_v2f64_post_reg_ld3:
;CHECK: ld3.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
%ld3 = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3.v2f64.p0f64(double* %A)
%tmp = getelementptr double, double* %A, i64 %inc
store double* %tmp, double** %ptr
ret { <2 x double>, <2 x double>, <2 x double> } %ld3
}
declare { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3.v2f64.p0f64(double*)
define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld3(double* %A, double** %ptr) {
;CHECK-LABEL: test_v1f64_post_imm_ld3:
;CHECK: ld1.1d { v0, v1, v2 }, [x0], #24
%ld3 = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3.v1f64.p0f64(double* %A)
%tmp = getelementptr double, double* %A, i32 3
store double* %tmp, double** %ptr
ret { <1 x double>, <1 x double>, <1 x double> } %ld3
}
define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld3(double* %A, double** %ptr, i64 %inc) {
;CHECK-LABEL: test_v1f64_post_reg_ld3:
;CHECK: ld1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
%ld3 = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3.v1f64.p0f64(double* %A)
%tmp = getelementptr double, double* %A, i64 %inc
store double* %tmp, double** %ptr
ret { <1 x double>, <1 x double>, <1 x double> } %ld3
}
declare { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3.v1f64.p0f64(double*)
define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld4(i8* %A, i8** %ptr) {
;CHECK-LABEL: test_v16i8_post_imm_ld4:
;CHECK: ld4.16b { v0, v1, v2, v3 }, [x0], #64
%ld4 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0i8(i8* %A)
%tmp = getelementptr i8, i8* %A, i32 64
store i8* %tmp, i8** %ptr
ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %ld4
}
define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld4(i8* %A, i8** %ptr, i64 %inc) {
;CHECK-LABEL: test_v16i8_post_reg_ld4:
;CHECK: ld4.16b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
%ld4 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0i8(i8* %A)
%tmp = getelementptr i8, i8* %A, i64 %inc
store i8* %tmp, i8** %ptr
ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %ld4
}
declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0i8(i8*)
define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld4(i8* %A, i8** %ptr) {
;CHECK-LABEL: test_v8i8_post_imm_ld4:
;CHECK: ld4.8b { v0, v1, v2, v3 }, [x0], #32
%ld4 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0i8(i8* %A)
%tmp = getelementptr i8, i8* %A, i32 32
store i8* %tmp, i8** %ptr
ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %ld4
}
define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld4(i8* %A, i8** %ptr, i64 %inc) {
;CHECK-LABEL: test_v8i8_post_reg_ld4:
;CHECK: ld4.8b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
%ld4 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0i8(i8* %A)
%tmp = getelementptr i8, i8* %A, i64 %inc
store i8* %tmp, i8** %ptr
ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %ld4
}
declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0i8(i8*)
define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld4(i16* %A, i16** %ptr) {
;CHECK-LABEL: test_v8i16_post_imm_ld4:
;CHECK: ld4.8h { v0, v1, v2, v3 }, [x0], #64
%ld4 = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0i16(i16* %A)
%tmp = getelementptr i16, i16* %A, i32 32
store i16* %tmp, i16** %ptr
ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %ld4
}
define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld4(i16* %A, i16** %ptr, i64 %inc) {
;CHECK-LABEL: test_v8i16_post_reg_ld4:
;CHECK: ld4.8h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
%ld4 = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0i16(i16* %A)
%tmp = getelementptr i16, i16* %A, i64 %inc
store i16* %tmp, i16** %ptr
ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %ld4
}
declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0i16(i16*)
define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld4(i16* %A, i16** %ptr) {
;CHECK-LABEL: test_v4i16_post_imm_ld4:
;CHECK: ld4.4h { v0, v1, v2, v3 }, [x0], #32
%ld4 = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0i16(i16* %A)
%tmp = getelementptr i16, i16* %A, i32 16
store i16* %tmp, i16** %ptr
ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %ld4
}
define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld4(i16* %A, i16** %ptr, i64 %inc) {
;CHECK-LABEL: test_v4i16_post_reg_ld4:
;CHECK: ld4.4h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
%ld4 = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0i16(i16* %A)
%tmp = getelementptr i16, i16* %A, i64 %inc
store i16* %tmp, i16** %ptr
ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %ld4
}
declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0i16(i16*)
define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld4(i32* %A, i32** %ptr) {
;CHECK-LABEL: test_v4i32_post_imm_ld4:
;CHECK: ld4.4s { v0, v1, v2, v3 }, [x0], #64
%ld4 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0i32(i32* %A)
%tmp = getelementptr i32, i32* %A, i32 16
store i32* %tmp, i32** %ptr
ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %ld4
}
define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld4(i32* %A, i32** %ptr, i64 %inc) {
;CHECK-LABEL: test_v4i32_post_reg_ld4:
;CHECK: ld4.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
%ld4 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0i32(i32* %A)
%tmp = getelementptr i32, i32* %A, i64 %inc
store i32* %tmp, i32** %ptr
ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %ld4
}
declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0i32(i32*)
define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld4(i32* %A, i32** %ptr) {
;CHECK-LABEL: test_v2i32_post_imm_ld4:
;CHECK: ld4.2s { v0, v1, v2, v3 }, [x0], #32
%ld4 = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4.v2i32.p0i32(i32* %A)
%tmp = getelementptr i32, i32* %A, i32 8
store i32* %tmp, i32** %ptr
ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %ld4
}
define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld4(i32* %A, i32** %ptr, i64 %inc) {
;CHECK-LABEL: test_v2i32_post_reg_ld4:
;CHECK: ld4.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
%ld4 = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4.v2i32.p0i32(i32* %A)
%tmp = getelementptr i32, i32* %A, i64 %inc
store i32* %tmp, i32** %ptr
ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %ld4
}
declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4.v2i32.p0i32(i32*)
define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld4(i64* %A, i64** %ptr) {
;CHECK-LABEL: test_v2i64_post_imm_ld4:
;CHECK: ld4.2d { v0, v1, v2, v3 }, [x0], #64
%ld4 = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0i64(i64* %A)
%tmp = getelementptr i64, i64* %A, i32 8
store i64* %tmp, i64** %ptr
ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %ld4
}
define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld4(i64* %A, i64** %ptr, i64 %inc) {
;CHECK-LABEL: test_v2i64_post_reg_ld4:
;CHECK: ld4.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
%ld4 = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0i64(i64* %A)
%tmp = getelementptr i64, i64* %A, i64 %inc
store i64* %tmp, i64** %ptr
ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %ld4
}
declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0i64(i64*)
define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld4(i64* %A, i64** %ptr) {
;CHECK-LABEL: test_v1i64_post_imm_ld4:
;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], #32
%ld4 = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0i64(i64* %A)
%tmp = getelementptr i64, i64* %A, i32 4
store i64* %tmp, i64** %ptr
ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %ld4
}
define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld4(i64* %A, i64** %ptr, i64 %inc) {
;CHECK-LABEL: test_v1i64_post_reg_ld4:
;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
%ld4 = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0i64(i64* %A)
%tmp = getelementptr i64, i64* %A, i64 %inc
store i64* %tmp, i64** %ptr
ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %ld4
}
declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0i64(i64*)
define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld4(float* %A, float** %ptr) {
;CHECK-LABEL: test_v4f32_post_imm_ld4:
;CHECK: ld4.4s { v0, v1, v2, v3 }, [x0], #64
%ld4 = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4.v4f32.p0f32(float* %A)
%tmp = getelementptr float, float* %A, i32 16
store float* %tmp, float** %ptr
ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ld4
}
define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld4(float* %A, float** %ptr, i64 %inc) {
;CHECK-LABEL: test_v4f32_post_reg_ld4:
;CHECK: ld4.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
%ld4 = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4.v4f32.p0f32(float* %A)
%tmp = getelementptr float, float* %A, i64 %inc
store float* %tmp, float** %ptr
ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ld4
}
declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4.v4f32.p0f32(float*)
define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld4(float* %A, float** %ptr) {
;CHECK-LABEL: test_v2f32_post_imm_ld4:
;CHECK: ld4.2s { v0, v1, v2, v3 }, [x0], #32
%ld4 = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4.v2f32.p0f32(float* %A)
%tmp = getelementptr float, float* %A, i32 8
store float* %tmp, float** %ptr
ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %ld4
}
define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld4(float* %A, float** %ptr, i64 %inc) {
;CHECK-LABEL: test_v2f32_post_reg_ld4:
;CHECK: ld4.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
%ld4 = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4.v2f32.p0f32(float* %A)
%tmp = getelementptr float, float* %A, i64 %inc
store float* %tmp, float** %ptr
ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %ld4
}
declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4.v2f32.p0f32(float*)
define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld4(double* %A, double** %ptr) {
;CHECK-LABEL: test_v2f64_post_imm_ld4:
;CHECK: ld4.2d { v0, v1, v2, v3 }, [x0], #64
%ld4 = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4.v2f64.p0f64(double* %A)
%tmp = getelementptr double, double* %A, i32 8
store double* %tmp, double** %ptr
ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %ld4
}
define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld4(double* %A, double** %ptr, i64 %inc) {
;CHECK-LABEL: test_v2f64_post_reg_ld4:
;CHECK: ld4.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
%ld4 = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4.v2f64.p0f64(double* %A)
%tmp = getelementptr double, double* %A, i64 %inc
store double* %tmp, double** %ptr
ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %ld4
}
declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4.v2f64.p0f64(double*)
define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld4(double* %A, double** %ptr) {
;CHECK-LABEL: test_v1f64_post_imm_ld4:
;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], #32
%ld4 = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4.v1f64.p0f64(double* %A)
%tmp = getelementptr double, double* %A, i32 4
store double* %tmp, double** %ptr
ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %ld4
}
define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld4(double* %A, double** %ptr, i64 %inc) {
;CHECK-LABEL: test_v1f64_post_reg_ld4:
;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
%ld4 = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4.v1f64.p0f64(double* %A)
%tmp = getelementptr double, double* %A, i64 %inc
store double* %tmp, double** %ptr
ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %ld4
}
declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4.v1f64.p0f64(double*)
define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld1x2(i8* %A, i8** %ptr) {
;CHECK-LABEL: test_v16i8_post_imm_ld1x2:
;CHECK: ld1.16b { v0, v1 }, [x0], #32
%ld1x2 = tail call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x2.v16i8.p0i8(i8* %A)
%tmp = getelementptr i8, i8* %A, i32 32
store i8* %tmp, i8** %ptr
ret { <16 x i8>, <16 x i8> } %ld1x2
}
define { <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld1x2(i8* %A, i8** %ptr, i64 %inc) {
;CHECK-LABEL: test_v16i8_post_reg_ld1x2:
;CHECK: ld1.16b { v0, v1 }, [x0], x{{[0-9]+}}
%ld1x2 = tail call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x2.v16i8.p0i8(i8* %A)
%tmp = getelementptr i8, i8* %A, i64 %inc
store i8* %tmp, i8** %ptr
ret { <16 x i8>, <16 x i8> } %ld1x2
}
declare { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x2.v16i8.p0i8(i8*)
define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld1x2(i8* %A, i8** %ptr) {
;CHECK-LABEL: test_v8i8_post_imm_ld1x2:
;CHECK: ld1.8b { v0, v1 }, [x0], #16
%ld1x2 = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x2.v8i8.p0i8(i8* %A)
%tmp = getelementptr i8, i8* %A, i32 16
store i8* %tmp, i8** %ptr
ret { <8 x i8>, <8 x i8> } %ld1x2
}
define { <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld1x2(i8* %A, i8** %ptr, i64 %inc) {
;CHECK-LABEL: test_v8i8_post_reg_ld1x2:
;CHECK: ld1.8b { v0, v1 }, [x0], x{{[0-9]+}}
%ld1x2 = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x2.v8i8.p0i8(i8* %A)
%tmp = getelementptr i8, i8* %A, i64 %inc
store i8* %tmp, i8** %ptr
ret { <8 x i8>, <8 x i8> } %ld1x2
}
declare { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x2.v8i8.p0i8(i8*)
define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld1x2(i16* %A, i16** %ptr) {
;CHECK-LABEL: test_v8i16_post_imm_ld1x2:
;CHECK: ld1.8h { v0, v1 }, [x0], #32
%ld1x2 = tail call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x2.v8i16.p0i16(i16* %A)
%tmp = getelementptr i16, i16* %A, i32 16
store i16* %tmp, i16** %ptr
ret { <8 x i16>, <8 x i16> } %ld1x2
}
define { <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld1x2(i16* %A, i16** %ptr, i64 %inc) {
;CHECK-LABEL: test_v8i16_post_reg_ld1x2:
;CHECK: ld1.8h { v0, v1 }, [x0], x{{[0-9]+}}
%ld1x2 = tail call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x2.v8i16.p0i16(i16* %A)
%tmp = getelementptr i16, i16* %A, i64 %inc
store i16* %tmp, i16** %ptr
ret { <8 x i16>, <8 x i16> } %ld1x2
}
declare { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x2.v8i16.p0i16(i16*)
define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld1x2(i16* %A, i16** %ptr) {
;CHECK-LABEL: test_v4i16_post_imm_ld1x2:
;CHECK: ld1.4h { v0, v1 }, [x0], #16
%ld1x2 = tail call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x2.v4i16.p0i16(i16* %A)
%tmp = getelementptr i16, i16* %A, i32 8
store i16* %tmp, i16** %ptr
ret { <4 x i16>, <4 x i16> } %ld1x2
}
define { <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld1x2(i16* %A, i16** %ptr, i64 %inc) {
;CHECK-LABEL: test_v4i16_post_reg_ld1x2:
;CHECK: ld1.4h { v0, v1 }, [x0], x{{[0-9]+}}
%ld1x2 = tail call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x2.v4i16.p0i16(i16* %A)
%tmp = getelementptr i16, i16* %A, i64 %inc
store i16* %tmp, i16** %ptr
ret { <4 x i16>, <4 x i16> } %ld1x2
}
declare { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x2.v4i16.p0i16(i16*)
define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld1x2(i32* %A, i32** %ptr) {
;CHECK-LABEL: test_v4i32_post_imm_ld1x2:
;CHECK: ld1.4s { v0, v1 }, [x0], #32
%ld1x2 = tail call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x2.v4i32.p0i32(i32* %A)
%tmp = getelementptr i32, i32* %A, i32 8
store i32* %tmp, i32** %ptr
ret { <4 x i32>, <4 x i32> } %ld1x2
}
define { <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld1x2(i32* %A, i32** %ptr, i64 %inc) {
;CHECK-LABEL: test_v4i32_post_reg_ld1x2:
;CHECK: ld1.4s { v0, v1 }, [x0], x{{[0-9]+}}
%ld1x2 = tail call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x2.v4i32.p0i32(i32* %A)
%tmp = getelementptr i32, i32* %A, i64 %inc
store i32* %tmp, i32** %ptr
ret { <4 x i32>, <4 x i32> } %ld1x2
}
declare { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x2.v4i32.p0i32(i32*)
define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld1x2(i32* %A, i32** %ptr) {
;CHECK-LABEL: test_v2i32_post_imm_ld1x2:
;CHECK: ld1.2s { v0, v1 }, [x0], #16
%ld1x2 = tail call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x2.v2i32.p0i32(i32* %A)
%tmp = getelementptr i32, i32* %A, i32 4
store i32* %tmp, i32** %ptr
ret { <2 x i32>, <2 x i32> } %ld1x2
}
define { <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld1x2(i32* %A, i32** %ptr, i64 %inc) {
;CHECK-LABEL: test_v2i32_post_reg_ld1x2:
;CHECK: ld1.2s { v0, v1 }, [x0], x{{[0-9]+}}
%ld1x2 = tail call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x2.v2i32.p0i32(i32* %A)
%tmp = getelementptr i32, i32* %A, i64 %inc
store i32* %tmp, i32** %ptr
ret { <2 x i32>, <2 x i32> } %ld1x2
}
declare { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x2.v2i32.p0i32(i32*)
define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld1x2(i64* %A, i64** %ptr) {
;CHECK-LABEL: test_v2i64_post_imm_ld1x2:
;CHECK: ld1.2d { v0, v1 }, [x0], #32
%ld1x2 = tail call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x2.v2i64.p0i64(i64* %A)
%tmp = getelementptr i64, i64* %A, i32 4
store i64* %tmp, i64** %ptr
ret { <2 x i64>, <2 x i64> } %ld1x2
}
define { <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld1x2(i64* %A, i64** %ptr, i64 %inc) {
;CHECK-LABEL: test_v2i64_post_reg_ld1x2:
;CHECK: ld1.2d { v0, v1 }, [x0], x{{[0-9]+}}
%ld1x2 = tail call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x2.v2i64.p0i64(i64* %A)
%tmp = getelementptr i64, i64* %A, i64 %inc
store i64* %tmp, i64** %ptr
ret { <2 x i64>, <2 x i64> } %ld1x2
}
declare { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x2.v2i64.p0i64(i64*)
define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld1x2(i64* %A, i64** %ptr) {
;CHECK-LABEL: test_v1i64_post_imm_ld1x2:
;CHECK: ld1.1d { v0, v1 }, [x0], #16
%ld1x2 = tail call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x2.v1i64.p0i64(i64* %A)
%tmp = getelementptr i64, i64* %A, i32 2
store i64* %tmp, i64** %ptr
ret { <1 x i64>, <1 x i64> } %ld1x2
}
define { <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld1x2(i64* %A, i64** %ptr, i64 %inc) {
;CHECK-LABEL: test_v1i64_post_reg_ld1x2:
;CHECK: ld1.1d { v0, v1 }, [x0], x{{[0-9]+}}
%ld1x2 = tail call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x2.v1i64.p0i64(i64* %A)
%tmp = getelementptr i64, i64* %A, i64 %inc
store i64* %tmp, i64** %ptr
ret { <1 x i64>, <1 x i64> } %ld1x2
}
declare { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x2.v1i64.p0i64(i64*)
define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld1x2(float* %A, float** %ptr) {
;CHECK-LABEL: test_v4f32_post_imm_ld1x2:
;CHECK: ld1.4s { v0, v1 }, [x0], #32
%ld1x2 = tail call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x2.v4f32.p0f32(float* %A)
%tmp = getelementptr float, float* %A, i32 8
store float* %tmp, float** %ptr
ret { <4 x float>, <4 x float> } %ld1x2
}
define { <4 x float>, <4 x float> } @test_v4f32_post_reg_ld1x2(float* %A, float** %ptr, i64 %inc) {
;CHECK-LABEL: test_v4f32_post_reg_ld1x2:
;CHECK: ld1.4s { v0, v1 }, [x0], x{{[0-9]+}}
%ld1x2 = tail call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x2.v4f32.p0f32(float* %A)
%tmp = getelementptr float, float* %A, i64 %inc
store float* %tmp, float** %ptr
ret { <4 x float>, <4 x float> } %ld1x2
}
declare { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x2.v4f32.p0f32(float*)
define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld1x2(float* %A, float** %ptr) {
;CHECK-LABEL: test_v2f32_post_imm_ld1x2:
;CHECK: ld1.2s { v0, v1 }, [x0], #16
%ld1x2 = tail call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x2.v2f32.p0f32(float* %A)
%tmp = getelementptr float, float* %A, i32 4
store float* %tmp, float** %ptr
ret { <2 x float>, <2 x float> } %ld1x2
}
define { <2 x float>, <2 x float> } @test_v2f32_post_reg_ld1x2(float* %A, float** %ptr, i64 %inc) {
;CHECK-LABEL: test_v2f32_post_reg_ld1x2:
;CHECK: ld1.2s { v0, v1 }, [x0], x{{[0-9]+}}
%ld1x2 = tail call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x2.v2f32.p0f32(float* %A)
%tmp = getelementptr float, float* %A, i64 %inc
store float* %tmp, float** %ptr
ret { <2 x float>, <2 x float> } %ld1x2
}
declare { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x2.v2f32.p0f32(float*)
define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld1x2(double* %A, double** %ptr) {
;CHECK-LABEL: test_v2f64_post_imm_ld1x2:
;CHECK: ld1.2d { v0, v1 }, [x0], #32
%ld1x2 = tail call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x2.v2f64.p0f64(double* %A)
%tmp = getelementptr double, double* %A, i32 4
store double* %tmp, double** %ptr
ret { <2 x double>, <2 x double> } %ld1x2
}
define { <2 x double>, <2 x double> } @test_v2f64_post_reg_ld1x2(double* %A, double** %ptr, i64 %inc) {
;CHECK-LABEL: test_v2f64_post_reg_ld1x2:
;CHECK: ld1.2d { v0, v1 }, [x0], x{{[0-9]+}}
%ld1x2 = tail call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x2.v2f64.p0f64(double* %A)
%tmp = getelementptr double, double* %A, i64 %inc
store double* %tmp, double** %ptr
ret { <2 x double>, <2 x double> } %ld1x2
}
declare { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x2.v2f64.p0f64(double*)
define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld1x2(double* %A, double** %ptr) {
;CHECK-LABEL: test_v1f64_post_imm_ld1x2:
;CHECK: ld1.1d { v0, v1 }, [x0], #16
%ld1x2 = tail call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x2.v1f64.p0f64(double* %A)
%tmp = getelementptr double, double* %A, i32 2
store double* %tmp, double** %ptr
ret { <1 x double>, <1 x double> } %ld1x2
}
define { <1 x double>, <1 x double> } @test_v1f64_post_reg_ld1x2(double* %A, double** %ptr, i64 %inc) {
;CHECK-LABEL: test_v1f64_post_reg_ld1x2:
;CHECK: ld1.1d { v0, v1 }, [x0], x{{[0-9]+}}
%ld1x2 = tail call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x2.v1f64.p0f64(double* %A)
%tmp = getelementptr double, double* %A, i64 %inc
store double* %tmp, double** %ptr
ret { <1 x double>, <1 x double> } %ld1x2
}
declare { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x2.v1f64.p0f64(double*)
define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld1x3(i8* %A, i8** %ptr) {
;CHECK-LABEL: test_v16i8_post_imm_ld1x3:
;CHECK: ld1.16b { v0, v1, v2 }, [x0], #48
%ld1x3 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x3.v16i8.p0i8(i8* %A)
%tmp = getelementptr i8, i8* %A, i32 48
store i8* %tmp, i8** %ptr
ret { <16 x i8>, <16 x i8>, <16 x i8> } %ld1x3
}
define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld1x3(i8* %A, i8** %ptr, i64 %inc) {
;CHECK-LABEL: test_v16i8_post_reg_ld1x3:
;CHECK: ld1.16b { v0, v1, v2 }, [x0], x{{[0-9]+}}
%ld1x3 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x3.v16i8.p0i8(i8* %A)
%tmp = getelementptr i8, i8* %A, i64 %inc
store i8* %tmp, i8** %ptr
ret { <16 x i8>, <16 x i8>, <16 x i8> } %ld1x3
}
declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x3.v16i8.p0i8(i8*)
define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld1x3(i8* %A, i8** %ptr) {
;CHECK-LABEL: test_v8i8_post_imm_ld1x3:
;CHECK: ld1.8b { v0, v1, v2 }, [x0], #24
%ld1x3 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x3.v8i8.p0i8(i8* %A)
%tmp = getelementptr i8, i8* %A, i32 24
store i8* %tmp, i8** %ptr
ret { <8 x i8>, <8 x i8>, <8 x i8> } %ld1x3
}
define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld1x3(i8* %A, i8** %ptr, i64 %inc) {
;CHECK-LABEL: test_v8i8_post_reg_ld1x3:
;CHECK: ld1.8b { v0, v1, v2 }, [x0], x{{[0-9]+}}
%ld1x3 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x3.v8i8.p0i8(i8* %A)
%tmp = getelementptr i8, i8* %A, i64 %inc
store i8* %tmp, i8** %ptr
ret { <8 x i8>, <8 x i8>, <8 x i8> } %ld1x3
}
declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x3.v8i8.p0i8(i8*)
define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld1x3(i16* %A, i16** %ptr) {
;CHECK-LABEL: test_v8i16_post_imm_ld1x3:
;CHECK: ld1.8h { v0, v1, v2 }, [x0], #48
%ld1x3 = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x3.v8i16.p0i16(i16* %A)
%tmp = getelementptr i16, i16* %A, i32 24
store i16* %tmp, i16** %ptr
ret { <8 x i16>, <8 x i16>, <8 x i16> } %ld1x3
}
define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld1x3(i16* %A, i16** %ptr, i64 %inc) {
;CHECK-LABEL: test_v8i16_post_reg_ld1x3:
;CHECK: ld1.8h { v0, v1, v2 }, [x0], x{{[0-9]+}}
%ld1x3 = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x3.v8i16.p0i16(i16* %A)
%tmp = getelementptr i16, i16* %A, i64 %inc
store i16* %tmp, i16** %ptr
ret { <8 x i16>, <8 x i16>, <8 x i16> } %ld1x3
}
declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x3.v8i16.p0i16(i16*)
define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld1x3(i16* %A, i16** %ptr) {
;CHECK-LABEL: test_v4i16_post_imm_ld1x3:
;CHECK: ld1.4h { v0, v1, v2 }, [x0], #24
%ld1x3 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x3.v4i16.p0i16(i16* %A)
%tmp = getelementptr i16, i16* %A, i32 12
store i16* %tmp, i16** %ptr
ret { <4 x i16>, <4 x i16>, <4 x i16> } %ld1x3
}
define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld1x3(i16* %A, i16** %ptr, i64 %inc) {
;CHECK-LABEL: test_v4i16_post_reg_ld1x3:
;CHECK: ld1.4h { v0, v1, v2 }, [x0], x{{[0-9]+}}
%ld1x3 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x3.v4i16.p0i16(i16* %A)
%tmp = getelementptr i16, i16* %A, i64 %inc
store i16* %tmp, i16** %ptr
ret { <4 x i16>, <4 x i16>, <4 x i16> } %ld1x3
}
declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x3.v4i16.p0i16(i16*)
define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld1x3(i32* %A, i32** %ptr) {
;CHECK-LABEL: test_v4i32_post_imm_ld1x3:
;CHECK: ld1.4s { v0, v1, v2 }, [x0], #48
%ld1x3 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x3.v4i32.p0i32(i32* %A)
%tmp = getelementptr i32, i32* %A, i32 12
store i32* %tmp, i32** %ptr
ret { <4 x i32>, <4 x i32>, <4 x i32> } %ld1x3
}
define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld1x3(i32* %A, i32** %ptr, i64 %inc) {
;CHECK-LABEL: test_v4i32_post_reg_ld1x3:
;CHECK: ld1.4s { v0, v1, v2 }, [x0], x{{[0-9]+}}
%ld1x3 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x3.v4i32.p0i32(i32* %A)
%tmp = getelementptr i32, i32* %A, i64 %inc
store i32* %tmp, i32** %ptr
ret { <4 x i32>, <4 x i32>, <4 x i32> } %ld1x3
}
declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x3.v4i32.p0i32(i32