| // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py |
| // RUN: %clang_cc1 -triple aarch64 -target-feature +neon \ |
| // RUN: -target-feature +rcpc3 -disable-O0-optnone -emit-llvm -o - %s \ |
| // RUN: | opt -S -passes=mem2reg | FileCheck %s |
| |
| // REQUIRES: aarch64-registered-target |
| |
| #include <arm_neon.h> |
| |
| |
| // CHECK-LABEL: @test_vldap1q_lane_u64( |
| // CHECK-NEXT: entry: |
| // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B:%.*]] to <16 x i8> |
| // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> |
| // CHECK-NEXT: [[TMP2:%.*]] = load atomic i64, ptr [[A:%.*]] acquire, align 8 |
| // CHECK-NEXT: [[VLDAP1_LANE:%.*]] = insertelement <2 x i64> [[TMP1]], i64 [[TMP2]], i32 1 |
| // CHECK-NEXT: ret <2 x i64> [[VLDAP1_LANE]] |
| // Exercises vldap1q_lane_u64 with lane index 1: the expected IR above is an atomic acquire i64 load from a, inserted into element 1 of b. |
| uint64x2_t test_vldap1q_lane_u64(uint64_t *a, uint64x2_t b) { |
| return vldap1q_lane_u64(a, b, 1); |
| } |
| |
| // CHECK-LABEL: @test_vldap1q_lane_s64( |
| // CHECK-NEXT: entry: |
| // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B:%.*]] to <16 x i8> |
| // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> |
| // CHECK-NEXT: [[TMP2:%.*]] = load atomic i64, ptr [[A:%.*]] acquire, align 8 |
| // CHECK-NEXT: [[VLDAP1_LANE:%.*]] = insertelement <2 x i64> [[TMP1]], i64 [[TMP2]], i32 1 |
| // CHECK-NEXT: ret <2 x i64> [[VLDAP1_LANE]] |
| // Exercises vldap1q_lane_s64 with lane index 1: the expected IR above is an atomic acquire i64 load from a, inserted into element 1 of b. |
| int64x2_t test_vldap1q_lane_s64(int64_t *a, int64x2_t b) { |
| return vldap1q_lane_s64(a, b, 1); |
| } |
| |
| // CHECK-LABEL: @test_vldap1q_lane_f64( |
| // CHECK-NEXT: entry: |
| // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[B:%.*]] to <16 x i8> |
| // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> |
| // CHECK-NEXT: [[TMP2:%.*]] = load atomic double, ptr [[A:%.*]] acquire, align 8 |
| // CHECK-NEXT: [[VLDAP1_LANE:%.*]] = insertelement <2 x double> [[TMP1]], double [[TMP2]], i32 1 |
| // CHECK-NEXT: ret <2 x double> [[VLDAP1_LANE]] |
| // Exercises vldap1q_lane_f64 with lane index 1: the expected IR above is an atomic acquire double load from a, inserted into element 1 of b. |
| float64x2_t test_vldap1q_lane_f64(float64_t *a, float64x2_t b) { |
| return vldap1q_lane_f64(a, b, 1); |
| } |
| |
| // CHECK-LABEL: @test_vldap1q_lane_p64( |
| // CHECK-NEXT: entry: |
| // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B:%.*]] to <16 x i8> |
| // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> |
| // CHECK-NEXT: [[TMP2:%.*]] = load atomic i64, ptr [[A:%.*]] acquire, align 8 |
| // CHECK-NEXT: [[VLDAP1_LANE:%.*]] = insertelement <2 x i64> [[TMP1]], i64 [[TMP2]], i32 1 |
| // CHECK-NEXT: ret <2 x i64> [[VLDAP1_LANE]] |
| // Exercises vldap1q_lane_p64 with lane index 1: the expected IR above treats the poly64 data as i64, doing an atomic acquire i64 load from a inserted into element 1 of b. |
| poly64x2_t test_vldap1q_lane_p64(poly64_t *a, poly64x2_t b) { |
| return vldap1q_lane_p64(a, b, 1); |
| } |
| |
| // CHECK-LABEL: @test_vldap1_lane_u64( |
| // CHECK-NEXT: entry: |
| // CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B:%.*]] to <8 x i8> |
| // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> |
| // CHECK-NEXT: [[TMP2:%.*]] = load atomic i64, ptr [[A:%.*]] acquire, align 8 |
| // CHECK-NEXT: [[VLDAP1_LANE:%.*]] = insertelement <1 x i64> [[TMP1]], i64 [[TMP2]], i32 0 |
| // CHECK-NEXT: ret <1 x i64> [[VLDAP1_LANE]] |
| // Exercises vldap1_lane_u64 with lane index 0: the expected IR above is an atomic acquire i64 load from a, inserted into element 0 of the <1 x i64> vector b. |
| uint64x1_t test_vldap1_lane_u64(uint64_t *a, uint64x1_t b) { |
| return vldap1_lane_u64(a, b, 0); |
| } |
| |
| // CHECK-LABEL: @test_vldap1_lane_s64( |
| // CHECK-NEXT: entry: |
| // CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B:%.*]] to <8 x i8> |
| // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> |
| // CHECK-NEXT: [[TMP2:%.*]] = load atomic i64, ptr [[A:%.*]] acquire, align 8 |
| // CHECK-NEXT: [[VLDAP1_LANE:%.*]] = insertelement <1 x i64> [[TMP1]], i64 [[TMP2]], i32 0 |
| // CHECK-NEXT: ret <1 x i64> [[VLDAP1_LANE]] |
| // Exercises vldap1_lane_s64 with lane index 0: the expected IR above is an atomic acquire i64 load from a, inserted into element 0 of the <1 x i64> vector b. |
| int64x1_t test_vldap1_lane_s64(int64_t *a, int64x1_t b) { |
| return vldap1_lane_s64(a, b, 0); |
| } |
| |
| // CHECK-LABEL: @test_vldap1_lane_f64( |
| // CHECK-NEXT: entry: |
| // CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[B:%.*]] to <8 x i8> |
| // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double> |
| // CHECK-NEXT: [[TMP2:%.*]] = load atomic double, ptr [[A:%.*]] acquire, align 8 |
| // CHECK-NEXT: [[VLDAP1_LANE:%.*]] = insertelement <1 x double> [[TMP1]], double [[TMP2]], i32 0 |
| // CHECK-NEXT: ret <1 x double> [[VLDAP1_LANE]] |
| // Exercises vldap1_lane_f64 with lane index 0: the expected IR above is an atomic acquire double load from a, inserted into element 0 of the <1 x double> vector b. |
| float64x1_t test_vldap1_lane_f64(float64_t *a, float64x1_t b) { |
| return vldap1_lane_f64(a, b, 0); |
| } |
| |
| // CHECK-LABEL: @test_vldap1_lane_p64( |
| // CHECK-NEXT: entry: |
| // CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B:%.*]] to <8 x i8> |
| // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> |
| // CHECK-NEXT: [[TMP2:%.*]] = load atomic i64, ptr [[A:%.*]] acquire, align 8 |
| // CHECK-NEXT: [[VLDAP1_LANE:%.*]] = insertelement <1 x i64> [[TMP1]], i64 [[TMP2]], i32 0 |
| // CHECK-NEXT: ret <1 x i64> [[VLDAP1_LANE]] |
| // Exercises vldap1_lane_p64 with lane index 0: the expected IR above treats the poly64 data as i64, doing an atomic acquire i64 load from a inserted into element 0 of b. |
| poly64x1_t test_vldap1_lane_p64(poly64_t *a, poly64x1_t b) { |
| return vldap1_lane_p64(a, b, 0); |
| } |
| |
| // CHECK-LABEL: @test_vstl1q_lane_u64( |
| // CHECK-NEXT: entry: |
| // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B:%.*]] to <16 x i8> |
| // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> |
| // CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1 |
| // CHECK-NEXT: store atomic i64 [[TMP2]], ptr [[A:%.*]] release, align 8 |
| // CHECK-NEXT: ret void |
| // Exercises vstl1q_lane_u64 with lane index 1: the expected IR above extracts element 1 of b and writes it to a with an atomic release i64 store. |
| void test_vstl1q_lane_u64(uint64_t *a, uint64x2_t b) { |
| vstl1q_lane_u64(a, b, 1); |
| } |
| |
| // CHECK-LABEL: @test_vstl1q_lane_s64( |
| // CHECK-NEXT: entry: |
| // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B:%.*]] to <16 x i8> |
| // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> |
| // CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1 |
| // CHECK-NEXT: store atomic i64 [[TMP2]], ptr [[A:%.*]] release, align 8 |
| // CHECK-NEXT: ret void |
| // Exercises vstl1q_lane_s64 with lane index 1: the expected IR above extracts element 1 of b and writes it to a with an atomic release i64 store. |
| void test_vstl1q_lane_s64(int64_t *a, int64x2_t b) { |
| vstl1q_lane_s64(a, b, 1); |
| } |
| |
| // CHECK-LABEL: @test_vstl1q_lane_f64( |
| // CHECK-NEXT: entry: |
| // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[B:%.*]] to <16 x i8> |
| // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> |
| // CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[TMP1]], i32 1 |
| // CHECK-NEXT: store atomic double [[TMP2]], ptr [[A:%.*]] release, align 8 |
| // CHECK-NEXT: ret void |
| // Exercises vstl1q_lane_f64 with lane index 1: the expected IR above extracts element 1 of b and writes it to a with an atomic release double store. |
| void test_vstl1q_lane_f64(float64_t *a, float64x2_t b) { |
| vstl1q_lane_f64(a, b, 1); |
| } |
| |
| // CHECK-LABEL: @test_vstl1q_lane_p64( |
| // CHECK-NEXT: entry: |
| // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B:%.*]] to <16 x i8> |
| // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> |
| // CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1 |
| // CHECK-NEXT: store atomic i64 [[TMP2]], ptr [[A:%.*]] release, align 8 |
| // CHECK-NEXT: ret void |
| // Exercises vstl1q_lane_p64 with lane index 1: the expected IR above treats the poly64 data as i64, extracting element 1 of b and writing it to a with an atomic release i64 store. |
| void test_vstl1q_lane_p64(poly64_t *a, poly64x2_t b) { |
| vstl1q_lane_p64(a, b, 1); |
| } |
| |
| // CHECK-LABEL: @test_vstl1_lane_u64( |
| // CHECK-NEXT: entry: |
| // CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B:%.*]] to <8 x i8> |
| // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> |
| // CHECK-NEXT: [[TMP2:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0 |
| // CHECK-NEXT: store atomic i64 [[TMP2]], ptr [[A:%.*]] release, align 8 |
| // CHECK-NEXT: ret void |
| // Exercises vstl1_lane_u64 with lane index 0: the expected IR above extracts element 0 of the <1 x i64> vector b and writes it to a with an atomic release i64 store. |
| void test_vstl1_lane_u64(uint64_t *a, uint64x1_t b) { |
| vstl1_lane_u64(a, b, 0); |
| } |
| |
| // CHECK-LABEL: @test_vstl1_lane_s64( |
| // CHECK-NEXT: entry: |
| // CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B:%.*]] to <8 x i8> |
| // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> |
| // CHECK-NEXT: [[TMP2:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0 |
| // CHECK-NEXT: store atomic i64 [[TMP2]], ptr [[A:%.*]] release, align 8 |
| // CHECK-NEXT: ret void |
| // Exercises vstl1_lane_s64 with lane index 0: the expected IR above extracts element 0 of the <1 x i64> vector b and writes it to a with an atomic release i64 store. |
| void test_vstl1_lane_s64(int64_t *a, int64x1_t b) { |
| vstl1_lane_s64(a, b, 0); |
| } |
| |
| // CHECK-LABEL: @test_vstl1_lane_f64( |
| // CHECK-NEXT: entry: |
| // CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[B:%.*]] to <8 x i8> |
| // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double> |
| // CHECK-NEXT: [[TMP2:%.*]] = extractelement <1 x double> [[TMP1]], i32 0 |
| // CHECK-NEXT: store atomic double [[TMP2]], ptr [[A:%.*]] release, align 8 |
| // CHECK-NEXT: ret void |
| // Exercises vstl1_lane_f64 with lane index 0: the expected IR above extracts element 0 of the <1 x double> vector b and writes it to a with an atomic release double store. |
| void test_vstl1_lane_f64(float64_t *a, float64x1_t b) { |
| vstl1_lane_f64(a, b, 0); |
| } |
| |
| // CHECK-LABEL: @test_vstl1_lane_p64( |
| // CHECK-NEXT: entry: |
| // CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B:%.*]] to <8 x i8> |
| // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> |
| // CHECK-NEXT: [[TMP2:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0 |
| // CHECK-NEXT: store atomic i64 [[TMP2]], ptr [[A:%.*]] release, align 8 |
| // CHECK-NEXT: ret void |
| // Exercises vstl1_lane_p64 with lane index 0: the expected IR above treats the poly64 data as i64, extracting element 0 of b and writing it to a with an atomic release i64 store. |
| void test_vstl1_lane_p64(poly64_t *a, poly64x1_t b) { |
| vstl1_lane_p64(a, b, 0); |
| } |