| // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 |
| // RUN: %clang_cc1 -triple armv8.2a-linux-gnu -target-abi apcs-gnu -target-feature +neon -target-feature -fullfp16 \ |
| // RUN: -S -disable-O0-optnone -emit-llvm -o - %s \ |
| // RUN: | opt -S -passes=sroa \ |
| // RUN: | FileCheck %s --check-prefixes=CHECK-NOFP16 |
| // RUN: %clang_cc1 -triple armv8a-linux-gnu -target-abi apcs-gnu -target-feature +neon -target-feature +fullfp16 \ |
| // RUN: -S -disable-O0-optnone -emit-llvm -o - %s \ |
| // RUN: | opt -S -passes=sroa \ |
| // RUN: | FileCheck %s --check-prefixes=CHECK-FP16 |
| |
| // REQUIRES: arm-registered-target |
| |
| #include <arm_neon.h> |
| |
| // CHECK-NOFP16-LABEL: define dso_local <2 x i32> @test_vbsl_f16( |
| // CHECK-NOFP16-SAME: <4 x i16> noundef [[A:%.*]], <2 x i32> noundef [[B_COERCE:%.*]], <2 x i32> noundef [[C_COERCE:%.*]]) #[[ATTR0:[0-9]+]] { |
| // CHECK-NOFP16-NEXT: entry: |
| // CHECK-NOFP16-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B_COERCE]] to <4 x half> |
| // CHECK-NOFP16-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[C_COERCE]] to <4 x half> |
| // CHECK-NOFP16-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[TMP0]] to <2 x i32> |
| // CHECK-NOFP16-NEXT: [[TMP3:%.*]] = bitcast <4 x half> [[TMP1]] to <2 x i32> |
| // CHECK-NOFP16-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP2]] to <4 x half> |
| // CHECK-NOFP16-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP3]] to <4 x half> |
| // CHECK-NOFP16-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8> |
| // CHECK-NOFP16-NEXT: [[TMP7:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8> |
| // CHECK-NOFP16-NEXT: [[TMP8:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8> |
| // CHECK-NOFP16-NEXT: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP6]], <8 x i8> [[TMP7]], <8 x i8> [[TMP8]]) |
| // CHECK-NOFP16-NEXT: [[TMP9:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <4 x half> |
| // CHECK-NOFP16-NEXT: [[TMP10:%.*]] = bitcast <4 x half> [[TMP9]] to <2 x i32> |
| // CHECK-NOFP16-NEXT: [[TMP11:%.*]] = bitcast <2 x i32> [[TMP10]] to <4 x half> |
| // CHECK-NOFP16-NEXT: [[TMP12:%.*]] = bitcast <4 x half> [[TMP11]] to <2 x i32> |
| // CHECK-NOFP16-NEXT: ret <2 x i32> [[TMP12]] |
| // |
| // CHECK-FP16-LABEL: define dso_local <4 x half> @test_vbsl_f16( |
| // CHECK-FP16-SAME: <4 x i16> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]], <4 x half> noundef [[C:%.*]]) #[[ATTR0:[0-9]+]] { |
| // CHECK-FP16-NEXT: entry: |
| // CHECK-FP16-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8> |
| // CHECK-FP16-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8> |
| // CHECK-FP16-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[C]] to <8 x i8> |
| // CHECK-FP16-NEXT: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]]) |
| // CHECK-FP16-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <4 x half> |
| // CHECK-FP16-NEXT: ret <4 x half> [[TMP3]] |
| // |
| float16x4_t test_vbsl_f16(uint16x4_t a, float16x4_t b, float16x4_t c) { /* Codegen test: forwards a, b, c unchanged to vbsl_f16; the expected IR for both RUN configurations is pinned by the autogenerated FileCheck lines above. */ |
| return vbsl_f16(a, b, c); /* Checks expect the operands bitcast to <8 x i8>, one call to @llvm.arm.neon.vbsl.v8i8, and the result bitcast back to <4 x half>. */ |
| } |
| |
| // CHECK-NOFP16-LABEL: define dso_local <4 x i32> @test_vbslq_f16( |
| // CHECK-NOFP16-SAME: <8 x i16> noundef [[A:%.*]], <4 x i32> noundef [[B_COERCE:%.*]], <4 x i32> noundef [[C_COERCE:%.*]]) #[[ATTR0]] { |
| // CHECK-NOFP16-NEXT: entry: |
| // CHECK-NOFP16-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[B_COERCE]] to <8 x half> |
| // CHECK-NOFP16-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[C_COERCE]] to <8 x half> |
| // CHECK-NOFP16-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[TMP0]] to <4 x i32> |
| // CHECK-NOFP16-NEXT: [[TMP3:%.*]] = bitcast <8 x half> [[TMP1]] to <4 x i32> |
| // CHECK-NOFP16-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to <8 x half> |
| // CHECK-NOFP16-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP3]] to <8 x half> |
| // CHECK-NOFP16-NEXT: [[TMP6:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8> |
| // CHECK-NOFP16-NEXT: [[TMP7:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8> |
| // CHECK-NOFP16-NEXT: [[TMP8:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8> |
| // CHECK-NOFP16-NEXT: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP6]], <16 x i8> [[TMP7]], <16 x i8> [[TMP8]]) |
| // CHECK-NOFP16-NEXT: [[TMP9:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <8 x half> |
| // CHECK-NOFP16-NEXT: [[TMP10:%.*]] = bitcast <8 x half> [[TMP9]] to <4 x i32> |
| // CHECK-NOFP16-NEXT: [[TMP11:%.*]] = bitcast <4 x i32> [[TMP10]] to <8 x half> |
| // CHECK-NOFP16-NEXT: [[TMP12:%.*]] = bitcast <8 x half> [[TMP11]] to <4 x i32> |
| // CHECK-NOFP16-NEXT: ret <4 x i32> [[TMP12]] |
| // |
| // CHECK-FP16-LABEL: define dso_local <8 x half> @test_vbslq_f16( |
| // CHECK-FP16-SAME: <8 x i16> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]], <8 x half> noundef [[C:%.*]]) #[[ATTR0]] { |
| // CHECK-FP16-NEXT: entry: |
| // CHECK-FP16-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8> |
| // CHECK-FP16-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8> |
| // CHECK-FP16-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[C]] to <16 x i8> |
| // CHECK-FP16-NEXT: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) |
| // CHECK-FP16-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <8 x half> |
| // CHECK-FP16-NEXT: ret <8 x half> [[TMP3]] |
| // |
| float16x8_t test_vbslq_f16(uint16x8_t a, float16x8_t b, float16x8_t c) { /* Codegen test: 128-bit counterpart of the vbsl_f16 test; forwards a, b, c unchanged to vbslq_f16. */ |
| return vbslq_f16(a, b, c); /* Checks expect the operands bitcast to <16 x i8>, one call to @llvm.arm.neon.vbsl.v16i8, and the result bitcast back to <8 x half>. */ |
| } |
| |
| // CHECK-NOFP16-LABEL: define dso_local void @test_vzip_f16( |
| // CHECK-NOFP16-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT16X4X2_T:%.*]]) align 8 [[AGG_RESULT:%.*]], <2 x i32> noundef [[A_COERCE:%.*]], <2 x i32> noundef [[B_COERCE:%.*]]) #[[ATTR0]] { |
| // CHECK-NOFP16-NEXT: entry: |
| // CHECK-NOFP16-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A_COERCE]] to <4 x half> |
| // CHECK-NOFP16-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B_COERCE]] to <4 x half> |
| // CHECK-NOFP16-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[TMP0]] to <2 x i32> |
| // CHECK-NOFP16-NEXT: [[TMP3:%.*]] = bitcast <4 x half> [[TMP1]] to <2 x i32> |
| // CHECK-NOFP16-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) |
| // CHECK-NOFP16-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP2]] to <4 x half> |
| // CHECK-NOFP16-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP3]] to <4 x half> |
| // CHECK-NOFP16-NEXT: [[TMP6:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8> |
| // CHECK-NOFP16-NEXT: [[TMP7:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8> |
| // CHECK-NOFP16-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> |
| // CHECK-NOFP16-NEXT: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16> |
| // CHECK-NOFP16-NEXT: [[VZIP_I:%.*]] = shufflevector <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], <4 x i32> <i32 0, i32 4, i32 1, i32 5> |
| // CHECK-NOFP16-NEXT: store <4 x i16> [[VZIP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META3]] |
| // CHECK-NOFP16-NEXT: [[TMP10:%.*]] = getelementptr inbounds <4 x i16>, ptr [[AGG_RESULT]], i32 1 |
| // CHECK-NOFP16-NEXT: [[VZIP3_I:%.*]] = shufflevector <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], <4 x i32> <i32 2, i32 6, i32 3, i32 7> |
| // CHECK-NOFP16-NEXT: store <4 x i16> [[VZIP3_I]], ptr [[TMP10]], align 4, !alias.scope [[META3]] |
| // CHECK-NOFP16-NEXT: ret void |
| // |
| // CHECK-FP16-LABEL: define dso_local void @test_vzip_f16( |
| // CHECK-FP16-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT16X4X2_T:%.*]]) align 8 [[AGG_RESULT:%.*]], <4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] { |
| // CHECK-FP16-NEXT: entry: |
| // CHECK-FP16-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) |
| // CHECK-FP16-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8> |
| // CHECK-FP16-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8> |
| // CHECK-FP16-NEXT: [[VZIP_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 0, i32 4, i32 1, i32 5> |
| // CHECK-FP16-NEXT: store <4 x half> [[VZIP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META3]] |
| // CHECK-FP16-NEXT: [[TMP2:%.*]] = getelementptr inbounds <4 x half>, ptr [[AGG_RESULT]], i32 1 |
| // CHECK-FP16-NEXT: [[VZIP1_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 2, i32 6, i32 3, i32 7> |
| // CHECK-FP16-NEXT: store <4 x half> [[VZIP1_I]], ptr [[TMP2]], align 4, !alias.scope [[META3]] |
| // CHECK-FP16-NEXT: ret void |
| // |
| float16x4x2_t test_vzip_f16(float16x4_t a, float16x4_t b) { /* Codegen test: forwards a and b to vzip_f16; per the checks above the two-vector result is written through an sret pointer. */ |
| return vzip_f16(a, b); /* Checks expect two shufflevectors with masks <0,4,1,5> and <2,6,3,7>, stored to consecutive vector slots of the result. */ |
| } |
| |
| // CHECK-NOFP16-LABEL: define dso_local void @test_vzipq_f16( |
| // CHECK-NOFP16-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT16X8X2_T:%.*]]) align 16 [[AGG_RESULT:%.*]], <4 x i32> noundef [[A_COERCE:%.*]], <4 x i32> noundef [[B_COERCE:%.*]]) #[[ATTR0]] { |
| // CHECK-NOFP16-NEXT: entry: |
| // CHECK-NOFP16-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A_COERCE]] to <8 x half> |
| // CHECK-NOFP16-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B_COERCE]] to <8 x half> |
| // CHECK-NOFP16-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[TMP0]] to <4 x i32> |
| // CHECK-NOFP16-NEXT: [[TMP3:%.*]] = bitcast <8 x half> [[TMP1]] to <4 x i32> |
| // CHECK-NOFP16-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) |
| // CHECK-NOFP16-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to <8 x half> |
| // CHECK-NOFP16-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP3]] to <8 x half> |
| // CHECK-NOFP16-NEXT: [[TMP6:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8> |
| // CHECK-NOFP16-NEXT: [[TMP7:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8> |
| // CHECK-NOFP16-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> |
| // CHECK-NOFP16-NEXT: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16> |
| // CHECK-NOFP16-NEXT: [[VZIP_I:%.*]] = shufflevector <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> |
| // CHECK-NOFP16-NEXT: store <8 x i16> [[VZIP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META6]] |
| // CHECK-NOFP16-NEXT: [[TMP10:%.*]] = getelementptr inbounds <8 x i16>, ptr [[AGG_RESULT]], i32 1 |
| // CHECK-NOFP16-NEXT: [[VZIP3_I:%.*]] = shufflevector <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> |
| // CHECK-NOFP16-NEXT: store <8 x i16> [[VZIP3_I]], ptr [[TMP10]], align 4, !alias.scope [[META6]] |
| // CHECK-NOFP16-NEXT: ret void |
| // |
| // CHECK-FP16-LABEL: define dso_local void @test_vzipq_f16( |
| // CHECK-FP16-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT16X8X2_T:%.*]]) align 16 [[AGG_RESULT:%.*]], <8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] { |
| // CHECK-FP16-NEXT: entry: |
| // CHECK-FP16-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) |
| // CHECK-FP16-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8> |
| // CHECK-FP16-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8> |
| // CHECK-FP16-NEXT: [[VZIP_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> |
| // CHECK-FP16-NEXT: store <8 x half> [[VZIP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META6]] |
| // CHECK-FP16-NEXT: [[TMP2:%.*]] = getelementptr inbounds <8 x half>, ptr [[AGG_RESULT]], i32 1 |
| // CHECK-FP16-NEXT: [[VZIP1_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> |
| // CHECK-FP16-NEXT: store <8 x half> [[VZIP1_I]], ptr [[TMP2]], align 4, !alias.scope [[META6]] |
| // CHECK-FP16-NEXT: ret void |
| // |
| float16x8x2_t test_vzipq_f16(float16x8_t a, float16x8_t b) { /* Codegen test: 128-bit counterpart of the vzip_f16 test; forwards a and b to vzipq_f16, result written through an sret pointer per the checks above. */ |
| return vzipq_f16(a, b); /* Checks expect two shufflevectors with masks <0,8,1,9,2,10,3,11> and <4,12,5,13,6,14,7,15>. */ |
| } |
| |
| // CHECK-NOFP16-LABEL: define dso_local void @test_vuzp_f16( |
| // CHECK-NOFP16-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT16X4X2_T:%.*]]) align 8 [[AGG_RESULT:%.*]], <2 x i32> noundef [[A_COERCE:%.*]], <2 x i32> noundef [[B_COERCE:%.*]]) #[[ATTR0]] { |
| // CHECK-NOFP16-NEXT: entry: |
| // CHECK-NOFP16-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A_COERCE]] to <4 x half> |
| // CHECK-NOFP16-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B_COERCE]] to <4 x half> |
| // CHECK-NOFP16-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[TMP0]] to <2 x i32> |
| // CHECK-NOFP16-NEXT: [[TMP3:%.*]] = bitcast <4 x half> [[TMP1]] to <2 x i32> |
| // CHECK-NOFP16-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META9:![0-9]+]]) |
| // CHECK-NOFP16-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP2]] to <4 x half> |
| // CHECK-NOFP16-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP3]] to <4 x half> |
| // CHECK-NOFP16-NEXT: [[TMP6:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8> |
| // CHECK-NOFP16-NEXT: [[TMP7:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8> |
| // CHECK-NOFP16-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> |
| // CHECK-NOFP16-NEXT: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16> |
| // CHECK-NOFP16-NEXT: [[VUZP_I:%.*]] = shufflevector <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], <4 x i32> <i32 0, i32 2, i32 4, i32 6> |
| // CHECK-NOFP16-NEXT: store <4 x i16> [[VUZP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META9]] |
| // CHECK-NOFP16-NEXT: [[TMP10:%.*]] = getelementptr inbounds <4 x i16>, ptr [[AGG_RESULT]], i32 1 |
| // CHECK-NOFP16-NEXT: [[VUZP3_I:%.*]] = shufflevector <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], <4 x i32> <i32 1, i32 3, i32 5, i32 7> |
| // CHECK-NOFP16-NEXT: store <4 x i16> [[VUZP3_I]], ptr [[TMP10]], align 4, !alias.scope [[META9]] |
| // CHECK-NOFP16-NEXT: ret void |
| // |
| // CHECK-FP16-LABEL: define dso_local void @test_vuzp_f16( |
| // CHECK-FP16-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT16X4X2_T:%.*]]) align 8 [[AGG_RESULT:%.*]], <4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] { |
| // CHECK-FP16-NEXT: entry: |
| // CHECK-FP16-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META9:![0-9]+]]) |
| // CHECK-FP16-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8> |
| // CHECK-FP16-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8> |
| // CHECK-FP16-NEXT: [[VUZP_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 0, i32 2, i32 4, i32 6> |
| // CHECK-FP16-NEXT: store <4 x half> [[VUZP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META9]] |
| // CHECK-FP16-NEXT: [[TMP2:%.*]] = getelementptr inbounds <4 x half>, ptr [[AGG_RESULT]], i32 1 |
| // CHECK-FP16-NEXT: [[VUZP1_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7> |
| // CHECK-FP16-NEXT: store <4 x half> [[VUZP1_I]], ptr [[TMP2]], align 4, !alias.scope [[META9]] |
| // CHECK-FP16-NEXT: ret void |
| // |
| float16x4x2_t test_vuzp_f16(float16x4_t a, float16x4_t b) { /* Codegen test: forwards a and b to vuzp_f16; per the checks above the two-vector result is written through an sret pointer. */ |
| return vuzp_f16(a, b); /* Checks expect two shufflevectors selecting the even lanes <0,2,4,6> and the odd lanes <1,3,5,7> of the concatenated inputs. */ |
| } |
| |
| // CHECK-NOFP16-LABEL: define dso_local void @test_vuzpq_f16( |
| // CHECK-NOFP16-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT16X8X2_T:%.*]]) align 16 [[AGG_RESULT:%.*]], <4 x i32> noundef [[A_COERCE:%.*]], <4 x i32> noundef [[B_COERCE:%.*]]) #[[ATTR0]] { |
| // CHECK-NOFP16-NEXT: entry: |
| // CHECK-NOFP16-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A_COERCE]] to <8 x half> |
| // CHECK-NOFP16-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B_COERCE]] to <8 x half> |
| // CHECK-NOFP16-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[TMP0]] to <4 x i32> |
| // CHECK-NOFP16-NEXT: [[TMP3:%.*]] = bitcast <8 x half> [[TMP1]] to <4 x i32> |
| // CHECK-NOFP16-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]]) |
| // CHECK-NOFP16-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to <8 x half> |
| // CHECK-NOFP16-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP3]] to <8 x half> |
| // CHECK-NOFP16-NEXT: [[TMP6:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8> |
| // CHECK-NOFP16-NEXT: [[TMP7:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8> |
| // CHECK-NOFP16-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> |
| // CHECK-NOFP16-NEXT: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16> |
| // CHECK-NOFP16-NEXT: [[VUZP_I:%.*]] = shufflevector <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> |
| // CHECK-NOFP16-NEXT: store <8 x i16> [[VUZP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META12]] |
| // CHECK-NOFP16-NEXT: [[TMP10:%.*]] = getelementptr inbounds <8 x i16>, ptr [[AGG_RESULT]], i32 1 |
| // CHECK-NOFP16-NEXT: [[VUZP3_I:%.*]] = shufflevector <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> |
| // CHECK-NOFP16-NEXT: store <8 x i16> [[VUZP3_I]], ptr [[TMP10]], align 4, !alias.scope [[META12]] |
| // CHECK-NOFP16-NEXT: ret void |
| // |
| // CHECK-FP16-LABEL: define dso_local void @test_vuzpq_f16( |
| // CHECK-FP16-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT16X8X2_T:%.*]]) align 16 [[AGG_RESULT:%.*]], <8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] { |
| // CHECK-FP16-NEXT: entry: |
| // CHECK-FP16-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]]) |
| // CHECK-FP16-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8> |
| // CHECK-FP16-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8> |
| // CHECK-FP16-NEXT: [[VUZP_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> |
| // CHECK-FP16-NEXT: store <8 x half> [[VUZP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META12]] |
| // CHECK-FP16-NEXT: [[TMP2:%.*]] = getelementptr inbounds <8 x half>, ptr [[AGG_RESULT]], i32 1 |
| // CHECK-FP16-NEXT: [[VUZP1_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> |
| // CHECK-FP16-NEXT: store <8 x half> [[VUZP1_I]], ptr [[TMP2]], align 4, !alias.scope [[META12]] |
| // CHECK-FP16-NEXT: ret void |
| // |
| float16x8x2_t test_vuzpq_f16(float16x8_t a, float16x8_t b) { /* Codegen test: 128-bit counterpart of the vuzp_f16 test; forwards a and b to vuzpq_f16, result written through an sret pointer per the checks above. */ |
| return vuzpq_f16(a, b); /* Checks expect two shufflevectors selecting the even lanes <0,2,...,14> and the odd lanes <1,3,...,15> of the concatenated inputs. */ |
| } |
| |
| // CHECK-NOFP16-LABEL: define dso_local void @test_vtrn_f16( |
| // CHECK-NOFP16-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT16X4X2_T:%.*]]) align 8 [[AGG_RESULT:%.*]], <2 x i32> noundef [[A_COERCE:%.*]], <2 x i32> noundef [[B_COERCE:%.*]]) #[[ATTR0]] { |
| // CHECK-NOFP16-NEXT: entry: |
| // CHECK-NOFP16-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A_COERCE]] to <4 x half> |
| // CHECK-NOFP16-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B_COERCE]] to <4 x half> |
| // CHECK-NOFP16-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[TMP0]] to <2 x i32> |
| // CHECK-NOFP16-NEXT: [[TMP3:%.*]] = bitcast <4 x half> [[TMP1]] to <2 x i32> |
| // CHECK-NOFP16-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META15:![0-9]+]]) |
| // CHECK-NOFP16-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP2]] to <4 x half> |
| // CHECK-NOFP16-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP3]] to <4 x half> |
| // CHECK-NOFP16-NEXT: [[TMP6:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8> |
| // CHECK-NOFP16-NEXT: [[TMP7:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8> |
| // CHECK-NOFP16-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> |
| // CHECK-NOFP16-NEXT: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16> |
| // CHECK-NOFP16-NEXT: [[VTRN_I:%.*]] = shufflevector <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], <4 x i32> <i32 0, i32 4, i32 2, i32 6> |
| // CHECK-NOFP16-NEXT: store <4 x i16> [[VTRN_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META15]] |
| // CHECK-NOFP16-NEXT: [[TMP10:%.*]] = getelementptr inbounds <4 x i16>, ptr [[AGG_RESULT]], i32 1 |
| // CHECK-NOFP16-NEXT: [[VTRN3_I:%.*]] = shufflevector <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], <4 x i32> <i32 1, i32 5, i32 3, i32 7> |
| // CHECK-NOFP16-NEXT: store <4 x i16> [[VTRN3_I]], ptr [[TMP10]], align 4, !alias.scope [[META15]] |
| // CHECK-NOFP16-NEXT: ret void |
| // |
| // CHECK-FP16-LABEL: define dso_local void @test_vtrn_f16( |
| // CHECK-FP16-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT16X4X2_T:%.*]]) align 8 [[AGG_RESULT:%.*]], <4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] { |
| // CHECK-FP16-NEXT: entry: |
| // CHECK-FP16-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META15:![0-9]+]]) |
| // CHECK-FP16-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8> |
| // CHECK-FP16-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8> |
| // CHECK-FP16-NEXT: [[VTRN_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 0, i32 4, i32 2, i32 6> |
| // CHECK-FP16-NEXT: store <4 x half> [[VTRN_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META15]] |
| // CHECK-FP16-NEXT: [[TMP2:%.*]] = getelementptr inbounds <4 x half>, ptr [[AGG_RESULT]], i32 1 |
| // CHECK-FP16-NEXT: [[VTRN1_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 1, i32 5, i32 3, i32 7> |
| // CHECK-FP16-NEXT: store <4 x half> [[VTRN1_I]], ptr [[TMP2]], align 4, !alias.scope [[META15]] |
| // CHECK-FP16-NEXT: ret void |
| // |
| float16x4x2_t test_vtrn_f16(float16x4_t a, float16x4_t b) { /* Codegen test: forwards a and b to vtrn_f16; per the checks above the two-vector result is written through an sret pointer. */ |
| return vtrn_f16(a, b); /* Checks expect two shufflevectors with masks <0,4,2,6> and <1,5,3,7>, stored to consecutive vector slots of the result. */ |
| } |
| |
| // CHECK-NOFP16-LABEL: define dso_local void @test_vtrnq_f16( |
| // CHECK-NOFP16-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT16X8X2_T:%.*]]) align 16 [[AGG_RESULT:%.*]], <4 x i32> noundef [[A_COERCE:%.*]], <4 x i32> noundef [[B_COERCE:%.*]]) #[[ATTR0]] { |
| // CHECK-NOFP16-NEXT: entry: |
| // CHECK-NOFP16-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A_COERCE]] to <8 x half> |
| // CHECK-NOFP16-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B_COERCE]] to <8 x half> |
| // CHECK-NOFP16-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[TMP0]] to <4 x i32> |
| // CHECK-NOFP16-NEXT: [[TMP3:%.*]] = bitcast <8 x half> [[TMP1]] to <4 x i32> |
| // CHECK-NOFP16-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META18:![0-9]+]]) |
| // CHECK-NOFP16-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to <8 x half> |
| // CHECK-NOFP16-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP3]] to <8 x half> |
| // CHECK-NOFP16-NEXT: [[TMP6:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8> |
| // CHECK-NOFP16-NEXT: [[TMP7:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8> |
| // CHECK-NOFP16-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> |
| // CHECK-NOFP16-NEXT: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16> |
| // CHECK-NOFP16-NEXT: [[VTRN_I:%.*]] = shufflevector <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> |
| // CHECK-NOFP16-NEXT: store <8 x i16> [[VTRN_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META18]] |
| // CHECK-NOFP16-NEXT: [[TMP10:%.*]] = getelementptr inbounds <8 x i16>, ptr [[AGG_RESULT]], i32 1 |
| // CHECK-NOFP16-NEXT: [[VTRN3_I:%.*]] = shufflevector <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> |
| // CHECK-NOFP16-NEXT: store <8 x i16> [[VTRN3_I]], ptr [[TMP10]], align 4, !alias.scope [[META18]] |
| // CHECK-NOFP16-NEXT: ret void |
| // |
| // CHECK-FP16-LABEL: define dso_local void @test_vtrnq_f16( |
| // CHECK-FP16-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT16X8X2_T:%.*]]) align 16 [[AGG_RESULT:%.*]], <8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] { |
| // CHECK-FP16-NEXT: entry: |
| // CHECK-FP16-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META18:![0-9]+]]) |
| // CHECK-FP16-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8> |
| // CHECK-FP16-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8> |
| // CHECK-FP16-NEXT: [[VTRN_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> |
| // CHECK-FP16-NEXT: store <8 x half> [[VTRN_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META18]] |
| // CHECK-FP16-NEXT: [[TMP2:%.*]] = getelementptr inbounds <8 x half>, ptr [[AGG_RESULT]], i32 1 |
| // CHECK-FP16-NEXT: [[VTRN1_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> |
| // CHECK-FP16-NEXT: store <8 x half> [[VTRN1_I]], ptr [[TMP2]], align 4, !alias.scope [[META18]] |
| // CHECK-FP16-NEXT: ret void |
| // |
| float16x8x2_t test_vtrnq_f16(float16x8_t a, float16x8_t b) { /* Codegen test: 128-bit counterpart of the vtrn_f16 test; forwards a and b to vtrnq_f16, result written through an sret pointer per the checks above. */ |
| return vtrnq_f16(a, b); /* Checks expect two shufflevectors with masks <0,8,2,10,4,12,6,14> and <1,9,3,11,5,13,7,15>. */ |
| } |
| |
| // CHECK-NOFP16-LABEL: define dso_local <2 x i32> @test_vmov_n_f16( |
| // CHECK-NOFP16-SAME: half noundef [[A:%.*]]) #[[ATTR0]] { |
| // CHECK-NOFP16-NEXT: entry: |
| // CHECK-NOFP16-NEXT: [[VECINIT:%.*]] = insertelement <4 x half> poison, half [[A]], i32 0 |
| // CHECK-NOFP16-NEXT: [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[A]], i32 1 |
| // CHECK-NOFP16-NEXT: [[VECINIT2:%.*]] = insertelement <4 x half> [[VECINIT1]], half [[A]], i32 2 |
| // CHECK-NOFP16-NEXT: [[VECINIT3:%.*]] = insertelement <4 x half> [[VECINIT2]], half [[A]], i32 3 |
| // CHECK-NOFP16-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[VECINIT3]] to <2 x i32> |
| // CHECK-NOFP16-NEXT: ret <2 x i32> [[TMP0]] |
| // |
| // CHECK-FP16-LABEL: define dso_local <4 x half> @test_vmov_n_f16( |
| // CHECK-FP16-SAME: half noundef [[A:%.*]]) #[[ATTR0]] { |
| // CHECK-FP16-NEXT: entry: |
| // CHECK-FP16-NEXT: [[VECINIT:%.*]] = insertelement <4 x half> poison, half [[A]], i32 0 |
| // CHECK-FP16-NEXT: [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[A]], i32 1 |
| // CHECK-FP16-NEXT: [[VECINIT2:%.*]] = insertelement <4 x half> [[VECINIT1]], half [[A]], i32 2 |
| // CHECK-FP16-NEXT: [[VECINIT3:%.*]] = insertelement <4 x half> [[VECINIT2]], half [[A]], i32 3 |
| // CHECK-FP16-NEXT: ret <4 x half> [[VECINIT3]] |
| // |
| float16x4_t test_vmov_n_f16(float16_t a) { /* Codegen test: forwards the scalar a to vmov_n_f16; the expected IR for both RUN configurations is pinned by the FileCheck lines above. */ |
| return vmov_n_f16(a); /* Checks expect a chain of four insertelement instructions placing a in lanes 0..3 of a <4 x half>. */ |
| } |
| |
| // CHECK-NOFP16-LABEL: define dso_local <4 x i32> @test_vmovq_n_f16( |
| // CHECK-NOFP16-SAME: half noundef [[A:%.*]]) #[[ATTR0]] { |
| // CHECK-NOFP16-NEXT: entry: |
| // CHECK-NOFP16-NEXT: [[VECINIT:%.*]] = insertelement <8 x half> poison, half [[A]], i32 0 |
| // CHECK-NOFP16-NEXT: [[VECINIT1:%.*]] = insertelement <8 x half> [[VECINIT]], half [[A]], i32 1 |
| // CHECK-NOFP16-NEXT: [[VECINIT2:%.*]] = insertelement <8 x half> [[VECINIT1]], half [[A]], i32 2 |
| // CHECK-NOFP16-NEXT: [[VECINIT3:%.*]] = insertelement <8 x half> [[VECINIT2]], half [[A]], i32 3 |
| // CHECK-NOFP16-NEXT: [[VECINIT4:%.*]] = insertelement <8 x half> [[VECINIT3]], half [[A]], i32 4 |
| // CHECK-NOFP16-NEXT: [[VECINIT5:%.*]] = insertelement <8 x half> [[VECINIT4]], half [[A]], i32 5 |
| // CHECK-NOFP16-NEXT: [[VECINIT6:%.*]] = insertelement <8 x half> [[VECINIT5]], half [[A]], i32 6 |
| // CHECK-NOFP16-NEXT: [[VECINIT7:%.*]] = insertelement <8 x half> [[VECINIT6]], half [[A]], i32 7 |
| // CHECK-NOFP16-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[VECINIT7]] to <4 x i32> |
| // CHECK-NOFP16-NEXT: ret <4 x i32> [[TMP0]] |
| // |
| // CHECK-FP16-LABEL: define dso_local <8 x half> @test_vmovq_n_f16( |
| // CHECK-FP16-SAME: half noundef [[A:%.*]]) #[[ATTR0]] { |
| // CHECK-FP16-NEXT: entry: |
| // CHECK-FP16-NEXT: [[VECINIT:%.*]] = insertelement <8 x half> poison, half [[A]], i32 0 |
| // CHECK-FP16-NEXT: [[VECINIT1:%.*]] = insertelement <8 x half> [[VECINIT]], half [[A]], i32 1 |
| // CHECK-FP16-NEXT: [[VECINIT2:%.*]] = insertelement <8 x half> [[VECINIT1]], half [[A]], i32 2 |
| // CHECK-FP16-NEXT: [[VECINIT3:%.*]] = insertelement <8 x half> [[VECINIT2]], half [[A]], i32 3 |
| // CHECK-FP16-NEXT: [[VECINIT4:%.*]] = insertelement <8 x half> [[VECINIT3]], half [[A]], i32 4 |
| // CHECK-FP16-NEXT: [[VECINIT5:%.*]] = insertelement <8 x half> [[VECINIT4]], half [[A]], i32 5 |
| // CHECK-FP16-NEXT: [[VECINIT6:%.*]] = insertelement <8 x half> [[VECINIT5]], half [[A]], i32 6 |
| // CHECK-FP16-NEXT: [[VECINIT7:%.*]] = insertelement <8 x half> [[VECINIT6]], half [[A]], i32 7 |
| // CHECK-FP16-NEXT: ret <8 x half> [[VECINIT7]] |
| // |
| float16x8_t test_vmovq_n_f16(float16_t a) { /* Codegen test: 128-bit counterpart of the vmov_n_f16 test; forwards the scalar a to vmovq_n_f16. */ |
| return vmovq_n_f16(a); /* Checks expect a chain of eight insertelement instructions placing a in lanes 0..7 of an <8 x half>. */ |
| } |
| |
| // CHECK-NOFP16-LABEL: define dso_local <2 x i32> @test_vdup_n_f16( |
| // CHECK-NOFP16-SAME: half noundef [[A:%.*]]) #[[ATTR0]] { |
| // CHECK-NOFP16-NEXT: entry: |
| // CHECK-NOFP16-NEXT: [[VECINIT:%.*]] = insertelement <4 x half> poison, half [[A]], i32 0 |
| // CHECK-NOFP16-NEXT: [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[A]], i32 1 |
| // CHECK-NOFP16-NEXT: [[VECINIT2:%.*]] = insertelement <4 x half> [[VECINIT1]], half [[A]], i32 2 |
| // CHECK-NOFP16-NEXT: [[VECINIT3:%.*]] = insertelement <4 x half> [[VECINIT2]], half [[A]], i32 3 |
| // CHECK-NOFP16-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[VECINIT3]] to <2 x i32> |
| // CHECK-NOFP16-NEXT: ret <2 x i32> [[TMP0]] |
| // |
| // CHECK-FP16-LABEL: define dso_local <4 x half> @test_vdup_n_f16( |
| // CHECK-FP16-SAME: half noundef [[A:%.*]]) #[[ATTR0]] { |
| // CHECK-FP16-NEXT: entry: |
| // CHECK-FP16-NEXT: [[VECINIT:%.*]] = insertelement <4 x half> poison, half [[A]], i32 0 |
| // CHECK-FP16-NEXT: [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[A]], i32 1 |
| // CHECK-FP16-NEXT: [[VECINIT2:%.*]] = insertelement <4 x half> [[VECINIT1]], half [[A]], i32 2 |
| // CHECK-FP16-NEXT: [[VECINIT3:%.*]] = insertelement <4 x half> [[VECINIT2]], half [[A]], i32 3 |
| // CHECK-FP16-NEXT: ret <4 x half> [[VECINIT3]] |
| // |
| float16x4_t test_vdup_n_f16(float16_t a) { /* Codegen test: forwards the scalar a to vdup_n_f16; the checked IR has the same shape as for the vmov_n_f16 test. */ |
| return vdup_n_f16(a); /* Checks expect a chain of four insertelement instructions placing a in lanes 0..3 of a <4 x half>. */ |
| } |
| |
| // CHECK-NOFP16-LABEL: define dso_local <4 x i32> @test_vdupq_n_f16( |
| // CHECK-NOFP16-SAME: half noundef [[A:%.*]]) #[[ATTR0]] { |
| // CHECK-NOFP16-NEXT: entry: |
| // CHECK-NOFP16-NEXT: [[VECINIT:%.*]] = insertelement <8 x half> poison, half [[A]], i32 0 |
| // CHECK-NOFP16-NEXT: [[VECINIT1:%.*]] = insertelement <8 x half> [[VECINIT]], half [[A]], i32 1 |
| // CHECK-NOFP16-NEXT: [[VECINIT2:%.*]] = insertelement <8 x half> [[VECINIT1]], half [[A]], i32 2 |
| // CHECK-NOFP16-NEXT: [[VECINIT3:%.*]] = insertelement <8 x half> [[VECINIT2]], half [[A]], i32 3 |
| // CHECK-NOFP16-NEXT: [[VECINIT4:%.*]] = insertelement <8 x half> [[VECINIT3]], half [[A]], i32 4 |
| // CHECK-NOFP16-NEXT: [[VECINIT5:%.*]] = insertelement <8 x half> [[VECINIT4]], half [[A]], i32 5 |
| // CHECK-NOFP16-NEXT: [[VECINIT6:%.*]] = insertelement <8 x half> [[VECINIT5]], half [[A]], i32 6 |
| // CHECK-NOFP16-NEXT: [[VECINIT7:%.*]] = insertelement <8 x half> [[VECINIT6]], half [[A]], i32 7 |
| // CHECK-NOFP16-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[VECINIT7]] to <4 x i32> |
| // CHECK-NOFP16-NEXT: ret <4 x i32> [[TMP0]] |
| // |
| // CHECK-FP16-LABEL: define dso_local <8 x half> @test_vdupq_n_f16( |
| // CHECK-FP16-SAME: half noundef [[A:%.*]]) #[[ATTR0]] { |
| // CHECK-FP16-NEXT: entry: |
| // CHECK-FP16-NEXT: [[VECINIT:%.*]] = insertelement <8 x half> poison, half [[A]], i32 0 |
| // CHECK-FP16-NEXT: [[VECINIT1:%.*]] = insertelement <8 x half> [[VECINIT]], half [[A]], i32 1 |
| // CHECK-FP16-NEXT: [[VECINIT2:%.*]] = insertelement <8 x half> [[VECINIT1]], half [[A]], i32 2 |
| // CHECK-FP16-NEXT: [[VECINIT3:%.*]] = insertelement <8 x half> [[VECINIT2]], half [[A]], i32 3 |
| // CHECK-FP16-NEXT: [[VECINIT4:%.*]] = insertelement <8 x half> [[VECINIT3]], half [[A]], i32 4 |
| // CHECK-FP16-NEXT: [[VECINIT5:%.*]] = insertelement <8 x half> [[VECINIT4]], half [[A]], i32 5 |
| // CHECK-FP16-NEXT: [[VECINIT6:%.*]] = insertelement <8 x half> [[VECINIT5]], half [[A]], i32 6 |
| // CHECK-FP16-NEXT: [[VECINIT7:%.*]] = insertelement <8 x half> [[VECINIT6]], half [[A]], i32 7 |
| // CHECK-FP16-NEXT: ret <8 x half> [[VECINIT7]] |
| // |
| float16x8_t test_vdupq_n_f16(float16_t a) { /* Codegen test: forwards the scalar a to vdupq_n_f16; the checked IR has the same shape as for the vmovq_n_f16 test. */ |
| return vdupq_n_f16(a); /* Checks expect a chain of eight insertelement instructions placing a in lanes 0..7 of an <8 x half>. */ |
| } |
| |
| // CHECK-NOFP16-LABEL: define dso_local <2 x i32> @test_vdup_lane_f16( |
| // CHECK-NOFP16-SAME: <2 x i32> noundef [[A_COERCE:%.*]]) #[[ATTR0]] { |
| // CHECK-NOFP16-NEXT: entry: |
| // CHECK-NOFP16-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A_COERCE]] to <4 x half> |
| // CHECK-NOFP16-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[TMP0]] to <8 x i8> |
| // CHECK-NOFP16-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> |
| // CHECK-NOFP16-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP2]], <4 x i32> <i32 3, i32 3, i32 3, i32 3> |
| // CHECK-NOFP16-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <4 x half> |
| // CHECK-NOFP16-NEXT: [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <2 x i32> |
| // CHECK-NOFP16-NEXT: ret <2 x i32> [[TMP4]] |
| // |
| // CHECK-FP16-LABEL: define dso_local <4 x half> @test_vdup_lane_f16( |
| // CHECK-FP16-SAME: <4 x half> noundef [[A:%.*]]) #[[ATTR0]] { |
| // CHECK-FP16-NEXT: entry: |
| // CHECK-FP16-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8> |
| // CHECK-FP16-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half> |
| // CHECK-FP16-NEXT: [[LANE:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3> |
| // CHECK-FP16-NEXT: ret <4 x half> [[LANE]] |
| // |
| float16x4_t test_vdup_lane_f16(float16x4_t a) { /* Codegen test: vdup_lane_f16 with constant lane index 3. */ |
| return vdup_lane_f16(a, 3); /* The checks above expect a 4-element shufflevector whose mask is all 3s. */ |
| } |
| |
| // CHECK-NOFP16-LABEL: define dso_local <4 x i32> @test_vdupq_lane_f16( |
| // CHECK-NOFP16-SAME: <2 x i32> noundef [[A_COERCE:%.*]]) #[[ATTR0]] { |
| // CHECK-NOFP16-NEXT: entry: |
| // CHECK-NOFP16-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A_COERCE]] to <4 x half> |
| // CHECK-NOFP16-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[TMP0]] to <8 x i8> |
| // CHECK-NOFP16-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> |
| // CHECK-NOFP16-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP2]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> |
| // CHECK-NOFP16-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[LANE]] to <8 x half> |
| // CHECK-NOFP16-NEXT: [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <4 x i32> |
| // CHECK-NOFP16-NEXT: ret <4 x i32> [[TMP4]] |
| // |
| // CHECK-FP16-LABEL: define dso_local <8 x half> @test_vdupq_lane_f16( |
| // CHECK-FP16-SAME: <4 x half> noundef [[A:%.*]]) #[[ATTR0]] { |
| // CHECK-FP16-NEXT: entry: |
| // CHECK-FP16-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8> |
| // CHECK-FP16-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half> |
| // CHECK-FP16-NEXT: [[LANE:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> |
| // CHECK-FP16-NEXT: ret <8 x half> [[LANE]] |
| // |
| float16x8_t test_vdupq_lane_f16(float16x4_t a) { /* Codegen test: vdupq_lane_f16 with constant lane index 3. */ |
| return vdupq_lane_f16(a, 3); /* The checks above expect a 4-lane input widened to 8 lanes by a shufflevector mask of all 3s. */ |
| } |
| |
| // CHECK-NOFP16-LABEL: define dso_local <2 x i32> @test_vext_f16( |
| // CHECK-NOFP16-SAME: <2 x i32> noundef [[A_COERCE:%.*]], <2 x i32> noundef [[B_COERCE:%.*]]) #[[ATTR0]] { |
| // CHECK-NOFP16-NEXT: entry: |
| // CHECK-NOFP16-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A_COERCE]] to <4 x half> |
| // CHECK-NOFP16-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B_COERCE]] to <4 x half> |
| // CHECK-NOFP16-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[TMP0]] to <8 x i8> |
| // CHECK-NOFP16-NEXT: [[TMP3:%.*]] = bitcast <4 x half> [[TMP1]] to <8 x i8> |
| // CHECK-NOFP16-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> |
| // CHECK-NOFP16-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16> |
| // CHECK-NOFP16-NEXT: [[VEXT:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 2, i32 3, i32 4, i32 5> |
| // CHECK-NOFP16-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[VEXT]] to <4 x half> |
| // CHECK-NOFP16-NEXT: [[TMP7:%.*]] = bitcast <4 x half> [[TMP6]] to <2 x i32> |
| // CHECK-NOFP16-NEXT: ret <2 x i32> [[TMP7]] |
| // |
| // CHECK-FP16-LABEL: define dso_local <4 x half> @test_vext_f16( |
| // CHECK-FP16-SAME: <4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] { |
| // CHECK-FP16-NEXT: entry: |
| // CHECK-FP16-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8> |
| // CHECK-FP16-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8> |
| // CHECK-FP16-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half> |
| // CHECK-FP16-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half> |
| // CHECK-FP16-NEXT: [[VEXT:%.*]] = shufflevector <4 x half> [[TMP2]], <4 x half> [[TMP3]], <4 x i32> <i32 2, i32 3, i32 4, i32 5> |
| // CHECK-FP16-NEXT: ret <4 x half> [[VEXT]] |
| // |
| float16x4_t test_vext_f16(float16x4_t a, float16x4_t b) { /* Codegen test: vext_f16 with constant offset 2. */ |
| return vext_f16(a, b, 2); /* The checks above expect shuffle mask <2, 3, 4, 5> over the (a, b) pair. */ |
| } |
| |
| // CHECK-NOFP16-LABEL: define dso_local <4 x i32> @test_vextq_f16( |
| // CHECK-NOFP16-SAME: <4 x i32> noundef [[A_COERCE:%.*]], <4 x i32> noundef [[B_COERCE:%.*]]) #[[ATTR0]] { |
| // CHECK-NOFP16-NEXT: entry: |
| // CHECK-NOFP16-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A_COERCE]] to <8 x half> |
| // CHECK-NOFP16-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B_COERCE]] to <8 x half> |
| // CHECK-NOFP16-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[TMP0]] to <16 x i8> |
| // CHECK-NOFP16-NEXT: [[TMP3:%.*]] = bitcast <8 x half> [[TMP1]] to <16 x i8> |
| // CHECK-NOFP16-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16> |
| // CHECK-NOFP16-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x i16> |
| // CHECK-NOFP16-NEXT: [[VEXT:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12> |
| // CHECK-NOFP16-NEXT: [[TMP6:%.*]] = bitcast <8 x i16> [[VEXT]] to <8 x half> |
| // CHECK-NOFP16-NEXT: [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <4 x i32> |
| // CHECK-NOFP16-NEXT: ret <4 x i32> [[TMP7]] |
| // |
| // CHECK-FP16-LABEL: define dso_local <8 x half> @test_vextq_f16( |
| // CHECK-FP16-SAME: <8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] { |
| // CHECK-FP16-NEXT: entry: |
| // CHECK-FP16-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8> |
| // CHECK-FP16-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8> |
| // CHECK-FP16-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half> |
| // CHECK-FP16-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half> |
| // CHECK-FP16-NEXT: [[VEXT:%.*]] = shufflevector <8 x half> [[TMP2]], <8 x half> [[TMP3]], <8 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12> |
| // CHECK-FP16-NEXT: ret <8 x half> [[VEXT]] |
| // |
| float16x8_t test_vextq_f16(float16x8_t a, float16x8_t b) { /* Codegen test: vextq_f16 with constant offset 5. */ |
| return vextq_f16(a, b, 5); /* The checks above expect shuffle mask <5, 6, ..., 12> over the (a, b) pair. */ |
| } |
| |
| // CHECK-NOFP16-LABEL: define dso_local <2 x i32> @test_vrev64_f16( |
| // CHECK-NOFP16-SAME: <2 x i32> noundef [[A_COERCE:%.*]]) #[[ATTR0]] { |
| // CHECK-NOFP16-NEXT: entry: |
| // CHECK-NOFP16-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A_COERCE]] to <4 x half> |
| // CHECK-NOFP16-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[TMP0]] to <2 x i32> |
| // CHECK-NOFP16-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP1]] to <4 x half> |
| // CHECK-NOFP16-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x half> [[TMP2]], <4 x half> [[TMP2]], <4 x i32> <i32 3, i32 2, i32 1, i32 0> |
| // CHECK-NOFP16-NEXT: [[TMP3:%.*]] = bitcast <4 x half> [[SHUFFLE_I]] to <2 x i32> |
| // CHECK-NOFP16-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <4 x half> |
| // CHECK-NOFP16-NEXT: [[TMP5:%.*]] = bitcast <4 x half> [[TMP4]] to <2 x i32> |
| // CHECK-NOFP16-NEXT: ret <2 x i32> [[TMP5]] |
| // |
| // CHECK-FP16-LABEL: define dso_local <4 x half> @test_vrev64_f16( |
| // CHECK-FP16-SAME: <4 x half> noundef [[A:%.*]]) #[[ATTR0]] { |
| // CHECK-FP16-NEXT: entry: |
| // CHECK-FP16-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[A]], <4 x i32> <i32 3, i32 2, i32 1, i32 0> |
| // CHECK-FP16-NEXT: ret <4 x half> [[SHUFFLE_I]] |
| // |
| float16x4_t test_vrev64_f16(float16x4_t a) { /* Codegen test: vrev64_f16 on a 4-lane half vector. */ |
| return vrev64_f16(a); /* The checks above expect shuffle mask <3, 2, 1, 0>, i.e. all four lanes reversed. */ |
| } |
| |
| // CHECK-NOFP16-LABEL: define dso_local <4 x i32> @test_vrev64q_f16( |
| // CHECK-NOFP16-SAME: <4 x i32> noundef [[A_COERCE:%.*]]) #[[ATTR0]] { |
| // CHECK-NOFP16-NEXT: entry: |
| // CHECK-NOFP16-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A_COERCE]] to <8 x half> |
| // CHECK-NOFP16-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[TMP0]] to <4 x i32> |
| // CHECK-NOFP16-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to <8 x half> |
| // CHECK-NOFP16-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x half> [[TMP2]], <8 x half> [[TMP2]], <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4> |
| // CHECK-NOFP16-NEXT: [[TMP3:%.*]] = bitcast <8 x half> [[SHUFFLE_I]] to <4 x i32> |
| // CHECK-NOFP16-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <8 x half> |
| // CHECK-NOFP16-NEXT: [[TMP5:%.*]] = bitcast <8 x half> [[TMP4]] to <4 x i32> |
| // CHECK-NOFP16-NEXT: ret <4 x i32> [[TMP5]] |
| // |
| // CHECK-FP16-LABEL: define dso_local <8 x half> @test_vrev64q_f16( |
| // CHECK-FP16-SAME: <8 x half> noundef [[A:%.*]]) #[[ATTR0]] { |
| // CHECK-FP16-NEXT: entry: |
| // CHECK-FP16-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[A]], <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4> |
| // CHECK-FP16-NEXT: ret <8 x half> [[SHUFFLE_I]] |
| // |
| float16x8_t test_vrev64q_f16(float16x8_t a) { /* Codegen test: vrev64q_f16 on an 8-lane half vector. */ |
| return vrev64q_f16(a); /* The checks above expect shuffle mask <3, 2, 1, 0, 7, 6, 5, 4>: each group of four lanes reversed. */ |
| } |
| //. |
| // CHECK-NOFP16: [[META3]] = !{[[META4:![0-9]+]]} |
| // CHECK-NOFP16: [[META4]] = distinct !{[[META4]], [[META5:![0-9]+]], !"vzip_f16: %agg.result"} |
| // CHECK-NOFP16: [[META5]] = distinct !{[[META5]], !"vzip_f16"} |
| // CHECK-NOFP16: [[META6]] = !{[[META7:![0-9]+]]} |
| // CHECK-NOFP16: [[META7]] = distinct !{[[META7]], [[META8:![0-9]+]], !"vzipq_f16: %agg.result"} |
| // CHECK-NOFP16: [[META8]] = distinct !{[[META8]], !"vzipq_f16"} |
| // CHECK-NOFP16: [[META9]] = !{[[META10:![0-9]+]]} |
| // CHECK-NOFP16: [[META10]] = distinct !{[[META10]], [[META11:![0-9]+]], !"vuzp_f16: %agg.result"} |
| // CHECK-NOFP16: [[META11]] = distinct !{[[META11]], !"vuzp_f16"} |
| // CHECK-NOFP16: [[META12]] = !{[[META13:![0-9]+]]} |
| // CHECK-NOFP16: [[META13]] = distinct !{[[META13]], [[META14:![0-9]+]], !"vuzpq_f16: %agg.result"} |
| // CHECK-NOFP16: [[META14]] = distinct !{[[META14]], !"vuzpq_f16"} |
| // CHECK-NOFP16: [[META15]] = !{[[META16:![0-9]+]]} |
| // CHECK-NOFP16: [[META16]] = distinct !{[[META16]], [[META17:![0-9]+]], !"vtrn_f16: %agg.result"} |
| // CHECK-NOFP16: [[META17]] = distinct !{[[META17]], !"vtrn_f16"} |
| // CHECK-NOFP16: [[META18]] = !{[[META19:![0-9]+]]} |
| // CHECK-NOFP16: [[META19]] = distinct !{[[META19]], [[META20:![0-9]+]], !"vtrnq_f16: %agg.result"} |
| // CHECK-NOFP16: [[META20]] = distinct !{[[META20]], !"vtrnq_f16"} |
| //. |
| // CHECK-FP16: [[META3]] = !{[[META4:![0-9]+]]} |
| // CHECK-FP16: [[META4]] = distinct !{[[META4]], [[META5:![0-9]+]], !"vzip_f16: %agg.result"} |
| // CHECK-FP16: [[META5]] = distinct !{[[META5]], !"vzip_f16"} |
| // CHECK-FP16: [[META6]] = !{[[META7:![0-9]+]]} |
| // CHECK-FP16: [[META7]] = distinct !{[[META7]], [[META8:![0-9]+]], !"vzipq_f16: %agg.result"} |
| // CHECK-FP16: [[META8]] = distinct !{[[META8]], !"vzipq_f16"} |
| // CHECK-FP16: [[META9]] = !{[[META10:![0-9]+]]} |
| // CHECK-FP16: [[META10]] = distinct !{[[META10]], [[META11:![0-9]+]], !"vuzp_f16: %agg.result"} |
| // CHECK-FP16: [[META11]] = distinct !{[[META11]], !"vuzp_f16"} |
| // CHECK-FP16: [[META12]] = !{[[META13:![0-9]+]]} |
| // CHECK-FP16: [[META13]] = distinct !{[[META13]], [[META14:![0-9]+]], !"vuzpq_f16: %agg.result"} |
| // CHECK-FP16: [[META14]] = distinct !{[[META14]], !"vuzpq_f16"} |
| // CHECK-FP16: [[META15]] = !{[[META16:![0-9]+]]} |
| // CHECK-FP16: [[META16]] = distinct !{[[META16]], [[META17:![0-9]+]], !"vtrn_f16: %agg.result"} |
| // CHECK-FP16: [[META17]] = distinct !{[[META17]], !"vtrn_f16"} |
| // CHECK-FP16: [[META18]] = !{[[META19:![0-9]+]]} |
| // CHECK-FP16: [[META19]] = distinct !{[[META19]], [[META20:![0-9]+]], !"vtrnq_f16: %agg.result"} |
| // CHECK-FP16: [[META20]] = distinct !{[[META20]], !"vtrnq_f16"} |
| //. |