| // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 |
| // RUN: %clang_cc1 -triple armv8.6a-arm-none-eabi -target-feature +neon -target-feature +fullfp16 -target-feature +i8mm \ |
| // RUN: -disable-O0-optnone -emit-llvm -o - %s \ |
| // RUN: | opt -S -passes=mem2reg,sroa \ |
| // RUN: | FileCheck %s |
| |
| // REQUIRES: arm-registered-target |
| |
| #include <arm_neon.h> |
| |
| // CHECK-LABEL: define dso_local <4 x i32> @test_vmmlaq_s32( |
| // CHECK-SAME: <4 x i32> noundef [[R:%.*]], <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0:[0-9]+]] { |
| // CHECK-NEXT: [[ENTRY:.*:]] |
| // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[R]] to <16 x i8> |
| // CHECK-NEXT: [[VMMLA_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> |
| // CHECK-NEXT: [[VMMLA1_I:%.*]] = call <4 x i32> @llvm.arm.neon.smmla.v4i32.v16i8(<4 x i32> [[VMMLA_I]], <16 x i8> [[A]], <16 x i8> [[B]]) |
| // CHECK-NEXT: ret <4 x i32> [[VMMLA1_I]] |
| // Fixture: forwards r and the two int8x16_t operands to vmmlaq_s32; the assertions above expect one @llvm.arm.neon.smmla.v4i32.v16i8 call on (r, a, b) whose result is returned. |
| int32x4_t test_vmmlaq_s32(int32x4_t r, int8x16_t a, int8x16_t b) { |
| return vmmlaq_s32(r, a, b); |
| } |
| |
| // CHECK-LABEL: define dso_local <4 x i32> @test_vmmlaq_u32( |
| // CHECK-SAME: <4 x i32> noundef [[R:%.*]], <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] { |
| // CHECK-NEXT: [[ENTRY:.*:]] |
| // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[R]] to <16 x i8> |
| // CHECK-NEXT: [[VMMLA_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> |
| // CHECK-NEXT: [[VMMLA1_I:%.*]] = call <4 x i32> @llvm.arm.neon.ummla.v4i32.v16i8(<4 x i32> [[VMMLA_I]], <16 x i8> [[A]], <16 x i8> [[B]]) |
| // CHECK-NEXT: ret <4 x i32> [[VMMLA1_I]] |
| // Fixture: forwards r and the two uint8x16_t operands to vmmlaq_u32; the assertions above expect one @llvm.arm.neon.ummla.v4i32.v16i8 call on (r, a, b) whose result is returned. |
| uint32x4_t test_vmmlaq_u32(uint32x4_t r, uint8x16_t a, uint8x16_t b) { |
| return vmmlaq_u32(r, a, b); |
| } |
| |
| // CHECK-LABEL: define dso_local <4 x i32> @test_vusmmlaq_s32( |
| // CHECK-SAME: <4 x i32> noundef [[R:%.*]], <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] { |
| // CHECK-NEXT: [[ENTRY:.*:]] |
| // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[R]] to <16 x i8> |
| // CHECK-NEXT: [[VUSMMLA_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> |
| // CHECK-NEXT: [[VUSMMLA1_I:%.*]] = call <4 x i32> @llvm.arm.neon.usmmla.v4i32.v16i8(<4 x i32> [[VUSMMLA_I]], <16 x i8> [[A]], <16 x i8> [[B]]) |
| // CHECK-NEXT: ret <4 x i32> [[VUSMMLA1_I]] |
| // Fixture: mixed-sign variant (a is uint8x16_t, b is int8x16_t) calling vusmmlaq_s32; the assertions above expect one @llvm.arm.neon.usmmla.v4i32.v16i8 call on (r, a, b) whose result is returned. |
| int32x4_t test_vusmmlaq_s32(int32x4_t r, uint8x16_t a, int8x16_t b) { |
| return vusmmlaq_s32(r, a, b); |
| } |
| |
| // CHECK-LABEL: define dso_local <2 x i32> @test_vusdot_s32( |
| // CHECK-SAME: <2 x i32> noundef [[R:%.*]], <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] { |
| // CHECK-NEXT: [[ENTRY:.*:]] |
| // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[R]] to <8 x i8> |
| // CHECK-NEXT: [[VUSDOT_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> |
| // CHECK-NEXT: [[VUSDOT1_I:%.*]] = call <2 x i32> @llvm.arm.neon.usdot.v2i32.v8i8(<2 x i32> [[VUSDOT_I]], <8 x i8> [[A]], <8 x i8> [[B]]) |
| // CHECK-NEXT: ret <2 x i32> [[VUSDOT1_I]] |
| // Fixture: 64-bit vector form (a is uint8x8_t, b is int8x8_t) calling vusdot_s32; the assertions above expect one @llvm.arm.neon.usdot.v2i32.v8i8 call on (r, a, b) whose result is returned. |
| int32x2_t test_vusdot_s32(int32x2_t r, uint8x8_t a, int8x8_t b) { |
| return vusdot_s32(r, a, b); |
| } |
| |
| // CHECK-LABEL: define dso_local <2 x i32> @test_vusdot_lane_s32( |
| // CHECK-SAME: <2 x i32> noundef [[R:%.*]], <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] { |
| // CHECK-NEXT: [[ENTRY:.*:]] |
| // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[B]] to <2 x i32> |
| // CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8> |
| // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> |
| // CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP2]], <2 x i32> zeroinitializer |
| // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8> |
| // CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[R]] to <8 x i8> |
| // CHECK-NEXT: [[VUSDOT_I:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32> |
| // CHECK-NEXT: [[VUSDOT1_I:%.*]] = call <2 x i32> @llvm.arm.neon.usdot.v2i32.v8i8(<2 x i32> [[VUSDOT_I]], <8 x i8> [[A]], <8 x i8> [[TMP3]]) |
| // CHECK-NEXT: ret <2 x i32> [[VUSDOT1_I]] |
| // Fixture: vusdot_lane_s32 with lane index 0; the assertions above expect b to be viewed as <2 x i32>, element 0 replicated by a zeroinitializer shufflevector, and that vector passed as the last operand of @llvm.arm.neon.usdot.v2i32.v8i8 after r and a. |
| int32x2_t test_vusdot_lane_s32(int32x2_t r, uint8x8_t a, int8x8_t b) { |
| return vusdot_lane_s32(r, a, b, 0); |
| } |
| |
| // CHECK-LABEL: define dso_local <2 x i32> @test_vsudot_lane_s32( |
| // CHECK-SAME: <2 x i32> noundef [[R:%.*]], <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] { |
| // CHECK-NEXT: [[ENTRY:.*:]] |
| // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[B]] to <2 x i32> |
| // CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8> |
| // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> |
| // CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP2]], <2 x i32> zeroinitializer |
| // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8> |
| // CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[R]] to <8 x i8> |
| // CHECK-NEXT: [[VUSDOT_I:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32> |
| // CHECK-NEXT: [[VUSDOT1_I:%.*]] = call <2 x i32> @llvm.arm.neon.usdot.v2i32.v8i8(<2 x i32> [[VUSDOT_I]], <8 x i8> [[TMP3]], <8 x i8> [[A]]) |
| // CHECK-NEXT: ret <2 x i32> [[VUSDOT1_I]] |
| // Fixture: vsudot_lane_s32 with lane index 0 (a is int8x8_t, b is uint8x8_t); the assertions above expect the same usdot.v2i32.v8i8 intrinsic as the vusdot tests but with operands swapped: the replicated element 0 of b is the second operand and a is the third. |
| int32x2_t test_vsudot_lane_s32(int32x2_t r, int8x8_t a, uint8x8_t b) { |
| return vsudot_lane_s32(r, a, b, 0); |
| } |
| |
| // CHECK-LABEL: define dso_local <4 x i32> @test_vusdotq_lane_s32( |
| // CHECK-SAME: <4 x i32> noundef [[R:%.*]], <16 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] { |
| // CHECK-NEXT: [[ENTRY:.*:]] |
| // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[B]] to <2 x i32> |
| // CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8> |
| // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> |
| // CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP2]], <4 x i32> zeroinitializer |
| // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[LANE]] to <16 x i8> |
| // CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[R]] to <16 x i8> |
| // CHECK-NEXT: [[VUSDOT_I:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32> |
| // CHECK-NEXT: [[VUSDOT1_I:%.*]] = call <4 x i32> @llvm.arm.neon.usdot.v4i32.v16i8(<4 x i32> [[VUSDOT_I]], <16 x i8> [[A]], <16 x i8> [[TMP3]]) |
| // CHECK-NEXT: ret <4 x i32> [[VUSDOT1_I]] |
| // Fixture: 128-bit vusdotq_lane_s32 with lane index 0 and a 64-bit b; the assertions above expect element 0 of b (viewed as <2 x i32>) to be replicated into a <4 x i32> vector and passed as the last operand of @llvm.arm.neon.usdot.v4i32.v16i8 after r and a. |
| int32x4_t test_vusdotq_lane_s32(int32x4_t r, uint8x16_t a, int8x8_t b) { |
| return vusdotq_lane_s32(r, a, b, 0); |
| } |
| |
| // CHECK-LABEL: define dso_local <4 x i32> @test_vsudotq_lane_s32( |
| // CHECK-SAME: <4 x i32> noundef [[R:%.*]], <16 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] { |
| // CHECK-NEXT: [[ENTRY:.*:]] |
| // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[B]] to <2 x i32> |
| // CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8> |
| // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> |
| // CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP2]], <4 x i32> zeroinitializer |
| // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[LANE]] to <16 x i8> |
| // CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[R]] to <16 x i8> |
| // CHECK-NEXT: [[VUSDOT_I:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32> |
| // CHECK-NEXT: [[VUSDOT1_I:%.*]] = call <4 x i32> @llvm.arm.neon.usdot.v4i32.v16i8(<4 x i32> [[VUSDOT_I]], <16 x i8> [[TMP3]], <16 x i8> [[A]]) |
| // CHECK-NEXT: ret <4 x i32> [[VUSDOT1_I]] |
| // Fixture: 128-bit vsudotq_lane_s32 with lane index 0 (a is int8x16_t, b is uint8x8_t); the assertions above expect usdot.v4i32.v16i8 with operands swapped: element 0 of b replicated into a <4 x i32> vector is the second operand and a is the third. |
| int32x4_t test_vsudotq_lane_s32(int32x4_t r, int8x16_t a, uint8x8_t b) { |
| return vsudotq_lane_s32(r, a, b, 0); |
| } |