| // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \ |
| // RUN: -fallow-half-arguments-and-returns -S -disable-O0-optnone -emit-llvm -o - %s \ |
| // RUN: | opt -S -mem2reg \ |
| // RUN: | FileCheck %s |
| |
| // Test new aarch64 intrinsics and types |
| |
| #include <arm_neon.h> |
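
// The tests below exercise the basic NEON arithmetic, multiply-accumulate,
// absolute-difference, and bit-select intrinsics and check the LLVM IR they
// lower to. As a purely illustrative (unchecked) sketch of how these
// intrinsics compose in ordinary code, the hypothetical helper below wraps a
// fused multiply-accumulate over two float vectors; it is not matched by any
// FileCheck directive and only reuses an intrinsic already covered here.
static inline float32x2_t example_fused_mla_f32(float32x2_t acc, float32x2_t x,
                                                float32x2_t y) {
  // vmla_f32(acc, x, y) computes acc + x * y element-wise (see test_vmla_f32).
  return vmla_f32(acc, x, y);
}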
| |
| // CHECK-LABEL: @test_vadd_s8( |
| // CHECK: [[ADD_I:%.*]] = add <8 x i8> %v1, %v2 |
| // CHECK: ret <8 x i8> [[ADD_I]] |
| int8x8_t test_vadd_s8(int8x8_t v1, int8x8_t v2) { |
| return vadd_s8(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vadd_s16( |
| // CHECK: [[ADD_I:%.*]] = add <4 x i16> %v1, %v2 |
| // CHECK: ret <4 x i16> [[ADD_I]] |
| int16x4_t test_vadd_s16(int16x4_t v1, int16x4_t v2) { |
| return vadd_s16(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vadd_s32( |
| // CHECK: [[ADD_I:%.*]] = add <2 x i32> %v1, %v2 |
| // CHECK: ret <2 x i32> [[ADD_I]] |
| int32x2_t test_vadd_s32(int32x2_t v1, int32x2_t v2) { |
| return vadd_s32(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vadd_s64( |
| // CHECK: [[ADD_I:%.*]] = add <1 x i64> %v1, %v2 |
| // CHECK: ret <1 x i64> [[ADD_I]] |
| int64x1_t test_vadd_s64(int64x1_t v1, int64x1_t v2) { |
| return vadd_s64(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vadd_f32( |
| // CHECK: [[ADD_I:%.*]] = fadd <2 x float> %v1, %v2 |
| // CHECK: ret <2 x float> [[ADD_I]] |
| float32x2_t test_vadd_f32(float32x2_t v1, float32x2_t v2) { |
| return vadd_f32(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vadd_u8( |
| // CHECK: [[ADD_I:%.*]] = add <8 x i8> %v1, %v2 |
| // CHECK: ret <8 x i8> [[ADD_I]] |
| uint8x8_t test_vadd_u8(uint8x8_t v1, uint8x8_t v2) { |
| return vadd_u8(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vadd_u16( |
| // CHECK: [[ADD_I:%.*]] = add <4 x i16> %v1, %v2 |
| // CHECK: ret <4 x i16> [[ADD_I]] |
| uint16x4_t test_vadd_u16(uint16x4_t v1, uint16x4_t v2) { |
| return vadd_u16(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vadd_u32( |
| // CHECK: [[ADD_I:%.*]] = add <2 x i32> %v1, %v2 |
| // CHECK: ret <2 x i32> [[ADD_I]] |
| uint32x2_t test_vadd_u32(uint32x2_t v1, uint32x2_t v2) { |
| return vadd_u32(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vadd_u64( |
| // CHECK: [[ADD_I:%.*]] = add <1 x i64> %v1, %v2 |
| // CHECK: ret <1 x i64> [[ADD_I]] |
| uint64x1_t test_vadd_u64(uint64x1_t v1, uint64x1_t v2) { |
| return vadd_u64(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vaddq_s8( |
| // CHECK: [[ADD_I:%.*]] = add <16 x i8> %v1, %v2 |
| // CHECK: ret <16 x i8> [[ADD_I]] |
| int8x16_t test_vaddq_s8(int8x16_t v1, int8x16_t v2) { |
| return vaddq_s8(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vaddq_s16( |
| // CHECK: [[ADD_I:%.*]] = add <8 x i16> %v1, %v2 |
| // CHECK: ret <8 x i16> [[ADD_I]] |
| int16x8_t test_vaddq_s16(int16x8_t v1, int16x8_t v2) { |
| return vaddq_s16(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vaddq_s32( |
| // CHECK: [[ADD_I:%.*]] = add <4 x i32> %v1, %v2 |
| // CHECK: ret <4 x i32> [[ADD_I]] |
| int32x4_t test_vaddq_s32(int32x4_t v1, int32x4_t v2) { |
| return vaddq_s32(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vaddq_s64( |
| // CHECK: [[ADD_I:%.*]] = add <2 x i64> %v1, %v2 |
| // CHECK: ret <2 x i64> [[ADD_I]] |
| int64x2_t test_vaddq_s64(int64x2_t v1, int64x2_t v2) { |
| return vaddq_s64(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vaddq_f32( |
| // CHECK: [[ADD_I:%.*]] = fadd <4 x float> %v1, %v2 |
| // CHECK: ret <4 x float> [[ADD_I]] |
| float32x4_t test_vaddq_f32(float32x4_t v1, float32x4_t v2) { |
| return vaddq_f32(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vaddq_f64( |
| // CHECK: [[ADD_I:%.*]] = fadd <2 x double> %v1, %v2 |
| // CHECK: ret <2 x double> [[ADD_I]] |
| float64x2_t test_vaddq_f64(float64x2_t v1, float64x2_t v2) { |
| return vaddq_f64(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vaddq_u8( |
| // CHECK: [[ADD_I:%.*]] = add <16 x i8> %v1, %v2 |
| // CHECK: ret <16 x i8> [[ADD_I]] |
| uint8x16_t test_vaddq_u8(uint8x16_t v1, uint8x16_t v2) { |
| return vaddq_u8(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vaddq_u16( |
| // CHECK: [[ADD_I:%.*]] = add <8 x i16> %v1, %v2 |
| // CHECK: ret <8 x i16> [[ADD_I]] |
| uint16x8_t test_vaddq_u16(uint16x8_t v1, uint16x8_t v2) { |
| return vaddq_u16(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vaddq_u32( |
| // CHECK: [[ADD_I:%.*]] = add <4 x i32> %v1, %v2 |
| // CHECK: ret <4 x i32> [[ADD_I]] |
| uint32x4_t test_vaddq_u32(uint32x4_t v1, uint32x4_t v2) { |
| return vaddq_u32(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vaddq_u64( |
| // CHECK: [[ADD_I:%.*]] = add <2 x i64> %v1, %v2 |
| // CHECK: ret <2 x i64> [[ADD_I]] |
| uint64x2_t test_vaddq_u64(uint64x2_t v1, uint64x2_t v2) { |
| return vaddq_u64(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vsub_s8( |
| // CHECK: [[SUB_I:%.*]] = sub <8 x i8> %v1, %v2 |
| // CHECK: ret <8 x i8> [[SUB_I]] |
| int8x8_t test_vsub_s8(int8x8_t v1, int8x8_t v2) { |
| return vsub_s8(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vsub_s16( |
| // CHECK: [[SUB_I:%.*]] = sub <4 x i16> %v1, %v2 |
| // CHECK: ret <4 x i16> [[SUB_I]] |
| int16x4_t test_vsub_s16(int16x4_t v1, int16x4_t v2) { |
| return vsub_s16(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vsub_s32( |
| // CHECK: [[SUB_I:%.*]] = sub <2 x i32> %v1, %v2 |
| // CHECK: ret <2 x i32> [[SUB_I]] |
| int32x2_t test_vsub_s32(int32x2_t v1, int32x2_t v2) { |
| return vsub_s32(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vsub_s64( |
| // CHECK: [[SUB_I:%.*]] = sub <1 x i64> %v1, %v2 |
| // CHECK: ret <1 x i64> [[SUB_I]] |
| int64x1_t test_vsub_s64(int64x1_t v1, int64x1_t v2) { |
| return vsub_s64(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vsub_f32( |
| // CHECK: [[SUB_I:%.*]] = fsub <2 x float> %v1, %v2 |
| // CHECK: ret <2 x float> [[SUB_I]] |
| float32x2_t test_vsub_f32(float32x2_t v1, float32x2_t v2) { |
| return vsub_f32(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vsub_u8( |
| // CHECK: [[SUB_I:%.*]] = sub <8 x i8> %v1, %v2 |
| // CHECK: ret <8 x i8> [[SUB_I]] |
| uint8x8_t test_vsub_u8(uint8x8_t v1, uint8x8_t v2) { |
| return vsub_u8(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vsub_u16( |
| // CHECK: [[SUB_I:%.*]] = sub <4 x i16> %v1, %v2 |
| // CHECK: ret <4 x i16> [[SUB_I]] |
| uint16x4_t test_vsub_u16(uint16x4_t v1, uint16x4_t v2) { |
| return vsub_u16(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vsub_u32( |
| // CHECK: [[SUB_I:%.*]] = sub <2 x i32> %v1, %v2 |
| // CHECK: ret <2 x i32> [[SUB_I]] |
| uint32x2_t test_vsub_u32(uint32x2_t v1, uint32x2_t v2) { |
| return vsub_u32(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vsub_u64( |
| // CHECK: [[SUB_I:%.*]] = sub <1 x i64> %v1, %v2 |
| // CHECK: ret <1 x i64> [[SUB_I]] |
| uint64x1_t test_vsub_u64(uint64x1_t v1, uint64x1_t v2) { |
| return vsub_u64(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vsubq_s8( |
| // CHECK: [[SUB_I:%.*]] = sub <16 x i8> %v1, %v2 |
| // CHECK: ret <16 x i8> [[SUB_I]] |
| int8x16_t test_vsubq_s8(int8x16_t v1, int8x16_t v2) { |
| return vsubq_s8(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vsubq_s16( |
| // CHECK: [[SUB_I:%.*]] = sub <8 x i16> %v1, %v2 |
| // CHECK: ret <8 x i16> [[SUB_I]] |
| int16x8_t test_vsubq_s16(int16x8_t v1, int16x8_t v2) { |
| return vsubq_s16(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vsubq_s32( |
| // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %v1, %v2 |
| // CHECK: ret <4 x i32> [[SUB_I]] |
| int32x4_t test_vsubq_s32(int32x4_t v1, int32x4_t v2) { |
| return vsubq_s32(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vsubq_s64( |
| // CHECK: [[SUB_I:%.*]] = sub <2 x i64> %v1, %v2 |
| // CHECK: ret <2 x i64> [[SUB_I]] |
| int64x2_t test_vsubq_s64(int64x2_t v1, int64x2_t v2) { |
| return vsubq_s64(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vsubq_f32( |
| // CHECK: [[SUB_I:%.*]] = fsub <4 x float> %v1, %v2 |
| // CHECK: ret <4 x float> [[SUB_I]] |
| float32x4_t test_vsubq_f32(float32x4_t v1, float32x4_t v2) { |
| return vsubq_f32(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vsubq_f64( |
| // CHECK: [[SUB_I:%.*]] = fsub <2 x double> %v1, %v2 |
| // CHECK: ret <2 x double> [[SUB_I]] |
| float64x2_t test_vsubq_f64(float64x2_t v1, float64x2_t v2) { |
| return vsubq_f64(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vsubq_u8( |
| // CHECK: [[SUB_I:%.*]] = sub <16 x i8> %v1, %v2 |
| // CHECK: ret <16 x i8> [[SUB_I]] |
| uint8x16_t test_vsubq_u8(uint8x16_t v1, uint8x16_t v2) { |
| return vsubq_u8(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vsubq_u16( |
| // CHECK: [[SUB_I:%.*]] = sub <8 x i16> %v1, %v2 |
| // CHECK: ret <8 x i16> [[SUB_I]] |
| uint16x8_t test_vsubq_u16(uint16x8_t v1, uint16x8_t v2) { |
| return vsubq_u16(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vsubq_u32( |
| // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %v1, %v2 |
| // CHECK: ret <4 x i32> [[SUB_I]] |
| uint32x4_t test_vsubq_u32(uint32x4_t v1, uint32x4_t v2) { |
| return vsubq_u32(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vsubq_u64( |
| // CHECK: [[SUB_I:%.*]] = sub <2 x i64> %v1, %v2 |
| // CHECK: ret <2 x i64> [[SUB_I]] |
| uint64x2_t test_vsubq_u64(uint64x2_t v1, uint64x2_t v2) { |
| return vsubq_u64(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vmul_s8( |
| // CHECK: [[MUL_I:%.*]] = mul <8 x i8> %v1, %v2 |
| // CHECK: ret <8 x i8> [[MUL_I]] |
| int8x8_t test_vmul_s8(int8x8_t v1, int8x8_t v2) { |
| return vmul_s8(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vmul_s16( |
| // CHECK: [[MUL_I:%.*]] = mul <4 x i16> %v1, %v2 |
| // CHECK: ret <4 x i16> [[MUL_I]] |
| int16x4_t test_vmul_s16(int16x4_t v1, int16x4_t v2) { |
| return vmul_s16(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vmul_s32( |
| // CHECK: [[MUL_I:%.*]] = mul <2 x i32> %v1, %v2 |
| // CHECK: ret <2 x i32> [[MUL_I]] |
| int32x2_t test_vmul_s32(int32x2_t v1, int32x2_t v2) { |
| return vmul_s32(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vmul_f32( |
| // CHECK: [[MUL_I:%.*]] = fmul <2 x float> %v1, %v2 |
| // CHECK: ret <2 x float> [[MUL_I]] |
| float32x2_t test_vmul_f32(float32x2_t v1, float32x2_t v2) { |
| return vmul_f32(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vmul_u8( |
| // CHECK: [[MUL_I:%.*]] = mul <8 x i8> %v1, %v2 |
| // CHECK: ret <8 x i8> [[MUL_I]] |
| uint8x8_t test_vmul_u8(uint8x8_t v1, uint8x8_t v2) { |
| return vmul_u8(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vmul_u16( |
| // CHECK: [[MUL_I:%.*]] = mul <4 x i16> %v1, %v2 |
| // CHECK: ret <4 x i16> [[MUL_I]] |
| uint16x4_t test_vmul_u16(uint16x4_t v1, uint16x4_t v2) { |
| return vmul_u16(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vmul_u32( |
| // CHECK: [[MUL_I:%.*]] = mul <2 x i32> %v1, %v2 |
| // CHECK: ret <2 x i32> [[MUL_I]] |
| uint32x2_t test_vmul_u32(uint32x2_t v1, uint32x2_t v2) { |
| return vmul_u32(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vmulq_s8( |
| // CHECK: [[MUL_I:%.*]] = mul <16 x i8> %v1, %v2 |
| // CHECK: ret <16 x i8> [[MUL_I]] |
| int8x16_t test_vmulq_s8(int8x16_t v1, int8x16_t v2) { |
| return vmulq_s8(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vmulq_s16( |
| // CHECK: [[MUL_I:%.*]] = mul <8 x i16> %v1, %v2 |
| // CHECK: ret <8 x i16> [[MUL_I]] |
| int16x8_t test_vmulq_s16(int16x8_t v1, int16x8_t v2) { |
| return vmulq_s16(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vmulq_s32( |
| // CHECK: [[MUL_I:%.*]] = mul <4 x i32> %v1, %v2 |
| // CHECK: ret <4 x i32> [[MUL_I]] |
| int32x4_t test_vmulq_s32(int32x4_t v1, int32x4_t v2) { |
| return vmulq_s32(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vmulq_u8( |
| // CHECK: [[MUL_I:%.*]] = mul <16 x i8> %v1, %v2 |
| // CHECK: ret <16 x i8> [[MUL_I]] |
| uint8x16_t test_vmulq_u8(uint8x16_t v1, uint8x16_t v2) { |
| return vmulq_u8(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vmulq_u16( |
| // CHECK: [[MUL_I:%.*]] = mul <8 x i16> %v1, %v2 |
| // CHECK: ret <8 x i16> [[MUL_I]] |
| uint16x8_t test_vmulq_u16(uint16x8_t v1, uint16x8_t v2) { |
| return vmulq_u16(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vmulq_u32( |
| // CHECK: [[MUL_I:%.*]] = mul <4 x i32> %v1, %v2 |
| // CHECK: ret <4 x i32> [[MUL_I]] |
| uint32x4_t test_vmulq_u32(uint32x4_t v1, uint32x4_t v2) { |
| return vmulq_u32(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vmulq_f32( |
| // CHECK: [[MUL_I:%.*]] = fmul <4 x float> %v1, %v2 |
| // CHECK: ret <4 x float> [[MUL_I]] |
| float32x4_t test_vmulq_f32(float32x4_t v1, float32x4_t v2) { |
| return vmulq_f32(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vmulq_f64( |
| // CHECK: [[MUL_I:%.*]] = fmul <2 x double> %v1, %v2 |
| // CHECK: ret <2 x double> [[MUL_I]] |
| float64x2_t test_vmulq_f64(float64x2_t v1, float64x2_t v2) { |
| return vmulq_f64(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vmul_p8( |
| // CHECK: [[VMUL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.pmul.v8i8(<8 x i8> %v1, <8 x i8> %v2) |
| // CHECK: ret <8 x i8> [[VMUL_V_I]] |
| poly8x8_t test_vmul_p8(poly8x8_t v1, poly8x8_t v2) { |
| return vmul_p8(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vmulq_p8( |
| // CHECK: [[VMULQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.pmul.v16i8(<16 x i8> %v1, <16 x i8> %v2) |
| // CHECK: ret <16 x i8> [[VMULQ_V_I]] |
| poly8x16_t test_vmulq_p8(poly8x16_t v1, poly8x16_t v2) { |
| return vmulq_p8(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vmla_s8( |
| // CHECK: [[MUL_I:%.*]] = mul <8 x i8> %v2, %v3 |
| // CHECK: [[ADD_I:%.*]] = add <8 x i8> %v1, [[MUL_I]] |
| // CHECK: ret <8 x i8> [[ADD_I]] |
| int8x8_t test_vmla_s8(int8x8_t v1, int8x8_t v2, int8x8_t v3) { |
| return vmla_s8(v1, v2, v3); |
| } |
| |
| // CHECK-LABEL: @test_vmla_s16( |
| // CHECK: [[MUL_I:%.*]] = mul <4 x i16> %v2, %v3 |
| // CHECK: [[ADD_I:%.*]] = add <4 x i16> %v1, [[MUL_I]] |
| // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[ADD_I]] to <8 x i8> |
| // CHECK: ret <8 x i8> [[TMP0]] |
| int8x8_t test_vmla_s16(int16x4_t v1, int16x4_t v2, int16x4_t v3) { |
| return vmla_s16(v1, v2, v3); |
| } |
| |
| // CHECK-LABEL: @test_vmla_s32( |
| // CHECK: [[MUL_I:%.*]] = mul <2 x i32> %v2, %v3 |
| // CHECK: [[ADD_I:%.*]] = add <2 x i32> %v1, [[MUL_I]] |
| // CHECK: ret <2 x i32> [[ADD_I]] |
| int32x2_t test_vmla_s32(int32x2_t v1, int32x2_t v2, int32x2_t v3) { |
| return vmla_s32(v1, v2, v3); |
| } |
| |
| // CHECK-LABEL: @test_vmla_f32( |
| // CHECK: [[MUL_I:%.*]] = fmul <2 x float> %v2, %v3 |
| // CHECK: [[ADD_I:%.*]] = fadd <2 x float> %v1, [[MUL_I]] |
| // CHECK: ret <2 x float> [[ADD_I]] |
| float32x2_t test_vmla_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) { |
| return vmla_f32(v1, v2, v3); |
| } |
| |
| // CHECK-LABEL: @test_vmla_u8( |
| // CHECK: [[MUL_I:%.*]] = mul <8 x i8> %v2, %v3 |
| // CHECK: [[ADD_I:%.*]] = add <8 x i8> %v1, [[MUL_I]] |
| // CHECK: ret <8 x i8> [[ADD_I]] |
| uint8x8_t test_vmla_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) { |
| return vmla_u8(v1, v2, v3); |
| } |
| |
| // CHECK-LABEL: @test_vmla_u16( |
| // CHECK: [[MUL_I:%.*]] = mul <4 x i16> %v2, %v3 |
| // CHECK: [[ADD_I:%.*]] = add <4 x i16> %v1, [[MUL_I]] |
| // CHECK: ret <4 x i16> [[ADD_I]] |
| uint16x4_t test_vmla_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) { |
| return vmla_u16(v1, v2, v3); |
| } |
| |
| // CHECK-LABEL: @test_vmla_u32( |
| // CHECK: [[MUL_I:%.*]] = mul <2 x i32> %v2, %v3 |
| // CHECK: [[ADD_I:%.*]] = add <2 x i32> %v1, [[MUL_I]] |
| // CHECK: ret <2 x i32> [[ADD_I]] |
| uint32x2_t test_vmla_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) { |
| return vmla_u32(v1, v2, v3); |
| } |
| |
| // CHECK-LABEL: @test_vmlaq_s8( |
| // CHECK: [[MUL_I:%.*]] = mul <16 x i8> %v2, %v3 |
| // CHECK: [[ADD_I:%.*]] = add <16 x i8> %v1, [[MUL_I]] |
| // CHECK: ret <16 x i8> [[ADD_I]] |
| int8x16_t test_vmlaq_s8(int8x16_t v1, int8x16_t v2, int8x16_t v3) { |
| return vmlaq_s8(v1, v2, v3); |
| } |
| |
| // CHECK-LABEL: @test_vmlaq_s16( |
| // CHECK: [[MUL_I:%.*]] = mul <8 x i16> %v2, %v3 |
| // CHECK: [[ADD_I:%.*]] = add <8 x i16> %v1, [[MUL_I]] |
| // CHECK: ret <8 x i16> [[ADD_I]] |
| int16x8_t test_vmlaq_s16(int16x8_t v1, int16x8_t v2, int16x8_t v3) { |
| return vmlaq_s16(v1, v2, v3); |
| } |
| |
| // CHECK-LABEL: @test_vmlaq_s32( |
| // CHECK: [[MUL_I:%.*]] = mul <4 x i32> %v2, %v3 |
| // CHECK: [[ADD_I:%.*]] = add <4 x i32> %v1, [[MUL_I]] |
| // CHECK: ret <4 x i32> [[ADD_I]] |
| int32x4_t test_vmlaq_s32(int32x4_t v1, int32x4_t v2, int32x4_t v3) { |
| return vmlaq_s32(v1, v2, v3); |
| } |
| |
| // CHECK-LABEL: @test_vmlaq_f32( |
| // CHECK: [[MUL_I:%.*]] = fmul <4 x float> %v2, %v3 |
| // CHECK: [[ADD_I:%.*]] = fadd <4 x float> %v1, [[MUL_I]] |
| // CHECK: ret <4 x float> [[ADD_I]] |
| float32x4_t test_vmlaq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) { |
| return vmlaq_f32(v1, v2, v3); |
| } |
| |
| // CHECK-LABEL: @test_vmlaq_u8( |
| // CHECK: [[MUL_I:%.*]] = mul <16 x i8> %v2, %v3 |
| // CHECK: [[ADD_I:%.*]] = add <16 x i8> %v1, [[MUL_I]] |
| // CHECK: ret <16 x i8> [[ADD_I]] |
| uint8x16_t test_vmlaq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) { |
| return vmlaq_u8(v1, v2, v3); |
| } |
| |
| // CHECK-LABEL: @test_vmlaq_u16( |
| // CHECK: [[MUL_I:%.*]] = mul <8 x i16> %v2, %v3 |
| // CHECK: [[ADD_I:%.*]] = add <8 x i16> %v1, [[MUL_I]] |
| // CHECK: ret <8 x i16> [[ADD_I]] |
| uint16x8_t test_vmlaq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) { |
| return vmlaq_u16(v1, v2, v3); |
| } |
| |
| // CHECK-LABEL: @test_vmlaq_u32( |
| // CHECK: [[MUL_I:%.*]] = mul <4 x i32> %v2, %v3 |
| // CHECK: [[ADD_I:%.*]] = add <4 x i32> %v1, [[MUL_I]] |
| // CHECK: ret <4 x i32> [[ADD_I]] |
| uint32x4_t test_vmlaq_u32(uint32x4_t v1, uint32x4_t v2, uint32x4_t v3) { |
| return vmlaq_u32(v1, v2, v3); |
| } |
| |
| // CHECK-LABEL: @test_vmlaq_f64( |
| // CHECK: [[MUL_I:%.*]] = fmul <2 x double> %v2, %v3 |
| // CHECK: [[ADD_I:%.*]] = fadd <2 x double> %v1, [[MUL_I]] |
| // CHECK: ret <2 x double> [[ADD_I]] |
| float64x2_t test_vmlaq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) { |
| return vmlaq_f64(v1, v2, v3); |
| } |
| |
| // CHECK-LABEL: @test_vmls_s8( |
| // CHECK: [[MUL_I:%.*]] = mul <8 x i8> %v2, %v3 |
| // CHECK: [[SUB_I:%.*]] = sub <8 x i8> %v1, [[MUL_I]] |
| // CHECK: ret <8 x i8> [[SUB_I]] |
| int8x8_t test_vmls_s8(int8x8_t v1, int8x8_t v2, int8x8_t v3) { |
| return vmls_s8(v1, v2, v3); |
| } |
| |
| // CHECK-LABEL: @test_vmls_s16( |
| // CHECK: [[MUL_I:%.*]] = mul <4 x i16> %v2, %v3 |
| // CHECK: [[SUB_I:%.*]] = sub <4 x i16> %v1, [[MUL_I]] |
| // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SUB_I]] to <8 x i8> |
| // CHECK: ret <8 x i8> [[TMP0]] |
| int8x8_t test_vmls_s16(int16x4_t v1, int16x4_t v2, int16x4_t v3) { |
| return vmls_s16(v1, v2, v3); |
| } |
| |
| // CHECK-LABEL: @test_vmls_s32( |
| // CHECK: [[MUL_I:%.*]] = mul <2 x i32> %v2, %v3 |
| // CHECK: [[SUB_I:%.*]] = sub <2 x i32> %v1, [[MUL_I]] |
| // CHECK: ret <2 x i32> [[SUB_I]] |
| int32x2_t test_vmls_s32(int32x2_t v1, int32x2_t v2, int32x2_t v3) { |
| return vmls_s32(v1, v2, v3); |
| } |
| |
| // CHECK-LABEL: @test_vmls_f32( |
| // CHECK: [[MUL_I:%.*]] = fmul <2 x float> %v2, %v3 |
| // CHECK: [[SUB_I:%.*]] = fsub <2 x float> %v1, [[MUL_I]] |
| // CHECK: ret <2 x float> [[SUB_I]] |
| float32x2_t test_vmls_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) { |
| return vmls_f32(v1, v2, v3); |
| } |
| |
| // CHECK-LABEL: @test_vmls_u8( |
| // CHECK: [[MUL_I:%.*]] = mul <8 x i8> %v2, %v3 |
| // CHECK: [[SUB_I:%.*]] = sub <8 x i8> %v1, [[MUL_I]] |
| // CHECK: ret <8 x i8> [[SUB_I]] |
| uint8x8_t test_vmls_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) { |
| return vmls_u8(v1, v2, v3); |
| } |
| |
| // CHECK-LABEL: @test_vmls_u16( |
| // CHECK: [[MUL_I:%.*]] = mul <4 x i16> %v2, %v3 |
| // CHECK: [[SUB_I:%.*]] = sub <4 x i16> %v1, [[MUL_I]] |
| // CHECK: ret <4 x i16> [[SUB_I]] |
| uint16x4_t test_vmls_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) { |
| return vmls_u16(v1, v2, v3); |
| } |
| |
| // CHECK-LABEL: @test_vmls_u32( |
| // CHECK: [[MUL_I:%.*]] = mul <2 x i32> %v2, %v3 |
| // CHECK: [[SUB_I:%.*]] = sub <2 x i32> %v1, [[MUL_I]] |
| // CHECK: ret <2 x i32> [[SUB_I]] |
| uint32x2_t test_vmls_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) { |
| return vmls_u32(v1, v2, v3); |
| } |
| |
| // CHECK-LABEL: @test_vmlsq_s8( |
| // CHECK: [[MUL_I:%.*]] = mul <16 x i8> %v2, %v3 |
| // CHECK: [[SUB_I:%.*]] = sub <16 x i8> %v1, [[MUL_I]] |
| // CHECK: ret <16 x i8> [[SUB_I]] |
| int8x16_t test_vmlsq_s8(int8x16_t v1, int8x16_t v2, int8x16_t v3) { |
| return vmlsq_s8(v1, v2, v3); |
| } |
| |
| // CHECK-LABEL: @test_vmlsq_s16( |
| // CHECK: [[MUL_I:%.*]] = mul <8 x i16> %v2, %v3 |
| // CHECK: [[SUB_I:%.*]] = sub <8 x i16> %v1, [[MUL_I]] |
| // CHECK: ret <8 x i16> [[SUB_I]] |
| int16x8_t test_vmlsq_s16(int16x8_t v1, int16x8_t v2, int16x8_t v3) { |
| return vmlsq_s16(v1, v2, v3); |
| } |
| |
| // CHECK-LABEL: @test_vmlsq_s32( |
| // CHECK: [[MUL_I:%.*]] = mul <4 x i32> %v2, %v3 |
| // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %v1, [[MUL_I]] |
| // CHECK: ret <4 x i32> [[SUB_I]] |
| int32x4_t test_vmlsq_s32(int32x4_t v1, int32x4_t v2, int32x4_t v3) { |
| return vmlsq_s32(v1, v2, v3); |
| } |
| |
| // CHECK-LABEL: @test_vmlsq_f32( |
| // CHECK: [[MUL_I:%.*]] = fmul <4 x float> %v2, %v3 |
| // CHECK: [[SUB_I:%.*]] = fsub <4 x float> %v1, [[MUL_I]] |
| // CHECK: ret <4 x float> [[SUB_I]] |
| float32x4_t test_vmlsq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) { |
| return vmlsq_f32(v1, v2, v3); |
| } |
| |
| // CHECK-LABEL: @test_vmlsq_u8( |
| // CHECK: [[MUL_I:%.*]] = mul <16 x i8> %v2, %v3 |
| // CHECK: [[SUB_I:%.*]] = sub <16 x i8> %v1, [[MUL_I]] |
| // CHECK: ret <16 x i8> [[SUB_I]] |
| uint8x16_t test_vmlsq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) { |
| return vmlsq_u8(v1, v2, v3); |
| } |
| |
| // CHECK-LABEL: @test_vmlsq_u16( |
| // CHECK: [[MUL_I:%.*]] = mul <8 x i16> %v2, %v3 |
| // CHECK: [[SUB_I:%.*]] = sub <8 x i16> %v1, [[MUL_I]] |
| // CHECK: ret <8 x i16> [[SUB_I]] |
| uint16x8_t test_vmlsq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) { |
| return vmlsq_u16(v1, v2, v3); |
| } |
| |
| // CHECK-LABEL: @test_vmlsq_u32( |
| // CHECK: [[MUL_I:%.*]] = mul <4 x i32> %v2, %v3 |
| // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %v1, [[MUL_I]] |
| // CHECK: ret <4 x i32> [[SUB_I]] |
| uint32x4_t test_vmlsq_u32(uint32x4_t v1, uint32x4_t v2, uint32x4_t v3) { |
| return vmlsq_u32(v1, v2, v3); |
| } |
| |
| // CHECK-LABEL: @test_vmlsq_f64( |
| // CHECK: [[MUL_I:%.*]] = fmul <2 x double> %v2, %v3 |
| // CHECK: [[SUB_I:%.*]] = fsub <2 x double> %v1, [[MUL_I]] |
| // CHECK: ret <2 x double> [[SUB_I]] |
| float64x2_t test_vmlsq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) { |
| return vmlsq_f64(v1, v2, v3); |
| } |
| |
| // CHECK-LABEL: @test_vfma_f32( |
| // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8> |
| // CHECK: [[TMP2:%.*]] = bitcast <2 x float> %v3 to <8 x i8> |
| // CHECK: [[TMP3:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> %v2, <2 x float> %v3, <2 x float> %v1) |
| // CHECK: ret <2 x float> [[TMP3]] |
| float32x2_t test_vfma_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) { |
| return vfma_f32(v1, v2, v3); |
| } |
| |
| // CHECK-LABEL: @test_vfmaq_f32( |
| // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8> |
| // CHECK: [[TMP2:%.*]] = bitcast <4 x float> %v3 to <16 x i8> |
| // CHECK: [[TMP3:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %v2, <4 x float> %v3, <4 x float> %v1) |
| // CHECK: ret <4 x float> [[TMP3]] |
| float32x4_t test_vfmaq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) { |
| return vfmaq_f32(v1, v2, v3); |
| } |
| |
| // CHECK-LABEL: @test_vfmaq_f64( |
| // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8> |
| // CHECK: [[TMP2:%.*]] = bitcast <2 x double> %v3 to <16 x i8> |
| // CHECK: [[TMP3:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %v2, <2 x double> %v3, <2 x double> %v1) |
| // CHECK: ret <2 x double> [[TMP3]] |
| float64x2_t test_vfmaq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) { |
| return vfmaq_f64(v1, v2, v3); |
| } |
| |
| // CHECK-LABEL: @test_vfms_f32( |
| // CHECK: [[SUB_I:%.*]] = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v2 |
| // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <2 x float> [[SUB_I]] to <8 x i8> |
| // CHECK: [[TMP2:%.*]] = bitcast <2 x float> %v3 to <8 x i8> |
| // CHECK: [[TMP3:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[SUB_I]], <2 x float> %v3, <2 x float> %v1) |
| // CHECK: ret <2 x float> [[TMP3]] |
| float32x2_t test_vfms_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) { |
| return vfms_f32(v1, v2, v3); |
| } |
| |
| // CHECK-LABEL: @test_vfmsq_f32( |
| // CHECK: [[SUB_I:%.*]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v2 |
| // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <4 x float> [[SUB_I]] to <16 x i8> |
| // CHECK: [[TMP2:%.*]] = bitcast <4 x float> %v3 to <16 x i8> |
| // CHECK: [[TMP3:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[SUB_I]], <4 x float> %v3, <4 x float> %v1) |
| // CHECK: ret <4 x float> [[TMP3]] |
| float32x4_t test_vfmsq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) { |
| return vfmsq_f32(v1, v2, v3); |
| } |
| |
| // CHECK-LABEL: @test_vfmsq_f64( |
| // CHECK: [[SUB_I:%.*]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %v2 |
| // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <2 x double> [[SUB_I]] to <16 x i8> |
| // CHECK: [[TMP2:%.*]] = bitcast <2 x double> %v3 to <16 x i8> |
| // CHECK: [[TMP3:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[SUB_I]], <2 x double> %v3, <2 x double> %v1) |
| // CHECK: ret <2 x double> [[TMP3]] |
| float64x2_t test_vfmsq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) { |
| return vfmsq_f64(v1, v2, v3); |
| } |
| |
| // CHECK-LABEL: @test_vdivq_f64( |
| // CHECK: [[DIV_I:%.*]] = fdiv <2 x double> %v1, %v2 |
| // CHECK: ret <2 x double> [[DIV_I]] |
| float64x2_t test_vdivq_f64(float64x2_t v1, float64x2_t v2) { |
| return vdivq_f64(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vdivq_f32( |
| // CHECK: [[DIV_I:%.*]] = fdiv <4 x float> %v1, %v2 |
| // CHECK: ret <4 x float> [[DIV_I]] |
| float32x4_t test_vdivq_f32(float32x4_t v1, float32x4_t v2) { |
| return vdivq_f32(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vdiv_f32( |
| // CHECK: [[DIV_I:%.*]] = fdiv <2 x float> %v1, %v2 |
| // CHECK: ret <2 x float> [[DIV_I]] |
| float32x2_t test_vdiv_f32(float32x2_t v1, float32x2_t v2) { |
| return vdiv_f32(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vaba_s8( |
| // CHECK: [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %v2, <8 x i8> %v3) |
| // CHECK: [[ADD_I:%.*]] = add <8 x i8> %v1, [[VABD_I_I]] |
| // CHECK: ret <8 x i8> [[ADD_I]] |
| int8x8_t test_vaba_s8(int8x8_t v1, int8x8_t v2, int8x8_t v3) { |
| return vaba_s8(v1, v2, v3); |
| } |
| |
| // CHECK-LABEL: @test_vaba_s16( |
| // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v2 to <8 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v3 to <8 x i8> |
| // CHECK: [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %v2, <4 x i16> %v3) |
| // CHECK: [[ADD_I:%.*]] = add <4 x i16> %v1, [[VABD2_I_I]] |
| // CHECK: ret <4 x i16> [[ADD_I]] |
| int16x4_t test_vaba_s16(int16x4_t v1, int16x4_t v2, int16x4_t v3) { |
| return vaba_s16(v1, v2, v3); |
| } |
| |
| // CHECK-LABEL: @test_vaba_s32( |
| // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v2 to <8 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v3 to <8 x i8> |
| // CHECK: [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %v2, <2 x i32> %v3) |
| // CHECK: [[ADD_I:%.*]] = add <2 x i32> %v1, [[VABD2_I_I]] |
| // CHECK: ret <2 x i32> [[ADD_I]] |
| int32x2_t test_vaba_s32(int32x2_t v1, int32x2_t v2, int32x2_t v3) { |
| return vaba_s32(v1, v2, v3); |
| } |
| |
| // CHECK-LABEL: @test_vaba_u8( |
| // CHECK: [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %v2, <8 x i8> %v3) |
| // CHECK: [[ADD_I:%.*]] = add <8 x i8> %v1, [[VABD_I_I]] |
| // CHECK: ret <8 x i8> [[ADD_I]] |
| uint8x8_t test_vaba_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) { |
| return vaba_u8(v1, v2, v3); |
| } |
| |
| // CHECK-LABEL: @test_vaba_u16( |
| // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v2 to <8 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v3 to <8 x i8> |
| // CHECK: [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %v2, <4 x i16> %v3) |
| // CHECK: [[ADD_I:%.*]] = add <4 x i16> %v1, [[VABD2_I_I]] |
| // CHECK: ret <4 x i16> [[ADD_I]] |
| uint16x4_t test_vaba_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) { |
| return vaba_u16(v1, v2, v3); |
| } |
| |
| // CHECK-LABEL: @test_vaba_u32( |
| // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v2 to <8 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v3 to <8 x i8> |
| // CHECK: [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %v2, <2 x i32> %v3) |
| // CHECK: [[ADD_I:%.*]] = add <2 x i32> %v1, [[VABD2_I_I]] |
| // CHECK: ret <2 x i32> [[ADD_I]] |
| uint32x2_t test_vaba_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) { |
| return vaba_u32(v1, v2, v3); |
| } |
| |
| // CHECK-LABEL: @test_vabaq_s8( |
| // CHECK: [[VABD_I_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> %v2, <16 x i8> %v3) |
| // CHECK: [[ADD_I:%.*]] = add <16 x i8> %v1, [[VABD_I_I]] |
| // CHECK: ret <16 x i8> [[ADD_I]] |
| int8x16_t test_vabaq_s8(int8x16_t v1, int8x16_t v2, int8x16_t v3) { |
| return vabaq_s8(v1, v2, v3); |
| } |
| |
| // CHECK-LABEL: @test_vabaq_s16( |
| // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v2 to <16 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v3 to <16 x i8> |
| // CHECK: [[VABD2_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %v2, <8 x i16> %v3) |
| // CHECK: [[ADD_I:%.*]] = add <8 x i16> %v1, [[VABD2_I_I]] |
| // CHECK: ret <8 x i16> [[ADD_I]] |
| int16x8_t test_vabaq_s16(int16x8_t v1, int16x8_t v2, int16x8_t v3) { |
| return vabaq_s16(v1, v2, v3); |
| } |
| |
| // CHECK-LABEL: @test_vabaq_s32( |
| // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v2 to <16 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v3 to <16 x i8> |
| // CHECK: [[VABD2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> %v2, <4 x i32> %v3) |
| // CHECK: [[ADD_I:%.*]] = add <4 x i32> %v1, [[VABD2_I_I]] |
| // CHECK: ret <4 x i32> [[ADD_I]] |
| int32x4_t test_vabaq_s32(int32x4_t v1, int32x4_t v2, int32x4_t v3) { |
| return vabaq_s32(v1, v2, v3); |
| } |
| |
| // CHECK-LABEL: @test_vabaq_u8( |
| // CHECK: [[VABD_I_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> %v2, <16 x i8> %v3) |
| // CHECK: [[ADD_I:%.*]] = add <16 x i8> %v1, [[VABD_I_I]] |
| // CHECK: ret <16 x i8> [[ADD_I]] |
| uint8x16_t test_vabaq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) { |
| return vabaq_u8(v1, v2, v3); |
| } |
| |
| // CHECK-LABEL: @test_vabaq_u16( |
| // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v2 to <16 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v3 to <16 x i8> |
| // CHECK: [[VABD2_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %v2, <8 x i16> %v3) |
| // CHECK: [[ADD_I:%.*]] = add <8 x i16> %v1, [[VABD2_I_I]] |
| // CHECK: ret <8 x i16> [[ADD_I]] |
| uint16x8_t test_vabaq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) { |
| return vabaq_u16(v1, v2, v3); |
| } |
| |
| // CHECK-LABEL: @test_vabaq_u32( |
| // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v2 to <16 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v3 to <16 x i8> |
| // CHECK: [[VABD2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> %v2, <4 x i32> %v3) |
| // CHECK: [[ADD_I:%.*]] = add <4 x i32> %v1, [[VABD2_I_I]] |
| // CHECK: ret <4 x i32> [[ADD_I]] |
| uint32x4_t test_vabaq_u32(uint32x4_t v1, uint32x4_t v2, uint32x4_t v3) { |
| return vabaq_u32(v1, v2, v3); |
| } |
| |
| // CHECK-LABEL: @test_vabd_s8( |
| // CHECK: [[VABD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %v1, <8 x i8> %v2) |
| // CHECK: ret <8 x i8> [[VABD_I]] |
| int8x8_t test_vabd_s8(int8x8_t v1, int8x8_t v2) { |
| return vabd_s8(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vabd_s16( |
| // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8> |
| // CHECK: [[VABD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %v1, <4 x i16> %v2) |
| // CHECK: ret <4 x i16> [[VABD2_I]] |
| int16x4_t test_vabd_s16(int16x4_t v1, int16x4_t v2) { |
| return vabd_s16(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vabd_s32( |
| // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8> |
| // CHECK: [[VABD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %v1, <2 x i32> %v2) |
| // CHECK: ret <2 x i32> [[VABD2_I]] |
| int32x2_t test_vabd_s32(int32x2_t v1, int32x2_t v2) { |
| return vabd_s32(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vabd_u8( |
| // CHECK: [[VABD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %v1, <8 x i8> %v2) |
| // CHECK: ret <8 x i8> [[VABD_I]] |
| uint8x8_t test_vabd_u8(uint8x8_t v1, uint8x8_t v2) { |
| return vabd_u8(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vabd_u16( |
| // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8> |
| // CHECK: [[VABD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %v1, <4 x i16> %v2) |
| // CHECK: ret <4 x i16> [[VABD2_I]] |
| uint16x4_t test_vabd_u16(uint16x4_t v1, uint16x4_t v2) { |
| return vabd_u16(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vabd_u32( |
| // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8> |
| // CHECK: [[VABD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %v1, <2 x i32> %v2) |
| // CHECK: ret <2 x i32> [[VABD2_I]] |
| uint32x2_t test_vabd_u32(uint32x2_t v1, uint32x2_t v2) { |
| return vabd_u32(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vabd_f32( |
| // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8> |
| // CHECK: [[VABD2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fabd.v2f32(<2 x float> %v1, <2 x float> %v2) |
| // CHECK: ret <2 x float> [[VABD2_I]] |
| float32x2_t test_vabd_f32(float32x2_t v1, float32x2_t v2) { |
| return vabd_f32(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vabdq_s8( |
| // CHECK: [[VABD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> %v1, <16 x i8> %v2) |
| // CHECK: ret <16 x i8> [[VABD_I]] |
| int8x16_t test_vabdq_s8(int8x16_t v1, int8x16_t v2) { |
| return vabdq_s8(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vabdq_s16( |
| // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8> |
| // CHECK: [[VABD2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %v1, <8 x i16> %v2) |
| // CHECK: ret <8 x i16> [[VABD2_I]] |
| int16x8_t test_vabdq_s16(int16x8_t v1, int16x8_t v2) { |
| return vabdq_s16(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vabdq_s32( |
| // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8> |
| // CHECK: [[VABD2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> %v1, <4 x i32> %v2) |
| // CHECK: ret <4 x i32> [[VABD2_I]] |
| int32x4_t test_vabdq_s32(int32x4_t v1, int32x4_t v2) { |
| return vabdq_s32(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vabdq_u8( |
| // CHECK: [[VABD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> %v1, <16 x i8> %v2) |
| // CHECK: ret <16 x i8> [[VABD_I]] |
| uint8x16_t test_vabdq_u8(uint8x16_t v1, uint8x16_t v2) { |
| return vabdq_u8(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vabdq_u16( |
| // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8> |
| // CHECK: [[VABD2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %v1, <8 x i16> %v2) |
| // CHECK: ret <8 x i16> [[VABD2_I]] |
| uint16x8_t test_vabdq_u16(uint16x8_t v1, uint16x8_t v2) { |
| return vabdq_u16(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vabdq_u32( |
| // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8> |
| // CHECK: [[VABD2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> %v1, <4 x i32> %v2) |
| // CHECK: ret <4 x i32> [[VABD2_I]] |
| uint32x4_t test_vabdq_u32(uint32x4_t v1, uint32x4_t v2) { |
| return vabdq_u32(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vabdq_f32( |
| // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8> |
| // CHECK: [[VABD2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fabd.v4f32(<4 x float> %v1, <4 x float> %v2) |
| // CHECK: ret <4 x float> [[VABD2_I]] |
| float32x4_t test_vabdq_f32(float32x4_t v1, float32x4_t v2) { |
| return vabdq_f32(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vabdq_f64( |
| // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8> |
| // CHECK: [[VABD2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fabd.v2f64(<2 x double> %v1, <2 x double> %v2) |
| // CHECK: ret <2 x double> [[VABD2_I]] |
| float64x2_t test_vabdq_f64(float64x2_t v1, float64x2_t v2) { |
| return vabdq_f64(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vbsl_s8( |
| // CHECK: [[VBSL_I:%.*]] = and <8 x i8> %v1, %v2 |
| // CHECK: [[TMP0:%.*]] = xor <8 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> |
| // CHECK: [[VBSL1_I:%.*]] = and <8 x i8> [[TMP0]], %v3 |
| // CHECK: [[VBSL2_I:%.*]] = or <8 x i8> [[VBSL_I]], [[VBSL1_I]] |
| // CHECK: ret <8 x i8> [[VBSL2_I]] |
| int8x8_t test_vbsl_s8(uint8x8_t v1, int8x8_t v2, int8x8_t v3) { |
| return vbsl_s8(v1, v2, v3); |
| } |
| |
| // CHECK-LABEL: @test_vbsl_s16( |
| // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8> |
| // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %v3 to <8 x i8> |
| // CHECK: [[VBSL3_I:%.*]] = and <4 x i16> %v1, %v2 |
| // CHECK: [[TMP3:%.*]] = xor <4 x i16> %v1, <i16 -1, i16 -1, i16 -1, i16 -1> |
| // CHECK: [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], %v3 |
| // CHECK: [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]] |
| // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[VBSL5_I]] to <8 x i8> |
| // CHECK: ret <8 x i8> [[TMP4]] |
| int8x8_t test_vbsl_s16(uint16x4_t v1, int16x4_t v2, int16x4_t v3) { |
| return vbsl_s16(v1, v2, v3); |
| } |
| |
| // CHECK-LABEL: @test_vbsl_s32( |
| // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8> |
| // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %v3 to <8 x i8> |
| // CHECK: [[VBSL3_I:%.*]] = and <2 x i32> %v1, %v2 |
| // CHECK: [[TMP3:%.*]] = xor <2 x i32> %v1, <i32 -1, i32 -1> |
| // CHECK: [[VBSL4_I:%.*]] = and <2 x i32> [[TMP3]], %v3 |
| // CHECK: [[VBSL5_I:%.*]] = or <2 x i32> [[VBSL3_I]], [[VBSL4_I]] |
| // CHECK: ret <2 x i32> [[VBSL5_I]] |
| int32x2_t test_vbsl_s32(uint32x2_t v1, int32x2_t v2, int32x2_t v3) { |
| return vbsl_s32(v1, v2, v3); |
| } |
| |
| // CHECK-LABEL: @test_vbsl_s64( |
| // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %v1 to <8 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %v2 to <8 x i8> |
| // CHECK: [[TMP2:%.*]] = bitcast <1 x i64> %v3 to <8 x i8> |
| // CHECK: [[VBSL3_I:%.*]] = and <1 x i64> %v1, %v2 |
| // CHECK: [[TMP3:%.*]] = xor <1 x i64> %v1, <i64 -1> |
| // CHECK: [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]], %v3 |
| // CHECK: [[VBSL5_I:%.*]] = or <1 x i64> [[VBSL3_I]], [[VBSL4_I]] |
| // CHECK: ret <1 x i64> [[VBSL5_I]] |
| uint64x1_t test_vbsl_s64(uint64x1_t v1, uint64x1_t v2, uint64x1_t v3) { |
| return vbsl_s64(v1, v2, v3); |
| } |
| |
| // CHECK-LABEL: @test_vbsl_u8( |
| // CHECK: [[VBSL_I:%.*]] = and <8 x i8> %v1, %v2 |
| // CHECK: [[TMP0:%.*]] = xor <8 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> |
| // CHECK: [[VBSL1_I:%.*]] = and <8 x i8> [[TMP0]], %v3 |
| // CHECK: [[VBSL2_I:%.*]] = or <8 x i8> [[VBSL_I]], [[VBSL1_I]] |
| // CHECK: ret <8 x i8> [[VBSL2_I]] |
| uint8x8_t test_vbsl_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) { |
| return vbsl_u8(v1, v2, v3); |
| } |
| |
| // CHECK-LABEL: @test_vbsl_u16( |
| // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8> |
| // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %v3 to <8 x i8> |
| // CHECK: [[VBSL3_I:%.*]] = and <4 x i16> %v1, %v2 |
| // CHECK: [[TMP3:%.*]] = xor <4 x i16> %v1, <i16 -1, i16 -1, i16 -1, i16 -1> |
| // CHECK: [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], %v3 |
| // CHECK: [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]] |
| // CHECK: ret <4 x i16> [[VBSL5_I]] |
| uint16x4_t test_vbsl_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) { |
| return vbsl_u16(v1, v2, v3); |
| } |
| |
| // CHECK-LABEL: @test_vbsl_u32( |
| // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8> |
| // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %v3 to <8 x i8> |
| // CHECK: [[VBSL3_I:%.*]] = and <2 x i32> %v1, %v2 |
| // CHECK: [[TMP3:%.*]] = xor <2 x i32> %v1, <i32 -1, i32 -1> |
| // CHECK: [[VBSL4_I:%.*]] = and <2 x i32> [[TMP3]], %v3 |
| // CHECK: [[VBSL5_I:%.*]] = or <2 x i32> [[VBSL3_I]], [[VBSL4_I]] |
| // CHECK: ret <2 x i32> [[VBSL5_I]] |
| uint32x2_t test_vbsl_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) { |
| return vbsl_u32(v1, v2, v3); |
| } |
| |
| // CHECK-LABEL: @test_vbsl_u64( |
| // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %v1 to <8 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %v2 to <8 x i8> |
| // CHECK: [[TMP2:%.*]] = bitcast <1 x i64> %v3 to <8 x i8> |
| // CHECK: [[VBSL3_I:%.*]] = and <1 x i64> %v1, %v2 |
| // CHECK: [[TMP3:%.*]] = xor <1 x i64> %v1, <i64 -1> |
| // CHECK: [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]], %v3 |
| // CHECK: [[VBSL5_I:%.*]] = or <1 x i64> [[VBSL3_I]], [[VBSL4_I]] |
| // CHECK: ret <1 x i64> [[VBSL5_I]] |
| uint64x1_t test_vbsl_u64(uint64x1_t v1, uint64x1_t v2, uint64x1_t v3) { |
| return vbsl_u64(v1, v2, v3); |
| } |
| |
| // CHECK-LABEL: @test_vbsl_f32( |
| // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <2 x i32> |
| // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8> |
| // CHECK: [[TMP2:%.*]] = bitcast <2 x float> %v2 to <8 x i8> |
| // CHECK: [[TMP3:%.*]] = bitcast <2 x float> %v3 to <8 x i8> |
| // CHECK: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> |
| // CHECK: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32> |
| // CHECK: [[VBSL3_I:%.*]] = and <2 x i32> [[TMP0]], [[VBSL1_I]] |
| // CHECK: [[TMP4:%.*]] = xor <2 x i32> [[TMP0]], <i32 -1, i32 -1> |
| // CHECK: [[VBSL4_I:%.*]] = and <2 x i32> [[TMP4]], [[VBSL2_I]] |
| // CHECK: [[VBSL5_I:%.*]] = or <2 x i32> [[VBSL3_I]], [[VBSL4_I]] |
| // CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[VBSL5_I]] to <2 x float> |
| // CHECK: ret <2 x float> [[TMP5]] |
| float32x2_t test_vbsl_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) { |
| return vbsl_f32(v1, v2, v3); |
| } |
| |
| // CHECK-LABEL: @test_vbsl_f64( |
| // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %v1 to <8 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %v2 to <8 x i8> |
| // CHECK: [[TMP2:%.*]] = bitcast <1 x double> %v3 to <8 x i8> |
| // CHECK: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> |
| // CHECK: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64> |
| // CHECK: [[VBSL3_I:%.*]] = and <1 x i64> %v1, [[VBSL1_I]] |
| // CHECK: [[TMP3:%.*]] = xor <1 x i64> %v1, <i64 -1> |
| // CHECK: [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]], [[VBSL2_I]] |
| // CHECK: [[VBSL5_I:%.*]] = or <1 x i64> [[VBSL3_I]], [[VBSL4_I]] |
| // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[VBSL5_I]] to <1 x double> |
| // CHECK: ret <1 x double> [[TMP4]] |
| float64x1_t test_vbsl_f64(uint64x1_t v1, float64x1_t v2, float64x1_t v3) { |
| return vbsl_f64(v1, v2, v3); |
| } |
| |
| // CHECK-LABEL: @test_vbsl_p8( |
| // CHECK: [[VBSL_I:%.*]] = and <8 x i8> %v1, %v2 |
| // CHECK: [[TMP0:%.*]] = xor <8 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> |
| // CHECK: [[VBSL1_I:%.*]] = and <8 x i8> [[TMP0]], %v3 |
| // CHECK: [[VBSL2_I:%.*]] = or <8 x i8> [[VBSL_I]], [[VBSL1_I]] |
| // CHECK: ret <8 x i8> [[VBSL2_I]] |
| poly8x8_t test_vbsl_p8(uint8x8_t v1, poly8x8_t v2, poly8x8_t v3) { |
| return vbsl_p8(v1, v2, v3); |
| } |
| |
| // CHECK-LABEL: @test_vbsl_p16( |
| // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8> |
| // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %v3 to <8 x i8> |
| // CHECK: [[VBSL3_I:%.*]] = and <4 x i16> %v1, %v2 |
| // CHECK: [[TMP3:%.*]] = xor <4 x i16> %v1, <i16 -1, i16 -1, i16 -1, i16 -1> |
| // CHECK: [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], %v3 |
| // CHECK: [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]] |
| // CHECK: ret <4 x i16> [[VBSL5_I]] |
| poly16x4_t test_vbsl_p16(uint16x4_t v1, poly16x4_t v2, poly16x4_t v3) { |
| return vbsl_p16(v1, v2, v3); |
| } |
| |
| // CHECK-LABEL: @test_vbslq_s8( |
| // CHECK: [[VBSL_I:%.*]] = and <16 x i8> %v1, %v2 |
| // CHECK: [[TMP0:%.*]] = xor <16 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> |
| // CHECK: [[VBSL1_I:%.*]] = and <16 x i8> [[TMP0]], %v3 |
| // CHECK: [[VBSL2_I:%.*]] = or <16 x i8> [[VBSL_I]], [[VBSL1_I]] |
| // CHECK: ret <16 x i8> [[VBSL2_I]] |
| int8x16_t test_vbslq_s8(uint8x16_t v1, int8x16_t v2, int8x16_t v3) { |
| return vbslq_s8(v1, v2, v3); |
| } |
| |
| // CHECK-LABEL: @test_vbslq_s16( |
| // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8> |
| // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %v3 to <16 x i8> |
| // CHECK: [[VBSL3_I:%.*]] = and <8 x i16> %v1, %v2 |
| // CHECK: [[TMP3:%.*]] = xor <8 x i16> %v1, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> |
| // CHECK: [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], %v3 |
| // CHECK: [[VBSL5_I:%.*]] = or <8 x i16> [[VBSL3_I]], [[VBSL4_I]] |
| // CHECK: ret <8 x i16> [[VBSL5_I]] |
| int16x8_t test_vbslq_s16(uint16x8_t v1, int16x8_t v2, int16x8_t v3) { |
| return vbslq_s16(v1, v2, v3); |
| } |
| |
| // CHECK-LABEL: @test_vbslq_s32( |
| // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8> |
| // CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %v3 to <16 x i8> |
| // CHECK: [[VBSL3_I:%.*]] = and <4 x i32> %v1, %v2 |
| // CHECK: [[TMP3:%.*]] = xor <4 x i32> %v1, <i32 -1, i32 -1, i32 -1, i32 -1> |
| // CHECK: [[VBSL4_I:%.*]] = and <4 x i32> [[TMP3]], %v3 |
| // CHECK: [[VBSL5_I:%.*]] = or <4 x i32> [[VBSL3_I]], [[VBSL4_I]] |
| // CHECK: ret <4 x i32> [[VBSL5_I]] |
| int32x4_t test_vbslq_s32(uint32x4_t v1, int32x4_t v2, int32x4_t v3) { |
| return vbslq_s32(v1, v2, v3); |
| } |
| |
| // CHECK-LABEL: @test_vbslq_s64( |
| // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %v2 to <16 x i8> |
| // CHECK: [[TMP2:%.*]] = bitcast <2 x i64> %v3 to <16 x i8> |
| // CHECK: [[VBSL3_I:%.*]] = and <2 x i64> %v1, %v2 |
| // CHECK: [[TMP3:%.*]] = xor <2 x i64> %v1, <i64 -1, i64 -1> |
| // CHECK: [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], %v3 |
| // CHECK: [[VBSL5_I:%.*]] = or <2 x i64> [[VBSL3_I]], [[VBSL4_I]] |
| // CHECK: ret <2 x i64> [[VBSL5_I]] |
| int64x2_t test_vbslq_s64(uint64x2_t v1, int64x2_t v2, int64x2_t v3) { |
| return vbslq_s64(v1, v2, v3); |
| } |
| |
| // CHECK-LABEL: @test_vbslq_u8( |
| // CHECK: [[VBSL_I:%.*]] = and <16 x i8> %v1, %v2 |
| // CHECK: [[TMP0:%.*]] = xor <16 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> |
| // CHECK: [[VBSL1_I:%.*]] = and <16 x i8> [[TMP0]], %v3 |
| // CHECK: [[VBSL2_I:%.*]] = or <16 x i8> [[VBSL_I]], [[VBSL1_I]] |
| // CHECK: ret <16 x i8> [[VBSL2_I]] |
| uint8x16_t test_vbslq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) { |
| return vbslq_u8(v1, v2, v3); |
| } |
| |
| // CHECK-LABEL: @test_vbslq_u16( |
| // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8> |
| // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %v3 to <16 x i8> |
| // CHECK: [[VBSL3_I:%.*]] = and <8 x i16> %v1, %v2 |
| // CHECK: [[TMP3:%.*]] = xor <8 x i16> %v1, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> |
| // CHECK: [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], %v3 |
| // CHECK: [[VBSL5_I:%.*]] = or <8 x i16> [[VBSL3_I]], [[VBSL4_I]] |
| // CHECK: ret <8 x i16> [[VBSL5_I]] |
| uint16x8_t test_vbslq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) { |
| return vbslq_u16(v1, v2, v3); |
| } |
| |
| // CHECK-LABEL: @test_vbslq_u32( |
| // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8> |
| // CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %v3 to <16 x i8> |
| // CHECK: [[VBSL3_I:%.*]] = and <4 x i32> %v1, %v2 |
| // CHECK: [[TMP3:%.*]] = xor <4 x i32> %v1, <i32 -1, i32 -1, i32 -1, i32 -1> |
| // CHECK: [[VBSL4_I:%.*]] = and <4 x i32> [[TMP3]], %v3 |
| // CHECK: [[VBSL5_I:%.*]] = or <4 x i32> [[VBSL3_I]], [[VBSL4_I]] |
| // CHECK: ret <4 x i32> [[VBSL5_I]] |
uint32x4_t test_vbslq_u32(uint32x4_t v1, uint32x4_t v2, uint32x4_t v3) {
  return vbslq_u32(v1, v2, v3);
| } |
| |
| // CHECK-LABEL: @test_vbslq_u64( |
| // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %v2 to <16 x i8> |
| // CHECK: [[TMP2:%.*]] = bitcast <2 x i64> %v3 to <16 x i8> |
| // CHECK: [[VBSL3_I:%.*]] = and <2 x i64> %v1, %v2 |
| // CHECK: [[TMP3:%.*]] = xor <2 x i64> %v1, <i64 -1, i64 -1> |
| // CHECK: [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], %v3 |
| // CHECK: [[VBSL5_I:%.*]] = or <2 x i64> [[VBSL3_I]], [[VBSL4_I]] |
| // CHECK: ret <2 x i64> [[VBSL5_I]] |
| uint64x2_t test_vbslq_u64(uint64x2_t v1, uint64x2_t v2, uint64x2_t v3) { |
| return vbslq_u64(v1, v2, v3); |
| } |
| |
| // CHECK-LABEL: @test_vbslq_f32( |
| // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8> |
| // CHECK: [[TMP2:%.*]] = bitcast <4 x float> %v3 to <16 x i8> |
| // CHECK: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> |
| // CHECK: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32> |
| // CHECK: [[VBSL3_I:%.*]] = and <4 x i32> %v1, [[VBSL1_I]] |
| // CHECK: [[TMP3:%.*]] = xor <4 x i32> %v1, <i32 -1, i32 -1, i32 -1, i32 -1> |
| // CHECK: [[VBSL4_I:%.*]] = and <4 x i32> [[TMP3]], [[VBSL2_I]] |
| // CHECK: [[VBSL5_I:%.*]] = or <4 x i32> [[VBSL3_I]], [[VBSL4_I]] |
| // CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[VBSL5_I]] to <4 x float> |
| // CHECK: ret <4 x float> [[TMP4]] |
| float32x4_t test_vbslq_f32(uint32x4_t v1, float32x4_t v2, float32x4_t v3) { |
| return vbslq_f32(v1, v2, v3); |
| } |
| |
| // CHECK-LABEL: @test_vbslq_p8( |
| // CHECK: [[VBSL_I:%.*]] = and <16 x i8> %v1, %v2 |
| // CHECK: [[TMP0:%.*]] = xor <16 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> |
| // CHECK: [[VBSL1_I:%.*]] = and <16 x i8> [[TMP0]], %v3 |
| // CHECK: [[VBSL2_I:%.*]] = or <16 x i8> [[VBSL_I]], [[VBSL1_I]] |
| // CHECK: ret <16 x i8> [[VBSL2_I]] |
| poly8x16_t test_vbslq_p8(uint8x16_t v1, poly8x16_t v2, poly8x16_t v3) { |
| return vbslq_p8(v1, v2, v3); |
| } |
| |
| // CHECK-LABEL: @test_vbslq_p16( |
| // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8> |
| // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %v3 to <16 x i8> |
| // CHECK: [[VBSL3_I:%.*]] = and <8 x i16> %v1, %v2 |
| // CHECK: [[TMP3:%.*]] = xor <8 x i16> %v1, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> |
| // CHECK: [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], %v3 |
| // CHECK: [[VBSL5_I:%.*]] = or <8 x i16> [[VBSL3_I]], [[VBSL4_I]] |
| // CHECK: ret <8 x i16> [[VBSL5_I]] |
| poly16x8_t test_vbslq_p16(uint16x8_t v1, poly16x8_t v2, poly16x8_t v3) { |
| return vbslq_p16(v1, v2, v3); |
| } |
| |
| // CHECK-LABEL: @test_vbslq_f64( |
| // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8> |
| // CHECK: [[TMP2:%.*]] = bitcast <2 x double> %v3 to <16 x i8> |
| // CHECK: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> |
| // CHECK: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64> |
| // CHECK: [[VBSL3_I:%.*]] = and <2 x i64> %v1, [[VBSL1_I]] |
| // CHECK: [[TMP3:%.*]] = xor <2 x i64> %v1, <i64 -1, i64 -1> |
| // CHECK: [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], [[VBSL2_I]] |
| // CHECK: [[VBSL5_I:%.*]] = or <2 x i64> [[VBSL3_I]], [[VBSL4_I]] |
| // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[VBSL5_I]] to <2 x double> |
| // CHECK: ret <2 x double> [[TMP4]] |
| float64x2_t test_vbslq_f64(uint64x2_t v1, float64x2_t v2, float64x2_t v3) { |
| return vbslq_f64(v1, v2, v3); |
| } |
| |
| // CHECK-LABEL: @test_vrecps_f32( |
| // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8> |
| // CHECK: [[VRECPS_V2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.frecps.v2f32(<2 x float> %v1, <2 x float> %v2) |
| // CHECK: ret <2 x float> [[VRECPS_V2_I]] |
| float32x2_t test_vrecps_f32(float32x2_t v1, float32x2_t v2) { |
| return vrecps_f32(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vrecpsq_f32( |
| // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8> |
| // CHECK: [[VRECPSQ_V2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.frecps.v4f32(<4 x float> %v1, <4 x float> %v2) |
| // CHECK: [[VRECPSQ_V3_I:%.*]] = bitcast <4 x float> [[VRECPSQ_V2_I]] to <16 x i8> |
| // CHECK: ret <4 x float> [[VRECPSQ_V2_I]] |
| float32x4_t test_vrecpsq_f32(float32x4_t v1, float32x4_t v2) { |
| return vrecpsq_f32(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vrecpsq_f64( |
| // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8> |
| // CHECK: [[VRECPSQ_V2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.frecps.v2f64(<2 x double> %v1, <2 x double> %v2) |
| // CHECK: [[VRECPSQ_V3_I:%.*]] = bitcast <2 x double> [[VRECPSQ_V2_I]] to <16 x i8> |
| // CHECK: ret <2 x double> [[VRECPSQ_V2_I]] |
| float64x2_t test_vrecpsq_f64(float64x2_t v1, float64x2_t v2) { |
| return vrecpsq_f64(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vrsqrts_f32( |
| // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8> |
| // CHECK: [[VRSQRTS_V2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.frsqrts.v2f32(<2 x float> %v1, <2 x float> %v2) |
| // CHECK: [[VRSQRTS_V3_I:%.*]] = bitcast <2 x float> [[VRSQRTS_V2_I]] to <8 x i8> |
| // CHECK: ret <2 x float> [[VRSQRTS_V2_I]] |
| float32x2_t test_vrsqrts_f32(float32x2_t v1, float32x2_t v2) { |
| return vrsqrts_f32(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vrsqrtsq_f32( |
| // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8> |
| // CHECK: [[VRSQRTSQ_V2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.frsqrts.v4f32(<4 x float> %v1, <4 x float> %v2) |
| // CHECK: [[VRSQRTSQ_V3_I:%.*]] = bitcast <4 x float> [[VRSQRTSQ_V2_I]] to <16 x i8> |
| // CHECK: ret <4 x float> [[VRSQRTSQ_V2_I]] |
| float32x4_t test_vrsqrtsq_f32(float32x4_t v1, float32x4_t v2) { |
| return vrsqrtsq_f32(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vrsqrtsq_f64( |
| // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8> |
| // CHECK: [[VRSQRTSQ_V2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.frsqrts.v2f64(<2 x double> %v1, <2 x double> %v2) |
| // CHECK: [[VRSQRTSQ_V3_I:%.*]] = bitcast <2 x double> [[VRSQRTSQ_V2_I]] to <16 x i8> |
| // CHECK: ret <2 x double> [[VRSQRTSQ_V2_I]] |
| float64x2_t test_vrsqrtsq_f64(float64x2_t v1, float64x2_t v2) { |
| return vrsqrtsq_f64(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vcage_f32( |
| // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8> |
| // CHECK: [[VCAGE_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facge.v2i32.v2f32(<2 x float> %v1, <2 x float> %v2) |
| // CHECK: ret <2 x i32> [[VCAGE_V2_I]] |
| uint32x2_t test_vcage_f32(float32x2_t v1, float32x2_t v2) { |
| return vcage_f32(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vcage_f64( |
| // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8> |
| // CHECK: [[VCAGE_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facge.v1i64.v1f64(<1 x double> %a, <1 x double> %b) |
| // CHECK: ret <1 x i64> [[VCAGE_V2_I]] |
| uint64x1_t test_vcage_f64(float64x1_t a, float64x1_t b) { |
| return vcage_f64(a, b); |
| } |
| |
| // CHECK-LABEL: @test_vcageq_f32( |
| // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8> |
| // CHECK: [[VCAGEQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facge.v4i32.v4f32(<4 x float> %v1, <4 x float> %v2) |
| // CHECK: ret <4 x i32> [[VCAGEQ_V2_I]] |
| uint32x4_t test_vcageq_f32(float32x4_t v1, float32x4_t v2) { |
| return vcageq_f32(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vcageq_f64( |
| // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8> |
| // CHECK: [[VCAGEQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facge.v2i64.v2f64(<2 x double> %v1, <2 x double> %v2) |
| // CHECK: ret <2 x i64> [[VCAGEQ_V2_I]] |
| uint64x2_t test_vcageq_f64(float64x2_t v1, float64x2_t v2) { |
| return vcageq_f64(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vcagt_f32( |
| // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8> |
| // CHECK: [[VCAGT_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facgt.v2i32.v2f32(<2 x float> %v1, <2 x float> %v2) |
| // CHECK: ret <2 x i32> [[VCAGT_V2_I]] |
| uint32x2_t test_vcagt_f32(float32x2_t v1, float32x2_t v2) { |
| return vcagt_f32(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vcagt_f64( |
| // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8> |
| // CHECK: [[VCAGT_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facgt.v1i64.v1f64(<1 x double> %a, <1 x double> %b) |
| // CHECK: ret <1 x i64> [[VCAGT_V2_I]] |
| uint64x1_t test_vcagt_f64(float64x1_t a, float64x1_t b) { |
| return vcagt_f64(a, b); |
| } |
| |
| // CHECK-LABEL: @test_vcagtq_f32( |
| // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8> |
| // CHECK: [[VCAGTQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facgt.v4i32.v4f32(<4 x float> %v1, <4 x float> %v2) |
| // CHECK: ret <4 x i32> [[VCAGTQ_V2_I]] |
| uint32x4_t test_vcagtq_f32(float32x4_t v1, float32x4_t v2) { |
| return vcagtq_f32(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vcagtq_f64( |
| // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8> |
| // CHECK: [[VCAGTQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facgt.v2i64.v2f64(<2 x double> %v1, <2 x double> %v2) |
| // CHECK: ret <2 x i64> [[VCAGTQ_V2_I]] |
| uint64x2_t test_vcagtq_f64(float64x2_t v1, float64x2_t v2) { |
| return vcagtq_f64(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vcale_f32( |
| // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8> |
| // CHECK: [[VCALE_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facge.v2i32.v2f32(<2 x float> %v2, <2 x float> %v1) |
| // CHECK: ret <2 x i32> [[VCALE_V2_I]] |
| uint32x2_t test_vcale_f32(float32x2_t v1, float32x2_t v2) { |
| return vcale_f32(v1, v2); |
| // Using registers other than v0, v1 is possible, but would be odd. |
| } |
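| |
| // Illustrative sketch, not a checked test: as the facge call above suggests, |
| // the absolute "less than or equal" compare is emitted as the absolute |
| // "greater than or equal" intrinsic with the operands swapped. The helper |
| // below only demonstrates that equivalence; its name is hypothetical and it |
| // is not referenced by any CHECK line. |
| static inline uint32x2_t vcale_via_vcage_f32(float32x2_t v1, float32x2_t v2) { |
|   // abs(v1) <= abs(v2) holds exactly when abs(v2) >= abs(v1), lane by lane. |
|   return vcage_f32(v2, v1); |
| } |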
| |
| // CHECK-LABEL: @test_vcale_f64( |
| // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8> |
| // CHECK: [[VCALE_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facge.v1i64.v1f64(<1 x double> %b, <1 x double> %a) |
| // CHECK: ret <1 x i64> [[VCALE_V2_I]] |
| uint64x1_t test_vcale_f64(float64x1_t a, float64x1_t b) { |
| return vcale_f64(a, b); |
| } |
| |
| // CHECK-LABEL: @test_vcaleq_f32( |
| // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8> |
| // CHECK: [[VCALEQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facge.v4i32.v4f32(<4 x float> %v2, <4 x float> %v1) |
| // CHECK: ret <4 x i32> [[VCALEQ_V2_I]] |
| uint32x4_t test_vcaleq_f32(float32x4_t v1, float32x4_t v2) { |
| return vcaleq_f32(v1, v2); |
| // Using registers other than v0, v1 is possible, but would be odd. |
| } |
| |
| // CHECK-LABEL: @test_vcaleq_f64( |
| // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8> |
| // CHECK: [[VCALEQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facge.v2i64.v2f64(<2 x double> %v2, <2 x double> %v1) |
| // CHECK: ret <2 x i64> [[VCALEQ_V2_I]] |
| uint64x2_t test_vcaleq_f64(float64x2_t v1, float64x2_t v2) { |
| return vcaleq_f64(v1, v2); |
| // Using registers other than v0, v1 is possible, but would be odd. |
| } |
| |
| // CHECK-LABEL: @test_vcalt_f32( |
| // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8> |
| // CHECK: [[VCALT_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facgt.v2i32.v2f32(<2 x float> %v2, <2 x float> %v1) |
| // CHECK: ret <2 x i32> [[VCALT_V2_I]] |
| uint32x2_t test_vcalt_f32(float32x2_t v1, float32x2_t v2) { |
| return vcalt_f32(v1, v2); |
| // Using registers other than v0, v1 is possible, but would be odd. |
| } |
| |
| // CHECK-LABEL: @test_vcalt_f64( |
| // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8> |
| // CHECK: [[VCALT_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facgt.v1i64.v1f64(<1 x double> %b, <1 x double> %a) |
| // CHECK: ret <1 x i64> [[VCALT_V2_I]] |
| uint64x1_t test_vcalt_f64(float64x1_t a, float64x1_t b) { |
| return vcalt_f64(a, b); |
| } |
| |
| // CHECK-LABEL: @test_vcaltq_f32( |
| // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8> |
| // CHECK: [[VCALTQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facgt.v4i32.v4f32(<4 x float> %v2, <4 x float> %v1) |
| // CHECK: ret <4 x i32> [[VCALTQ_V2_I]] |
| uint32x4_t test_vcaltq_f32(float32x4_t v1, float32x4_t v2) { |
| return vcaltq_f32(v1, v2); |
| // Using registers other than v0, v1 is possible, but would be odd. |
| } |
| |
| // CHECK-LABEL: @test_vcaltq_f64( |
| // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8> |
| // CHECK: [[VCALTQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facgt.v2i64.v2f64(<2 x double> %v2, <2 x double> %v1) |
| // CHECK: ret <2 x i64> [[VCALTQ_V2_I]] |
| uint64x2_t test_vcaltq_f64(float64x2_t v1, float64x2_t v2) { |
| return vcaltq_f64(v1, v2); |
| // Using registers other than v0, v1 is possible, but would be odd. |
| } |
| |
| // CHECK-LABEL: @test_vtst_s8( |
| // CHECK: [[TMP0:%.*]] = and <8 x i8> %v1, %v2 |
| // CHECK: [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer |
| // CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8> |
| // CHECK: ret <8 x i8> [[VTST_I]] |
| uint8x8_t test_vtst_s8(int8x8_t v1, int8x8_t v2) { |
| return vtst_s8(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vtst_s16( |
| // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8> |
| // CHECK: [[TMP2:%.*]] = and <4 x i16> %v1, %v2 |
| // CHECK: [[TMP3:%.*]] = icmp ne <4 x i16> [[TMP2]], zeroinitializer |
| // CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i16> |
| // CHECK: ret <4 x i16> [[VTST_I]] |
| uint16x4_t test_vtst_s16(int16x4_t v1, int16x4_t v2) { |
| return vtst_s16(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vtst_s32( |
| // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8> |
| // CHECK: [[TMP2:%.*]] = and <2 x i32> %v1, %v2 |
| // CHECK: [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer |
| // CHECK: [[VTST_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i32> |
| // CHECK: ret <2 x i32> [[VTST_I]] |
| uint32x2_t test_vtst_s32(int32x2_t v1, int32x2_t v2) { |
| return vtst_s32(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vtst_u8( |
| // CHECK: [[TMP0:%.*]] = and <8 x i8> %v1, %v2 |
| // CHECK: [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer |
| // CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8> |
| // CHECK: ret <8 x i8> [[VTST_I]] |
| uint8x8_t test_vtst_u8(uint8x8_t v1, uint8x8_t v2) { |
| return vtst_u8(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vtst_u16( |
| // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8> |
| // CHECK: [[TMP2:%.*]] = and <4 x i16> %v1, %v2 |
| // CHECK: [[TMP3:%.*]] = icmp ne <4 x i16> [[TMP2]], zeroinitializer |
| // CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i16> |
| // CHECK: ret <4 x i16> [[VTST_I]] |
| uint16x4_t test_vtst_u16(uint16x4_t v1, uint16x4_t v2) { |
| return vtst_u16(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vtst_u32( |
| // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8> |
| // CHECK: [[TMP2:%.*]] = and <2 x i32> %v1, %v2 |
| // CHECK: [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer |
| // CHECK: [[VTST_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i32> |
| // CHECK: ret <2 x i32> [[VTST_I]] |
| uint32x2_t test_vtst_u32(uint32x2_t v1, uint32x2_t v2) { |
| return vtst_u32(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vtstq_s8( |
| // CHECK: [[TMP0:%.*]] = and <16 x i8> %v1, %v2 |
| // CHECK: [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer |
| // CHECK: [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8> |
| // CHECK: ret <16 x i8> [[VTST_I]] |
| uint8x16_t test_vtstq_s8(int8x16_t v1, int8x16_t v2) { |
| return vtstq_s8(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vtstq_s16( |
| // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8> |
| // CHECK: [[TMP2:%.*]] = and <8 x i16> %v1, %v2 |
| // CHECK: [[TMP3:%.*]] = icmp ne <8 x i16> [[TMP2]], zeroinitializer |
| // CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i16> |
| // CHECK: ret <8 x i16> [[VTST_I]] |
| uint16x8_t test_vtstq_s16(int16x8_t v1, int16x8_t v2) { |
| return vtstq_s16(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vtstq_s32( |
| // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8> |
| // CHECK: [[TMP2:%.*]] = and <4 x i32> %v1, %v2 |
| // CHECK: [[TMP3:%.*]] = icmp ne <4 x i32> [[TMP2]], zeroinitializer |
| // CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32> |
| // CHECK: ret <4 x i32> [[VTST_I]] |
| uint32x4_t test_vtstq_s32(int32x4_t v1, int32x4_t v2) { |
| return vtstq_s32(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vtstq_u8( |
| // CHECK: [[TMP0:%.*]] = and <16 x i8> %v1, %v2 |
| // CHECK: [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer |
| // CHECK: [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8> |
| // CHECK: ret <16 x i8> [[VTST_I]] |
| uint8x16_t test_vtstq_u8(uint8x16_t v1, uint8x16_t v2) { |
| return vtstq_u8(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vtstq_u16( |
| // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8> |
| // CHECK: [[TMP2:%.*]] = and <8 x i16> %v1, %v2 |
| // CHECK: [[TMP3:%.*]] = icmp ne <8 x i16> [[TMP2]], zeroinitializer |
| // CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i16> |
| // CHECK: ret <8 x i16> [[VTST_I]] |
| uint16x8_t test_vtstq_u16(uint16x8_t v1, uint16x8_t v2) { |
| return vtstq_u16(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vtstq_u32( |
| // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8> |
| // CHECK: [[TMP2:%.*]] = and <4 x i32> %v1, %v2 |
| // CHECK: [[TMP3:%.*]] = icmp ne <4 x i32> [[TMP2]], zeroinitializer |
| // CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32> |
| // CHECK: ret <4 x i32> [[VTST_I]] |
| uint32x4_t test_vtstq_u32(uint32x4_t v1, uint32x4_t v2) { |
| return vtstq_u32(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vtstq_s64( |
| // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %v2 to <16 x i8> |
| // CHECK: [[TMP2:%.*]] = and <2 x i64> %v1, %v2 |
| // CHECK: [[TMP3:%.*]] = icmp ne <2 x i64> [[TMP2]], zeroinitializer |
| // CHECK: [[VTST_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i64> |
| // CHECK: ret <2 x i64> [[VTST_I]] |
| uint64x2_t test_vtstq_s64(int64x2_t v1, int64x2_t v2) { |
| return vtstq_s64(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vtstq_u64( |
| // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %v2 to <16 x i8> |
| // CHECK: [[TMP2:%.*]] = and <2 x i64> %v1, %v2 |
| // CHECK: [[TMP3:%.*]] = icmp ne <2 x i64> [[TMP2]], zeroinitializer |
| // CHECK: [[VTST_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i64> |
| // CHECK: ret <2 x i64> [[VTST_I]] |
| uint64x2_t test_vtstq_u64(uint64x2_t v1, uint64x2_t v2) { |
| return vtstq_u64(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vtst_p8( |
| // CHECK: [[TMP0:%.*]] = and <8 x i8> %v1, %v2 |
| // CHECK: [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer |
| // CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8> |
| // CHECK: ret <8 x i8> [[VTST_I]] |
| uint8x8_t test_vtst_p8(poly8x8_t v1, poly8x8_t v2) { |
| return vtst_p8(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vtst_p16( |
| // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8> |
| // CHECK: [[TMP2:%.*]] = and <4 x i16> %v1, %v2 |
| // CHECK: [[TMP3:%.*]] = icmp ne <4 x i16> [[TMP2]], zeroinitializer |
| // CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i16> |
| // CHECK: ret <4 x i16> [[VTST_I]] |
| uint16x4_t test_vtst_p16(poly16x4_t v1, poly16x4_t v2) { |
| return vtst_p16(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vtstq_p8( |
| // CHECK: [[TMP0:%.*]] = and <16 x i8> %v1, %v2 |
| // CHECK: [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer |
| // CHECK: [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8> |
| // CHECK: ret <16 x i8> [[VTST_I]] |
| uint8x16_t test_vtstq_p8(poly8x16_t v1, poly8x16_t v2) { |
| return vtstq_p8(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vtstq_p16( |
| // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8> |
| // CHECK: [[TMP2:%.*]] = and <8 x i16> %v1, %v2 |
| // CHECK: [[TMP3:%.*]] = icmp ne <8 x i16> [[TMP2]], zeroinitializer |
| // CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i16> |
| // CHECK: ret <8 x i16> [[VTST_I]] |
| uint16x8_t test_vtstq_p16(poly16x8_t v1, poly16x8_t v2) { |
| return vtstq_p16(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vtst_s64( |
| // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> |
| // CHECK: [[TMP2:%.*]] = and <1 x i64> %a, %b |
| // CHECK: [[TMP3:%.*]] = icmp ne <1 x i64> [[TMP2]], zeroinitializer |
| // CHECK: [[VTST_I:%.*]] = sext <1 x i1> [[TMP3]] to <1 x i64> |
| // CHECK: ret <1 x i64> [[VTST_I]] |
| uint64x1_t test_vtst_s64(int64x1_t a, int64x1_t b) { |
| return vtst_s64(a, b); |
| } |
| |
| // CHECK-LABEL: @test_vtst_u64( |
| // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> |
| // CHECK: [[TMP2:%.*]] = and <1 x i64> %a, %b |
| // CHECK: [[TMP3:%.*]] = icmp ne <1 x i64> [[TMP2]], zeroinitializer |
| // CHECK: [[VTST_I:%.*]] = sext <1 x i1> [[TMP3]] to <1 x i64> |
| // CHECK: ret <1 x i64> [[VTST_I]] |
| uint64x1_t test_vtst_u64(uint64x1_t a, uint64x1_t b) { |
| return vtst_u64(a, b); |
| } |
| |
| // CHECK-LABEL: @test_vceq_s8( |
| // CHECK: [[CMP_I:%.*]] = icmp eq <8 x i8> %v1, %v2 |
| // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8> |
| // CHECK: ret <8 x i8> [[SEXT_I]] |
| uint8x8_t test_vceq_s8(int8x8_t v1, int8x8_t v2) { |
| return vceq_s8(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vceq_s16( |
| // CHECK: [[CMP_I:%.*]] = icmp eq <4 x i16> %v1, %v2 |
| // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16> |
| // CHECK: ret <4 x i16> [[SEXT_I]] |
| uint16x4_t test_vceq_s16(int16x4_t v1, int16x4_t v2) { |
| return vceq_s16(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vceq_s32( |
| // CHECK: [[CMP_I:%.*]] = icmp eq <2 x i32> %v1, %v2 |
| // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> |
| // CHECK: ret <2 x i32> [[SEXT_I]] |
| uint32x2_t test_vceq_s32(int32x2_t v1, int32x2_t v2) { |
| return vceq_s32(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vceq_s64( |
| // CHECK: [[CMP_I:%.*]] = icmp eq <1 x i64> %a, %b |
| // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64> |
| // CHECK: ret <1 x i64> [[SEXT_I]] |
| uint64x1_t test_vceq_s64(int64x1_t a, int64x1_t b) { |
| return vceq_s64(a, b); |
| } |
| |
| // CHECK-LABEL: @test_vceq_u64( |
| // CHECK: [[CMP_I:%.*]] = icmp eq <1 x i64> %a, %b |
| // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64> |
| // CHECK: ret <1 x i64> [[SEXT_I]] |
| uint64x1_t test_vceq_u64(uint64x1_t a, uint64x1_t b) { |
| return vceq_u64(a, b); |
| } |
| |
| // CHECK-LABEL: @test_vceq_f32( |
| // CHECK: [[CMP_I:%.*]] = fcmp oeq <2 x float> %v1, %v2 |
| // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> |
| // CHECK: ret <2 x i32> [[SEXT_I]] |
| uint32x2_t test_vceq_f32(float32x2_t v1, float32x2_t v2) { |
| return vceq_f32(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vceq_f64( |
| // CHECK: [[CMP_I:%.*]] = fcmp oeq <1 x double> %a, %b |
| // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64> |
| // CHECK: ret <1 x i64> [[SEXT_I]] |
| uint64x1_t test_vceq_f64(float64x1_t a, float64x1_t b) { |
| return vceq_f64(a, b); |
| } |
| |
| // CHECK-LABEL: @test_vceq_u8( |
| // CHECK: [[CMP_I:%.*]] = icmp eq <8 x i8> %v1, %v2 |
| // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8> |
| // CHECK: ret <8 x i8> [[SEXT_I]] |
| uint8x8_t test_vceq_u8(uint8x8_t v1, uint8x8_t v2) { |
| return vceq_u8(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vceq_u16( |
| // CHECK: [[CMP_I:%.*]] = icmp eq <4 x i16> %v1, %v2 |
| // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16> |
| // CHECK: ret <4 x i16> [[SEXT_I]] |
| uint16x4_t test_vceq_u16(uint16x4_t v1, uint16x4_t v2) { |
| return vceq_u16(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vceq_u32( |
| // CHECK: [[CMP_I:%.*]] = icmp eq <2 x i32> %v1, %v2 |
| // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> |
| // CHECK: ret <2 x i32> [[SEXT_I]] |
| uint32x2_t test_vceq_u32(uint32x2_t v1, uint32x2_t v2) { |
| return vceq_u32(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vceq_p8( |
| // CHECK: [[CMP_I:%.*]] = icmp eq <8 x i8> %v1, %v2 |
| // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8> |
| // CHECK: ret <8 x i8> [[SEXT_I]] |
| uint8x8_t test_vceq_p8(poly8x8_t v1, poly8x8_t v2) { |
| return vceq_p8(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vceqq_s8( |
| // CHECK: [[CMP_I:%.*]] = icmp eq <16 x i8> %v1, %v2 |
| // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> |
| // CHECK: ret <16 x i8> [[SEXT_I]] |
| uint8x16_t test_vceqq_s8(int8x16_t v1, int8x16_t v2) { |
| return vceqq_s8(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vceqq_s16( |
| // CHECK: [[CMP_I:%.*]] = icmp eq <8 x i16> %v1, %v2 |
| // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16> |
| // CHECK: ret <8 x i16> [[SEXT_I]] |
| uint16x8_t test_vceqq_s16(int16x8_t v1, int16x8_t v2) { |
| return vceqq_s16(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vceqq_s32( |
| // CHECK: [[CMP_I:%.*]] = icmp eq <4 x i32> %v1, %v2 |
| // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> |
| // CHECK: ret <4 x i32> [[SEXT_I]] |
| uint32x4_t test_vceqq_s32(int32x4_t v1, int32x4_t v2) { |
| return vceqq_s32(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vceqq_f32( |
| // CHECK: [[CMP_I:%.*]] = fcmp oeq <4 x float> %v1, %v2 |
| // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> |
| // CHECK: ret <4 x i32> [[SEXT_I]] |
| uint32x4_t test_vceqq_f32(float32x4_t v1, float32x4_t v2) { |
| return vceqq_f32(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vceqq_u8( |
| // CHECK: [[CMP_I:%.*]] = icmp eq <16 x i8> %v1, %v2 |
| // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> |
| // CHECK: ret <16 x i8> [[SEXT_I]] |
| uint8x16_t test_vceqq_u8(uint8x16_t v1, uint8x16_t v2) { |
| return vceqq_u8(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vceqq_u16( |
| // CHECK: [[CMP_I:%.*]] = icmp eq <8 x i16> %v1, %v2 |
| // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16> |
| // CHECK: ret <8 x i16> [[SEXT_I]] |
| uint16x8_t test_vceqq_u16(uint16x8_t v1, uint16x8_t v2) { |
| return vceqq_u16(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vceqq_u32( |
| // CHECK: [[CMP_I:%.*]] = icmp eq <4 x i32> %v1, %v2 |
| // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> |
| // CHECK: ret <4 x i32> [[SEXT_I]] |
| uint32x4_t test_vceqq_u32(uint32x4_t v1, uint32x4_t v2) { |
| return vceqq_u32(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vceqq_p8( |
| // CHECK: [[CMP_I:%.*]] = icmp eq <16 x i8> %v1, %v2 |
| // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> |
| // CHECK: ret <16 x i8> [[SEXT_I]] |
| uint8x16_t test_vceqq_p8(poly8x16_t v1, poly8x16_t v2) { |
| return vceqq_p8(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vceqq_s64( |
| // CHECK: [[CMP_I:%.*]] = icmp eq <2 x i64> %v1, %v2 |
| // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64> |
| // CHECK: ret <2 x i64> [[SEXT_I]] |
| uint64x2_t test_vceqq_s64(int64x2_t v1, int64x2_t v2) { |
| return vceqq_s64(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vceqq_u64( |
| // CHECK: [[CMP_I:%.*]] = icmp eq <2 x i64> %v1, %v2 |
| // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64> |
| // CHECK: ret <2 x i64> [[SEXT_I]] |
| uint64x2_t test_vceqq_u64(uint64x2_t v1, uint64x2_t v2) { |
| return vceqq_u64(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vceqq_f64( |
| // CHECK: [[CMP_I:%.*]] = fcmp oeq <2 x double> %v1, %v2 |
| // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64> |
| // CHECK: ret <2 x i64> [[SEXT_I]] |
| uint64x2_t test_vceqq_f64(float64x2_t v1, float64x2_t v2) { |
| return vceqq_f64(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vcge_s8( |
| // CHECK: [[CMP_I:%.*]] = icmp sge <8 x i8> %v1, %v2 |
| // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8> |
| // CHECK: ret <8 x i8> [[SEXT_I]] |
| uint8x8_t test_vcge_s8(int8x8_t v1, int8x8_t v2) { |
| return vcge_s8(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vcge_s16( |
| // CHECK: [[CMP_I:%.*]] = icmp sge <4 x i16> %v1, %v2 |
| // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16> |
| // CHECK: ret <4 x i16> [[SEXT_I]] |
| uint16x4_t test_vcge_s16(int16x4_t v1, int16x4_t v2) { |
| return vcge_s16(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vcge_s32( |
| // CHECK: [[CMP_I:%.*]] = icmp sge <2 x i32> %v1, %v2 |
| // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> |
| // CHECK: ret <2 x i32> [[SEXT_I]] |
| uint32x2_t test_vcge_s32(int32x2_t v1, int32x2_t v2) { |
| return vcge_s32(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vcge_s64( |
| // CHECK: [[CMP_I:%.*]] = icmp sge <1 x i64> %a, %b |
| // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64> |
| // CHECK: ret <1 x i64> [[SEXT_I]] |
| uint64x1_t test_vcge_s64(int64x1_t a, int64x1_t b) { |
| return vcge_s64(a, b); |
| } |
| |
| // CHECK-LABEL: @test_vcge_u64( |
| // CHECK: [[CMP_I:%.*]] = icmp uge <1 x i64> %a, %b |
| // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64> |
| // CHECK: ret <1 x i64> [[SEXT_I]] |
| uint64x1_t test_vcge_u64(uint64x1_t a, uint64x1_t b) { |
| return vcge_u64(a, b); |
| } |
| |
| // CHECK-LABEL: @test_vcge_f32( |
| // CHECK: [[CMP_I:%.*]] = fcmp oge <2 x float> %v1, %v2 |
| // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> |
| // CHECK: ret <2 x i32> [[SEXT_I]] |
| uint32x2_t test_vcge_f32(float32x2_t v1, float32x2_t v2) { |
| return vcge_f32(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vcge_f64( |
| // CHECK: [[CMP_I:%.*]] = fcmp oge <1 x double> %a, %b |
| // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64> |
| // CHECK: ret <1 x i64> [[SEXT_I]] |
| uint64x1_t test_vcge_f64(float64x1_t a, float64x1_t b) { |
| return vcge_f64(a, b); |
| } |
| |
| // CHECK-LABEL: @test_vcge_u8( |
| // CHECK: [[CMP_I:%.*]] = icmp uge <8 x i8> %v1, %v2 |
| // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8> |
| // CHECK: ret <8 x i8> [[SEXT_I]] |
| uint8x8_t test_vcge_u8(uint8x8_t v1, uint8x8_t v2) { |
| return vcge_u8(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vcge_u16( |
| // CHECK: [[CMP_I:%.*]] = icmp uge <4 x i16> %v1, %v2 |
| // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16> |
| // CHECK: ret <4 x i16> [[SEXT_I]] |
| uint16x4_t test_vcge_u16(uint16x4_t v1, uint16x4_t v2) { |
| return vcge_u16(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vcge_u32( |
| // CHECK: [[CMP_I:%.*]] = icmp uge <2 x i32> %v1, %v2 |
| // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> |
| // CHECK: ret <2 x i32> [[SEXT_I]] |
| uint32x2_t test_vcge_u32(uint32x2_t v1, uint32x2_t v2) { |
| return vcge_u32(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vcgeq_s8( |
| // CHECK: [[CMP_I:%.*]] = icmp sge <16 x i8> %v1, %v2 |
| // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> |
| // CHECK: ret <16 x i8> [[SEXT_I]] |
| uint8x16_t test_vcgeq_s8(int8x16_t v1, int8x16_t v2) { |
| return vcgeq_s8(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vcgeq_s16( |
| // CHECK: [[CMP_I:%.*]] = icmp sge <8 x i16> %v1, %v2 |
| // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16> |
| // CHECK: ret <8 x i16> [[SEXT_I]] |
| uint16x8_t test_vcgeq_s16(int16x8_t v1, int16x8_t v2) { |
| return vcgeq_s16(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vcgeq_s32( |
| // CHECK: [[CMP_I:%.*]] = icmp sge <4 x i32> %v1, %v2 |
| // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> |
| // CHECK: ret <4 x i32> [[SEXT_I]] |
| uint32x4_t test_vcgeq_s32(int32x4_t v1, int32x4_t v2) { |
| return vcgeq_s32(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vcgeq_f32( |
| // CHECK: [[CMP_I:%.*]] = fcmp oge <4 x float> %v1, %v2 |
| // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> |
| // CHECK: ret <4 x i32> [[SEXT_I]] |
| uint32x4_t test_vcgeq_f32(float32x4_t v1, float32x4_t v2) { |
| return vcgeq_f32(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vcgeq_u8( |
| // CHECK: [[CMP_I:%.*]] = icmp uge <16 x i8> %v1, %v2 |
| // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> |
| // CHECK: ret <16 x i8> [[SEXT_I]] |
| uint8x16_t test_vcgeq_u8(uint8x16_t v1, uint8x16_t v2) { |
| return vcgeq_u8(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vcgeq_u16( |
| // CHECK: [[CMP_I:%.*]] = icmp uge <8 x i16> %v1, %v2 |
| // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16> |
| // CHECK: ret <8 x i16> [[SEXT_I]] |
| uint16x8_t test_vcgeq_u16(uint16x8_t v1, uint16x8_t v2) { |
| return vcgeq_u16(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vcgeq_u32( |
| // CHECK: [[CMP_I:%.*]] = icmp uge <4 x i32> %v1, %v2 |
| // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> |
| // CHECK: ret <4 x i32> [[SEXT_I]] |
| uint32x4_t test_vcgeq_u32(uint32x4_t v1, uint32x4_t v2) { |
| return vcgeq_u32(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vcgeq_s64( |
| // CHECK: [[CMP_I:%.*]] = icmp sge <2 x i64> %v1, %v2 |
| // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64> |
| // CHECK: ret <2 x i64> [[SEXT_I]] |
| uint64x2_t test_vcgeq_s64(int64x2_t v1, int64x2_t v2) { |
| return vcgeq_s64(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vcgeq_u64( |
| // CHECK: [[CMP_I:%.*]] = icmp uge <2 x i64> %v1, %v2 |
| // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64> |
| // CHECK: ret <2 x i64> [[SEXT_I]] |
| uint64x2_t test_vcgeq_u64(uint64x2_t v1, uint64x2_t v2) { |
| return vcgeq_u64(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vcgeq_f64( |
| // CHECK: [[CMP_I:%.*]] = fcmp oge <2 x double> %v1, %v2 |
| // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64> |
| // CHECK: ret <2 x i64> [[SEXT_I]] |
| uint64x2_t test_vcgeq_f64(float64x2_t v1, float64x2_t v2) { |
| return vcgeq_f64(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vcle_s8( |
| // CHECK: [[CMP_I:%.*]] = icmp sle <8 x i8> %v1, %v2 |
| // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8> |
| // CHECK: ret <8 x i8> [[SEXT_I]] |
| // Notes about vcle: |
| // The LE condition predicate is implemented as GE, so the operands are |
| // checked in reversed order (see the sketch after this function). |
| // Using registers other than v0, v1 is possible, but would be odd. |
| uint8x8_t test_vcle_s8(int8x8_t v1, int8x8_t v2) { |
| return vcle_s8(v1, v2); |
| } |
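| |
| // Illustrative sketch, not a checked test: since the LE predicate maps onto |
| // GE with reversed operands, vcle_s8(v1, v2) produces the same lane mask as |
| // vcge_s8(v2, v1). The helper name below is hypothetical and is not |
| // referenced by any CHECK line. |
| static inline uint8x8_t vcle_via_vcge_s8(int8x8_t v1, int8x8_t v2) { |
|   // a <= b is equivalent to b >= a, lane by lane. |
|   return vcge_s8(v2, v1); |
| } |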
| |
| // CHECK-LABEL: @test_vcle_s16( |
| // CHECK: [[CMP_I:%.*]] = icmp sle <4 x i16> %v1, %v2 |
| // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16> |
| // CHECK: ret <4 x i16> [[SEXT_I]] |
| uint16x4_t test_vcle_s16(int16x4_t v1, int16x4_t v2) { |
| return vcle_s16(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vcle_s32( |
| // CHECK: [[CMP_I:%.*]] = icmp sle <2 x i32> %v1, %v2 |
| // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> |
| // CHECK: ret <2 x i32> [[SEXT_I]] |
| uint32x2_t test_vcle_s32(int32x2_t v1, int32x2_t v2) { |
| return vcle_s32(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vcle_s64( |
| // CHECK: [[CMP_I:%.*]] = icmp sle <1 x i64> %a, %b |
| // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64> |
| // CHECK: ret <1 x i64> [[SEXT_I]] |
| uint64x1_t test_vcle_s64(int64x1_t a, int64x1_t b) { |
| return vcle_s64(a, b); |
| } |
| |
| // CHECK-LABEL: @test_vcle_u64( |
| // CHECK: [[CMP_I:%.*]] = icmp ule <1 x i64> %a, %b |
| // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64> |
| // CHECK: ret <1 x i64> [[SEXT_I]] |
| uint64x1_t test_vcle_u64(uint64x1_t a, uint64x1_t b) { |
| return vcle_u64(a, b); |
| } |
| |
| // CHECK-LABEL: @test_vcle_f32( |
| // CHECK: [[CMP_I:%.*]] = fcmp ole <2 x float> %v1, %v2 |
| // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> |
| // CHECK: ret <2 x i32> [[SEXT_I]] |
| uint32x2_t test_vcle_f32(float32x2_t v1, float32x2_t v2) { |
| return vcle_f32(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vcle_f64( |
| // CHECK: [[CMP_I:%.*]] = fcmp ole <1 x double> %a, %b |
| // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64> |
| // CHECK: ret <1 x i64> [[SEXT_I]] |
| uint64x1_t test_vcle_f64(float64x1_t a, float64x1_t b) { |
| return vcle_f64(a, b); |
| } |
| |
| // CHECK-LABEL: @test_vcle_u8( |
| // CHECK: [[CMP_I:%.*]] = icmp ule <8 x i8> %v1, %v2 |
| // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8> |
| // CHECK: ret <8 x i8> [[SEXT_I]] |
| uint8x8_t test_vcle_u8(uint8x8_t v1, uint8x8_t v2) { |
| return vcle_u8(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vcle_u16( |
| // CHECK: [[CMP_I:%.*]] = icmp ule <4 x i16> %v1, %v2 |
| // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16> |
| // CHECK: ret <4 x i16> [[SEXT_I]] |
| uint16x4_t test_vcle_u16(uint16x4_t v1, uint16x4_t v2) { |
| return vcle_u16(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vcle_u32( |
| // CHECK: [[CMP_I:%.*]] = icmp ule <2 x i32> %v1, %v2 |
| // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> |
| // CHECK: ret <2 x i32> [[SEXT_I]] |
| uint32x2_t test_vcle_u32(uint32x2_t v1, uint32x2_t v2) { |
| return vcle_u32(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vcleq_s8( |
| // CHECK: [[CMP_I:%.*]] = icmp sle <16 x i8> %v1, %v2 |
| // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> |
| // CHECK: ret <16 x i8> [[SEXT_I]] |
| uint8x16_t test_vcleq_s8(int8x16_t v1, int8x16_t v2) { |
| return vcleq_s8(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vcleq_s16( |
| // CHECK: [[CMP_I:%.*]] = icmp sle <8 x i16> %v1, %v2 |
| // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16> |
| // CHECK: ret <8 x i16> [[SEXT_I]] |
| uint16x8_t test_vcleq_s16(int16x8_t v1, int16x8_t v2) { |
| return vcleq_s16(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vcleq_s32( |
| // CHECK: [[CMP_I:%.*]] = icmp sle <4 x i32> %v1, %v2 |
| // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> |
| // CHECK: ret <4 x i32> [[SEXT_I]] |
| uint32x4_t test_vcleq_s32(int32x4_t v1, int32x4_t v2) { |
| return vcleq_s32(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vcleq_f32( |
| // CHECK: [[CMP_I:%.*]] = fcmp ole <4 x float> %v1, %v2 |
| // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> |
| // CHECK: ret <4 x i32> [[SEXT_I]] |
| uint32x4_t test_vcleq_f32(float32x4_t v1, float32x4_t v2) { |
| return vcleq_f32(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vcleq_u8( |
| // CHECK: [[CMP_I:%.*]] = icmp ule <16 x i8> %v1, %v2 |
| // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> |
| // CHECK: ret <16 x i8> [[SEXT_I]] |
| uint8x16_t test_vcleq_u8(uint8x16_t v1, uint8x16_t v2) { |
| return vcleq_u8(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vcleq_u16( |
| // CHECK: [[CMP_I:%.*]] = icmp ule <8 x i16> %v1, %v2 |
| // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16> |
| // CHECK: ret <8 x i16> [[SEXT_I]] |
| uint16x8_t test_vcleq_u16(uint16x8_t v1, uint16x8_t v2) { |
| return vcleq_u16(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vcleq_u32( |
| // CHECK: [[CMP_I:%.*]] = icmp ule <4 x i32> %v1, %v2 |
| // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> |
| // CHECK: ret <4 x i32> [[SEXT_I]] |
| uint32x4_t test_vcleq_u32(uint32x4_t v1, uint32x4_t v2) { |
| return vcleq_u32(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vcleq_s64( |
| // CHECK: [[CMP_I:%.*]] = icmp sle <2 x i64> %v1, %v2 |
| // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64> |
| // CHECK: ret <2 x i64> [[SEXT_I]] |
| uint64x2_t test_vcleq_s64(int64x2_t v1, int64x2_t v2) { |
| return vcleq_s64(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vcleq_u64( |
| // CHECK: [[CMP_I:%.*]] = icmp ule <2 x i64> %v1, %v2 |
| // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64> |
| // CHECK: ret <2 x i64> [[SEXT_I]] |
| uint64x2_t test_vcleq_u64(uint64x2_t v1, uint64x2_t v2) { |
| return vcleq_u64(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vcleq_f64( |
| // CHECK: [[CMP_I:%.*]] = fcmp ole <2 x double> %v1, %v2 |
| // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64> |
| // CHECK: ret <2 x i64> [[SEXT_I]] |
| uint64x2_t test_vcleq_f64(float64x2_t v1, float64x2_t v2) { |
| return vcleq_f64(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vcgt_s8( |
| // CHECK: [[CMP_I:%.*]] = icmp sgt <8 x i8> %v1, %v2 |
| // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8> |
| // CHECK: ret <8 x i8> [[SEXT_I]] |
| uint8x8_t test_vcgt_s8(int8x8_t v1, int8x8_t v2) { |
| return vcgt_s8(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vcgt_s16( |
| // CHECK: [[CMP_I:%.*]] = icmp sgt <4 x i16> %v1, %v2 |
| // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16> |
| // CHECK: ret <4 x i16> [[SEXT_I]] |
| uint16x4_t test_vcgt_s16(int16x4_t v1, int16x4_t v2) { |
| return vcgt_s16(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vcgt_s32( |
| // CHECK: [[CMP_I:%.*]] = icmp sgt <2 x i32> %v1, %v2 |
| // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> |
| // CHECK: ret <2 x i32> [[SEXT_I]] |
| uint32x2_t test_vcgt_s32(int32x2_t v1, int32x2_t v2) { |
| return vcgt_s32(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vcgt_s64( |
| // CHECK: [[CMP_I:%.*]] = icmp sgt <1 x i64> %a, %b |
| // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64> |
| // CHECK: ret <1 x i64> [[SEXT_I]] |
| uint64x1_t test_vcgt_s64(int64x1_t a, int64x1_t b) { |
| return vcgt_s64(a, b); |
| } |
| |
| // CHECK-LABEL: @test_vcgt_u64( |
| // CHECK: [[CMP_I:%.*]] = icmp ugt <1 x i64> %a, %b |
| // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64> |
| // CHECK: ret <1 x i64> [[SEXT_I]] |
| uint64x1_t test_vcgt_u64(uint64x1_t a, uint64x1_t b) { |
| return vcgt_u64(a, b); |
| } |
| |
| // CHECK-LABEL: @test_vcgt_f32( |
| // CHECK: [[CMP_I:%.*]] = fcmp ogt <2 x float> %v1, %v2 |
| // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> |
| // CHECK: ret <2 x i32> [[SEXT_I]] |
| uint32x2_t test_vcgt_f32(float32x2_t v1, float32x2_t v2) { |
| return vcgt_f32(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vcgt_f64( |
| // CHECK: [[CMP_I:%.*]] = fcmp ogt <1 x double> %a, %b |
| // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64> |
| // CHECK: ret <1 x i64> [[SEXT_I]] |
| uint64x1_t test_vcgt_f64(float64x1_t a, float64x1_t b) { |
| return vcgt_f64(a, b); |
| } |
| |
| // CHECK-LABEL: @test_vcgt_u8( |
| // CHECK: [[CMP_I:%.*]] = icmp ugt <8 x i8> %v1, %v2 |
| // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8> |
| // CHECK: ret <8 x i8> [[SEXT_I]] |
| uint8x8_t test_vcgt_u8(uint8x8_t v1, uint8x8_t v2) { |
| return vcgt_u8(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vcgt_u16( |
| // CHECK: [[CMP_I:%.*]] = icmp ugt <4 x i16> %v1, %v2 |
| // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16> |
| // CHECK: ret <4 x i16> [[SEXT_I]] |
| uint16x4_t test_vcgt_u16(uint16x4_t v1, uint16x4_t v2) { |
| return vcgt_u16(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vcgt_u32( |
| // CHECK: [[CMP_I:%.*]] = icmp ugt <2 x i32> %v1, %v2 |
| // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> |
| // CHECK: ret <2 x i32> [[SEXT_I]] |
| uint32x2_t test_vcgt_u32(uint32x2_t v1, uint32x2_t v2) { |
| return vcgt_u32(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vcgtq_s8( |
| // CHECK: [[CMP_I:%.*]] = icmp sgt <16 x i8> %v1, %v2 |
| // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> |
| // CHECK: ret <16 x i8> [[SEXT_I]] |
| uint8x16_t test_vcgtq_s8(int8x16_t v1, int8x16_t v2) { |
| return vcgtq_s8(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vcgtq_s16( |
| // CHECK: [[CMP_I:%.*]] = icmp sgt <8 x i16> %v1, %v2 |
| // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16> |
| // CHECK: ret <8 x i16> [[SEXT_I]] |
| uint16x8_t test_vcgtq_s16(int16x8_t v1, int16x8_t v2) { |
| return vcgtq_s16(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vcgtq_s32( |
| // CHECK: [[CMP_I:%.*]] = icmp sgt <4 x i32> %v1, %v2 |
| // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> |
| // CHECK: ret <4 x i32> [[SEXT_I]] |
| uint32x4_t test_vcgtq_s32(int32x4_t v1, int32x4_t v2) { |
| return vcgtq_s32(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vcgtq_f32( |
| // CHECK: [[CMP_I:%.*]] = fcmp ogt <4 x float> %v1, %v2 |
| // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> |
| // CHECK: ret <4 x i32> [[SEXT_I]] |
| uint32x4_t test_vcgtq_f32(float32x4_t v1, float32x4_t v2) { |
| return vcgtq_f32(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vcgtq_u8( |
| // CHECK: [[CMP_I:%.*]] = icmp ugt <16 x i8> %v1, %v2 |
| // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> |
| // CHECK: ret <16 x i8> [[SEXT_I]] |
| uint8x16_t test_vcgtq_u8(uint8x16_t v1, uint8x16_t v2) { |
| return vcgtq_u8(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vcgtq_u16( |
| // CHECK: [[CMP_I:%.*]] = icmp ugt <8 x i16> %v1, %v2 |
| // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16> |
| // CHECK: ret <8 x i16> [[SEXT_I]] |
| uint16x8_t test_vcgtq_u16(uint16x8_t v1, uint16x8_t v2) { |
| return vcgtq_u16(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vcgtq_u32( |
| // CHECK: [[CMP_I:%.*]] = icmp ugt <4 x i32> %v1, %v2 |
| // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> |
| // CHECK: ret <4 x i32> [[SEXT_I]] |
| uint32x4_t test_vcgtq_u32(uint32x4_t v1, uint32x4_t v2) { |
| return vcgtq_u32(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vcgtq_s64( |
| // CHECK: [[CMP_I:%.*]] = icmp sgt <2 x i64> %v1, %v2 |
| // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64> |
| // CHECK: ret <2 x i64> [[SEXT_I]] |
| uint64x2_t test_vcgtq_s64(int64x2_t v1, int64x2_t v2) { |
| return vcgtq_s64(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vcgtq_u64( |
| // CHECK: [[CMP_I:%.*]] = icmp ugt <2 x i64> %v1, %v2 |
| // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64> |
| // CHECK: ret <2 x i64> [[SEXT_I]] |
| uint64x2_t test_vcgtq_u64(uint64x2_t v1, uint64x2_t v2) { |
| return vcgtq_u64(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vcgtq_f64( |
| // CHECK: [[CMP_I:%.*]] = fcmp ogt <2 x double> %v1, %v2 |
| // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64> |
| // CHECK: ret <2 x i64> [[SEXT_I]] |
| uint64x2_t test_vcgtq_f64(float64x2_t v1, float64x2_t v2) { |
| return vcgtq_f64(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vclt_s8( |
| // CHECK: [[CMP_I:%.*]] = icmp slt <8 x i8> %v1, %v2 |
| // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8> |
| // CHECK: ret <8 x i8> [[SEXT_I]] |
| // Notes about vclt: |
| // The LT condition predicate is implemented as GT, so the operands are |
| // checked in reversed order (see the sketch after this function). |
| // Using registers other than v0, v1 is possible, but would be odd. |
| uint8x8_t test_vclt_s8(int8x8_t v1, int8x8_t v2) { |
| return vclt_s8(v1, v2); |
| } |
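| |
| // Illustrative sketch, not a checked test: mirroring the note above, the LT |
| // predicate maps onto GT with reversed operands, so vclt_s8(v1, v2) produces |
| // the same lane mask as vcgt_s8(v2, v1). The helper name below is |
| // hypothetical and is not referenced by any CHECK line. |
| static inline uint8x8_t vclt_via_vcgt_s8(int8x8_t v1, int8x8_t v2) { |
|   // a < b is equivalent to b > a, lane by lane. |
|   return vcgt_s8(v2, v1); |
| } |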
| |
| // CHECK-LABEL: @test_vclt_s16( |
| // CHECK: [[CMP_I:%.*]] = icmp slt <4 x i16> %v1, %v2 |
| // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16> |
| // CHECK: ret <4 x i16> [[SEXT_I]] |
| uint16x4_t test_vclt_s16(int16x4_t v1, int16x4_t v2) { |
| return vclt_s16(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vclt_s32( |
| // CHECK: [[CMP_I:%.*]] = icmp slt <2 x i32> %v1, %v2 |
| // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> |
| // CHECK: ret <2 x i32> [[SEXT_I]] |
| uint32x2_t test_vclt_s32(int32x2_t v1, int32x2_t v2) { |
| return vclt_s32(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vclt_s64( |
| // CHECK: [[CMP_I:%.*]] = icmp slt <1 x i64> %a, %b |
| // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64> |
| // CHECK: ret <1 x i64> [[SEXT_I]] |
| uint64x1_t test_vclt_s64(int64x1_t a, int64x1_t b) { |
| return vclt_s64(a, b); |
| } |
| |
| // CHECK-LABEL: @test_vclt_u64( |
| // CHECK: [[CMP_I:%.*]] = icmp ult <1 x i64> %a, %b |
| // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64> |
| // CHECK: ret <1 x i64> [[SEXT_I]] |
| uint64x1_t test_vclt_u64(uint64x1_t a, uint64x1_t b) { |
| return vclt_u64(a, b); |
| } |
| |
| // CHECK-LABEL: @test_vclt_f32( |
| // CHECK: [[CMP_I:%.*]] = fcmp olt <2 x float> %v1, %v2 |
| // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> |
| // CHECK: ret <2 x i32> [[SEXT_I]] |
| uint32x2_t test_vclt_f32(float32x2_t v1, float32x2_t v2) { |
| return vclt_f32(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vclt_f64( |
| // CHECK: [[CMP_I:%.*]] = fcmp olt <1 x double> %a, %b |
| // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64> |
| // CHECK: ret <1 x i64> [[SEXT_I]] |
| uint64x1_t test_vclt_f64(float64x1_t a, float64x1_t b) { |
| return vclt_f64(a, b); |
| } |
| |
| // CHECK-LABEL: @test_vclt_u8( |
| // CHECK: [[CMP_I:%.*]] = icmp ult <8 x i8> %v1, %v2 |
| // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8> |
| // CHECK: ret <8 x i8> [[SEXT_I]] |
| uint8x8_t test_vclt_u8(uint8x8_t v1, uint8x8_t v2) { |
| return vclt_u8(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vclt_u16( |
| // CHECK: [[CMP_I:%.*]] = icmp ult <4 x i16> %v1, %v2 |
| // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16> |
| // CHECK: ret <4 x i16> [[SEXT_I]] |
| uint16x4_t test_vclt_u16(uint16x4_t v1, uint16x4_t v2) { |
| return vclt_u16(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vclt_u32( |
| // CHECK: [[CMP_I:%.*]] = icmp ult <2 x i32> %v1, %v2 |
| // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> |
| // CHECK: ret <2 x i32> [[SEXT_I]] |
| uint32x2_t test_vclt_u32(uint32x2_t v1, uint32x2_t v2) { |
| return vclt_u32(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vcltq_s8( |
| // CHECK: [[CMP_I:%.*]] = icmp slt <16 x i8> %v1, %v2 |
| // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> |
| // CHECK: ret <16 x i8> [[SEXT_I]] |
| uint8x16_t test_vcltq_s8(int8x16_t v1, int8x16_t v2) { |
| return vcltq_s8(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vcltq_s16( |
| // CHECK: [[CMP_I:%.*]] = icmp slt <8 x i16> %v1, %v2 |
| // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16> |
| // CHECK: ret <8 x i16> [[SEXT_I]] |
| uint16x8_t test_vcltq_s16(int16x8_t v1, int16x8_t v2) { |
| return vcltq_s16(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vcltq_s32( |
| // CHECK: [[CMP_I:%.*]] = icmp slt <4 x i32> %v1, %v2 |
| // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> |
| // CHECK: ret <4 x i32> [[SEXT_I]] |
| uint32x4_t test_vcltq_s32(int32x4_t v1, int32x4_t v2) { |
| return vcltq_s32(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vcltq_f32( |
| // CHECK: [[CMP_I:%.*]] = fcmp olt <4 x float> %v1, %v2 |
| // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> |
| // CHECK: ret <4 x i32> [[SEXT_I]] |
| uint32x4_t test_vcltq_f32(float32x4_t v1, float32x4_t v2) { |
| return vcltq_f32(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vcltq_u8( |
| // CHECK: [[CMP_I:%.*]] = icmp ult <16 x i8> %v1, %v2 |
| // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> |
| // CHECK: ret <16 x i8> [[SEXT_I]] |
| uint8x16_t test_vcltq_u8(uint8x16_t v1, uint8x16_t v2) { |
| return vcltq_u8(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vcltq_u16( |
| // CHECK: [[CMP_I:%.*]] = icmp ult <8 x i16> %v1, %v2 |
| // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16> |
| // CHECK: ret <8 x i16> [[SEXT_I]] |
| uint16x8_t test_vcltq_u16(uint16x8_t v1, uint16x8_t v2) { |
| return vcltq_u16(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vcltq_u32( |
| // CHECK: [[CMP_I:%.*]] = icmp ult <4 x i32> %v1, %v2 |
| // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> |
| // CHECK: ret <4 x i32> [[SEXT_I]] |
| uint32x4_t test_vcltq_u32(uint32x4_t v1, uint32x4_t v2) { |
| return vcltq_u32(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vcltq_s64( |
| // CHECK: [[CMP_I:%.*]] = icmp slt <2 x i64> %v1, %v2 |
| // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64> |
| // CHECK: ret <2 x i64> [[SEXT_I]] |
| uint64x2_t test_vcltq_s64(int64x2_t v1, int64x2_t v2) { |
| return vcltq_s64(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vcltq_u64( |
| // CHECK: [[CMP_I:%.*]] = icmp ult <2 x i64> %v1, %v2 |
| // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64> |
| // CHECK: ret <2 x i64> [[SEXT_I]] |
| uint64x2_t test_vcltq_u64(uint64x2_t v1, uint64x2_t v2) { |
| return vcltq_u64(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vcltq_f64( |
| // CHECK: [[CMP_I:%.*]] = fcmp olt <2 x double> %v1, %v2 |
| // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64> |
| // CHECK: ret <2 x i64> [[SEXT_I]] |
| uint64x2_t test_vcltq_f64(float64x2_t v1, float64x2_t v2) { |
| return vcltq_f64(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vhadd_s8( |
| // CHECK: [[VHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.shadd.v8i8(<8 x i8> %v1, <8 x i8> %v2) |
| // CHECK: ret <8 x i8> [[VHADD_V_I]] |
| int8x8_t test_vhadd_s8(int8x8_t v1, int8x8_t v2) { |
| return vhadd_s8(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vhadd_s16( |
| // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8> |
| // CHECK: [[VHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.shadd.v4i16(<4 x i16> %v1, <4 x i16> %v2) |
| // CHECK: [[VHADD_V3_I:%.*]] = bitcast <4 x i16> [[VHADD_V2_I]] to <8 x i8> |
| // CHECK: ret <4 x i16> [[VHADD_V2_I]] |
| int16x4_t test_vhadd_s16(int16x4_t v1, int16x4_t v2) { |
| return vhadd_s16(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vhadd_s32( |
| // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8> |
| // CHECK: [[VHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.shadd.v2i32(<2 x i32> %v1, <2 x i32> %v2) |
| // CHECK: [[VHADD_V3_I:%.*]] = bitcast <2 x i32> [[VHADD_V2_I]] to <8 x i8> |
| // CHECK: ret <2 x i32> [[VHADD_V2_I]] |
| int32x2_t test_vhadd_s32(int32x2_t v1, int32x2_t v2) { |
| return vhadd_s32(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vhadd_u8( |
| // CHECK: [[VHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uhadd.v8i8(<8 x i8> %v1, <8 x i8> %v2) |
| // CHECK: ret <8 x i8> [[VHADD_V_I]] |
| uint8x8_t test_vhadd_u8(uint8x8_t v1, uint8x8_t v2) { |
| return vhadd_u8(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vhadd_u16( |
| // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8> |
| // CHECK: [[VHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uhadd.v4i16(<4 x i16> %v1, <4 x i16> %v2) |
| // CHECK: [[VHADD_V3_I:%.*]] = bitcast <4 x i16> [[VHADD_V2_I]] to <8 x i8> |
| // CHECK: ret <4 x i16> [[VHADD_V2_I]] |
| uint16x4_t test_vhadd_u16(uint16x4_t v1, uint16x4_t v2) { |
| return vhadd_u16(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vhadd_u32( |
| // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8> |
| // CHECK: [[VHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uhadd.v2i32(<2 x i32> %v1, <2 x i32> %v2) |
| // CHECK: [[VHADD_V3_I:%.*]] = bitcast <2 x i32> [[VHADD_V2_I]] to <8 x i8> |
| // CHECK: ret <2 x i32> [[VHADD_V2_I]] |
| uint32x2_t test_vhadd_u32(uint32x2_t v1, uint32x2_t v2) { |
| return vhadd_u32(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vhaddq_s8( |
| // CHECK: [[VHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.shadd.v16i8(<16 x i8> %v1, <16 x i8> %v2) |
| // CHECK: ret <16 x i8> [[VHADDQ_V_I]] |
| int8x16_t test_vhaddq_s8(int8x16_t v1, int8x16_t v2) { |
| return vhaddq_s8(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vhaddq_s16( |
| // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8> |
| // CHECK: [[VHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %v1, <8 x i16> %v2) |
| // CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VHADDQ_V2_I]] to <16 x i8> |
| // CHECK: ret <8 x i16> [[VHADDQ_V2_I]] |
| int16x8_t test_vhaddq_s16(int16x8_t v1, int16x8_t v2) { |
| return vhaddq_s16(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vhaddq_s32( |
| // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8> |
| // CHECK: [[VHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.shadd.v4i32(<4 x i32> %v1, <4 x i32> %v2) |
| // CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VHADDQ_V2_I]] to <16 x i8> |
| // CHECK: ret <4 x i32> [[VHADDQ_V2_I]] |
| int32x4_t test_vhaddq_s32(int32x4_t v1, int32x4_t v2) { |
| return vhaddq_s32(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vhaddq_u8( |
| // CHECK: [[VHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uhadd.v16i8(<16 x i8> %v1, <16 x i8> %v2) |
| // CHECK: ret <16 x i8> [[VHADDQ_V_I]] |
| uint8x16_t test_vhaddq_u8(uint8x16_t v1, uint8x16_t v2) { |
| return vhaddq_u8(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vhaddq_u16( |
| // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8> |
| // CHECK: [[VHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %v1, <8 x i16> %v2) |
| // CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VHADDQ_V2_I]] to <16 x i8> |
| // CHECK: ret <8 x i16> [[VHADDQ_V2_I]] |
| uint16x8_t test_vhaddq_u16(uint16x8_t v1, uint16x8_t v2) { |
| return vhaddq_u16(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vhaddq_u32( |
| // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8> |
| // CHECK: [[VHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uhadd.v4i32(<4 x i32> %v1, <4 x i32> %v2) |
| // CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VHADDQ_V2_I]] to <16 x i8> |
| // CHECK: ret <4 x i32> [[VHADDQ_V2_I]] |
| uint32x4_t test_vhaddq_u32(uint32x4_t v1, uint32x4_t v2) { |
| return vhaddq_u32(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vhsub_s8( |
| // CHECK: [[VHSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.shsub.v8i8(<8 x i8> %v1, <8 x i8> %v2) |
| // CHECK: ret <8 x i8> [[VHSUB_V_I]] |
| int8x8_t test_vhsub_s8(int8x8_t v1, int8x8_t v2) { |
| return vhsub_s8(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vhsub_s16( |
| // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8> |
| // CHECK: [[VHSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.shsub.v4i16(<4 x i16> %v1, <4 x i16> %v2) |
| // CHECK: [[VHSUB_V3_I:%.*]] = bitcast <4 x i16> [[VHSUB_V2_I]] to <8 x i8> |
| // CHECK: ret <4 x i16> [[VHSUB_V2_I]] |
| int16x4_t test_vhsub_s16(int16x4_t v1, int16x4_t v2) { |
| return vhsub_s16(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vhsub_s32( |
| // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8> |
| // CHECK: [[VHSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.shsub.v2i32(<2 x i32> %v1, <2 x i32> %v2) |
| // CHECK: [[VHSUB_V3_I:%.*]] = bitcast <2 x i32> [[VHSUB_V2_I]] to <8 x i8> |
| // CHECK: ret <2 x i32> [[VHSUB_V2_I]] |
| int32x2_t test_vhsub_s32(int32x2_t v1, int32x2_t v2) { |
| return vhsub_s32(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vhsub_u8( |
| // CHECK: [[VHSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uhsub.v8i8(<8 x i8> %v1, <8 x i8> %v2) |
| // CHECK: ret <8 x i8> [[VHSUB_V_I]] |
| uint8x8_t test_vhsub_u8(uint8x8_t v1, uint8x8_t v2) { |
| return vhsub_u8(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vhsub_u16( |
| // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8> |
| // CHECK: [[VHSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uhsub.v4i16(<4 x i16> %v1, <4 x i16> %v2) |
| // CHECK: [[VHSUB_V3_I:%.*]] = bitcast <4 x i16> [[VHSUB_V2_I]] to <8 x i8> |
| // CHECK: ret <4 x i16> [[VHSUB_V2_I]] |
| uint16x4_t test_vhsub_u16(uint16x4_t v1, uint16x4_t v2) { |
| return vhsub_u16(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vhsub_u32( |
| // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8> |
| // CHECK: [[VHSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uhsub.v2i32(<2 x i32> %v1, <2 x i32> %v2) |
| // CHECK: [[VHSUB_V3_I:%.*]] = bitcast <2 x i32> [[VHSUB_V2_I]] to <8 x i8> |
| // CHECK: ret <2 x i32> [[VHSUB_V2_I]] |
| uint32x2_t test_vhsub_u32(uint32x2_t v1, uint32x2_t v2) { |
| return vhsub_u32(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vhsubq_s8( |
| // CHECK: [[VHSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.shsub.v16i8(<16 x i8> %v1, <16 x i8> %v2) |
| // CHECK: ret <16 x i8> [[VHSUBQ_V_I]] |
| int8x16_t test_vhsubq_s8(int8x16_t v1, int8x16_t v2) { |
| return vhsubq_s8(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vhsubq_s16( |
| // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8> |
| // CHECK: [[VHSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.shsub.v8i16(<8 x i16> %v1, <8 x i16> %v2) |
| // CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VHSUBQ_V2_I]] to <16 x i8> |
| // CHECK: ret <8 x i16> [[VHSUBQ_V2_I]] |
| int16x8_t test_vhsubq_s16(int16x8_t v1, int16x8_t v2) { |
| return vhsubq_s16(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vhsubq_s32( |
| // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8> |
| // CHECK: [[VHSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.shsub.v4i32(<4 x i32> %v1, <4 x i32> %v2) |
| // CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VHSUBQ_V2_I]] to <16 x i8> |
| // CHECK: ret <4 x i32> [[VHSUBQ_V2_I]] |
| int32x4_t test_vhsubq_s32(int32x4_t v1, int32x4_t v2) { |
| return vhsubq_s32(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vhsubq_u8( |
| // CHECK: [[VHSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uhsub.v16i8(<16 x i8> %v1, <16 x i8> %v2) |
| // CHECK: ret <16 x i8> [[VHSUBQ_V_I]] |
| uint8x16_t test_vhsubq_u8(uint8x16_t v1, uint8x16_t v2) { |
| return vhsubq_u8(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vhsubq_u16( |
| // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8> |
| // CHECK: [[VHSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uhsub.v8i16(<8 x i16> %v1, <8 x i16> %v2) |
| // CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VHSUBQ_V2_I]] to <16 x i8> |
| // CHECK: ret <8 x i16> [[VHSUBQ_V2_I]] |
| uint16x8_t test_vhsubq_u16(uint16x8_t v1, uint16x8_t v2) { |
| return vhsubq_u16(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vhsubq_u32( |
| // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8> |
| // CHECK: [[VHSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uhsub.v4i32(<4 x i32> %v1, <4 x i32> %v2) |
| // CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VHSUBQ_V2_I]] to <16 x i8> |
| // CHECK: ret <4 x i32> [[VHSUBQ_V2_I]] |
| uint32x4_t test_vhsubq_u32(uint32x4_t v1, uint32x4_t v2) { |
| return vhsubq_u32(v1, v2); |
| } |
| |
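| // Rounding halving add (vrhadd/vrhaddq): each lane computes (v1 + v2 + 1) >> 1, |
| // e.g. rounding-halving 1 and 2 gives 2 where a plain halving add gives 1. |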
| // CHECK-LABEL: @test_vrhadd_s8( |
| // CHECK: [[VRHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.srhadd.v8i8(<8 x i8> %v1, <8 x i8> %v2) |
| // CHECK: ret <8 x i8> [[VRHADD_V_I]] |
| int8x8_t test_vrhadd_s8(int8x8_t v1, int8x8_t v2) { |
| return vrhadd_s8(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vrhadd_s16( |
| // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8> |
| // CHECK: [[VRHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.srhadd.v4i16(<4 x i16> %v1, <4 x i16> %v2) |
| // CHECK: [[VRHADD_V3_I:%.*]] = bitcast <4 x i16> [[VRHADD_V2_I]] to <8 x i8> |
| // CHECK: ret <4 x i16> [[VRHADD_V2_I]] |
| int16x4_t test_vrhadd_s16(int16x4_t v1, int16x4_t v2) { |
| return vrhadd_s16(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vrhadd_s32( |
| // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8> |
| // CHECK: [[VRHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.srhadd.v2i32(<2 x i32> %v1, <2 x i32> %v2) |
| // CHECK: [[VRHADD_V3_I:%.*]] = bitcast <2 x i32> [[VRHADD_V2_I]] to <8 x i8> |
| // CHECK: ret <2 x i32> [[VRHADD_V2_I]] |
| int32x2_t test_vrhadd_s32(int32x2_t v1, int32x2_t v2) { |
| return vrhadd_s32(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vrhadd_u8( |
| // CHECK: [[VRHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.urhadd.v8i8(<8 x i8> %v1, <8 x i8> %v2) |
| // CHECK: ret <8 x i8> [[VRHADD_V_I]] |
| uint8x8_t test_vrhadd_u8(uint8x8_t v1, uint8x8_t v2) { |
| return vrhadd_u8(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vrhadd_u16( |
| // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8> |
| // CHECK: [[VRHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.urhadd.v4i16(<4 x i16> %v1, <4 x i16> %v2) |
| // CHECK: [[VRHADD_V3_I:%.*]] = bitcast <4 x i16> [[VRHADD_V2_I]] to <8 x i8> |
| // CHECK: ret <4 x i16> [[VRHADD_V2_I]] |
| uint16x4_t test_vrhadd_u16(uint16x4_t v1, uint16x4_t v2) { |
| return vrhadd_u16(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vrhadd_u32( |
| // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8> |
| // CHECK: [[VRHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.urhadd.v2i32(<2 x i32> %v1, <2 x i32> %v2) |
| // CHECK: [[VRHADD_V3_I:%.*]] = bitcast <2 x i32> [[VRHADD_V2_I]] to <8 x i8> |
| // CHECK: ret <2 x i32> [[VRHADD_V2_I]] |
| uint32x2_t test_vrhadd_u32(uint32x2_t v1, uint32x2_t v2) { |
| return vrhadd_u32(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vrhaddq_s8( |
| // CHECK: [[VRHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.srhadd.v16i8(<16 x i8> %v1, <16 x i8> %v2) |
| // CHECK: ret <16 x i8> [[VRHADDQ_V_I]] |
| int8x16_t test_vrhaddq_s8(int8x16_t v1, int8x16_t v2) { |
| return vrhaddq_s8(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vrhaddq_s16( |
| // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8> |
| // CHECK: [[VRHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> %v1, <8 x i16> %v2) |
| // CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VRHADDQ_V2_I]] to <16 x i8> |
| // CHECK: ret <8 x i16> [[VRHADDQ_V2_I]] |
| int16x8_t test_vrhaddq_s16(int16x8_t v1, int16x8_t v2) { |
| return vrhaddq_s16(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vrhaddq_s32( |
| // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8> |
| // CHECK: [[VRHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.srhadd.v4i32(<4 x i32> %v1, <4 x i32> %v2) |
| // CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VRHADDQ_V2_I]] to <16 x i8> |
| // CHECK: ret <4 x i32> [[VRHADDQ_V2_I]] |
| int32x4_t test_vrhaddq_s32(int32x4_t v1, int32x4_t v2) { |
| return vrhaddq_s32(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vrhaddq_u8( |
| // CHECK: [[VRHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.urhadd.v16i8(<16 x i8> %v1, <16 x i8> %v2) |
| // CHECK: ret <16 x i8> [[VRHADDQ_V_I]] |
| uint8x16_t test_vrhaddq_u8(uint8x16_t v1, uint8x16_t v2) { |
| return vrhaddq_u8(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vrhaddq_u16( |
| // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8> |
| // CHECK: [[VRHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> %v1, <8 x i16> %v2) |
| // CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VRHADDQ_V2_I]] to <16 x i8> |
| // CHECK: ret <8 x i16> [[VRHADDQ_V2_I]] |
| uint16x8_t test_vrhaddq_u16(uint16x8_t v1, uint16x8_t v2) { |
| return vrhaddq_u16(v1, v2); |
| } |
| |
| // CHECK-LABEL: @test_vrhaddq_u32( |
| // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8> |
| // CHECK: [[VRHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.urhadd.v4i32(<4 x i32> %v1, <4 x i32> %v2) |
| // CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VRHADDQ_V2_I]] to <16 x i8> |
| // CHECK: ret <4 x i32> [[VRHADDQ_V2_I]] |
| uint32x4_t test_vrhaddq_u32(uint32x4_t v1, uint32x4_t v2) { |
| return vrhaddq_u32(v1, v2); |
| } |
| |
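| // Saturating add (vqadd/vqaddq): lane-wise addition clamped to the element |
| // type's range, e.g. vqadd_s8 on lanes holding 100 and 100 yields 127. |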
| // CHECK-LABEL: @test_vqadd_s8( |
| // CHECK: [[VQADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqadd.v8i8(<8 x i8> %a, <8 x i8> %b) |
| // CHECK: ret <8 x i8> [[VQADD_V_I]] |
| int8x8_t test_vqadd_s8(int8x8_t a, int8x8_t b) { |
| return vqadd_s8(a, b); |
| } |
| |
| // CHECK-LABEL: @test_vqadd_s16( |
| // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> |
| // CHECK: [[VQADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> %a, <4 x i16> %b) |
| // CHECK: [[VQADD_V3_I:%.*]] = bitcast <4 x i16> [[VQADD_V2_I]] to <8 x i8> |
| // CHECK: ret <4 x i16> [[VQADD_V2_I]] |
| int16x4_t test_vqadd_s16(int16x4_t a, int16x4_t b) { |
| return vqadd_s16(a, b); |
| } |
| |
| // CHECK-LABEL: @test_vqadd_s32( |
| // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> |
| // CHECK: [[VQADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqadd.v2i32(<2 x i32> %a, <2 x i32> %b) |
| // CHECK: [[VQADD_V3_I:%.*]] = bitcast <2 x i32> [[VQADD_V2_I]] to <8 x i8> |
| // CHECK: ret <2 x i32> [[VQADD_V2_I]] |
| int32x2_t test_vqadd_s32(int32x2_t a, int32x2_t b) { |
| return vqadd_s32(a, b); |
| } |
| |
| // CHECK-LABEL: @test_vqadd_s64( |
| // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> |
| // CHECK: [[VQADD_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqadd.v1i64(<1 x i64> %a, <1 x i64> %b) |
| // CHECK: [[VQADD_V3_I:%.*]] = bitcast <1 x i64> [[VQADD_V2_I]] to <8 x i8> |
| // CHECK: ret <1 x i64> [[VQADD_V2_I]] |
| int64x1_t test_vqadd_s64(int64x1_t a, int64x1_t b) { |
| return vqadd_s64(a, b); |
| } |
| |
| // CHECK-LABEL: @test_vqadd_u8( |
| // CHECK: [[VQADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqadd.v8i8(<8 x i8> %a, <8 x i8> %b) |
| // CHECK: ret <8 x i8> [[VQADD_V_I]] |
| uint8x8_t test_vqadd_u8(uint8x8_t a, uint8x8_t b) { |
| return vqadd_u8(a, b); |
| } |
| |
| // CHECK-LABEL: @test_vqadd_u16( |
| // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> |
| // CHECK: [[VQADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqadd.v4i16(<4 x i16> %a, <4 x i16> %b) |
| // CHECK: [[VQADD_V3_I:%.*]] = bitcast <4 x i16> [[VQADD_V2_I]] to <8 x i8> |
| // CHECK: ret <4 x i16> [[VQADD_V2_I]] |
| uint16x4_t test_vqadd_u16(uint16x4_t a, uint16x4_t b) { |
| return vqadd_u16(a, b); |
| } |
| |
| // CHECK-LABEL: @test_vqadd_u32( |
| // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> |
| // CHECK: [[VQADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqadd.v2i32(<2 x i32> %a, <2 x i32> %b) |
| // CHECK: [[VQADD_V3_I:%.*]] = bitcast <2 x i32> [[VQADD_V2_I]] to <8 x i8> |
| // CHECK: ret <2 x i32> [[VQADD_V2_I]] |
| uint32x2_t test_vqadd_u32(uint32x2_t a, uint32x2_t b) { |
| return vqadd_u32(a, b); |
| } |
| |
| // CHECK-LABEL: @test_vqadd_u64( |
| // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> |
| // CHECK: [[VQADD_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqadd.v1i64(<1 x i64> %a, <1 x i64> %b) |
| // CHECK: [[VQADD_V3_I:%.*]] = bitcast <1 x i64> [[VQADD_V2_I]] to <8 x i8> |
| // CHECK: ret <1 x i64> [[VQADD_V2_I]] |
| uint64x1_t test_vqadd_u64(uint64x1_t a, uint64x1_t b) { |
| return vqadd_u64(a, b); |
| } |
| |
| // CHECK-LABEL: @test_vqaddq_s8( |
| // CHECK: [[VQADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqadd.v16i8(<16 x i8> %a, <16 x i8> %b) |
| // CHECK: ret <16 x i8> [[VQADDQ_V_I]] |
| int8x16_t test_vqaddq_s8(int8x16_t a, int8x16_t b) { |
| return vqaddq_s8(a, b); |
| } |
| |
| // CHECK-LABEL: @test_vqaddq_s16( |
| // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> |
| // CHECK: [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16> %a, <8 x i16> %b) |
| // CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VQADDQ_V2_I]] to <16 x i8> |
| // CHECK: ret <8 x i16> [[VQADDQ_V2_I]] |
| int16x8_t test_vqaddq_s16(int16x8_t a, int16x8_t b) { |
| return vqaddq_s16(a, b); |
| } |
| |
| // CHECK-LABEL: @test_vqaddq_s32( |
| // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> |
| // CHECK: [[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %b) |
| // CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VQADDQ_V2_I]] to <16 x i8> |
| // CHECK: ret <4 x i32> [[VQADDQ_V2_I]] |
| int32x4_t test_vqaddq_s32(int32x4_t a, int32x4_t b) { |
| return vqaddq_s32(a, b); |
| } |
| |
| // CHECK-LABEL: @test_vqaddq_s64( |
| // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> |
| // CHECK: [[VQADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %b) |
| // CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VQADDQ_V2_I]] to <16 x i8> |
| // CHECK: ret <2 x i64> [[VQADDQ_V2_I]] |
| int64x2_t test_vqaddq_s64(int64x2_t a, int64x2_t b) { |
| return vqaddq_s64(a, b); |
| } |
| |
| // CHECK-LABEL: @test_vqaddq_u8( |
| // CHECK: [[VQADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqadd.v16i8(<16 x i8> %a, <16 x i8> %b) |
| // CHECK: ret <16 x i8> [[VQADDQ_V_I]] |
| uint8x16_t test_vqaddq_u8(uint8x16_t a, uint8x16_t b) { |
| return vqaddq_u8(a, b); |
| } |
| |
| // CHECK-LABEL: @test_vqaddq_u16( |
| // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> |
| // CHECK: [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqadd.v8i16(<8 x i16> %a, <8 x i16> %b) |
| // CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VQADDQ_V2_I]] to <16 x i8> |
| // CHECK: ret <8 x i16> [[VQADDQ_V2_I]] |
| uint16x8_t test_vqaddq_u16(uint16x8_t a, uint16x8_t b) { |
| return vqaddq_u16(a, b); |
| } |
| |
| // CHECK-LABEL: @test_vqaddq_u32( |
| // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> |
| // CHECK: [[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqadd.v4i32(<4 x i32> %a, <4 x i32> %b) |
| // CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VQADDQ_V2_I]] to <16 x i8> |
| // CHECK: ret <4 x i32> [[VQADDQ_V2_I]] |
| uint32x4_t test_vqaddq_u32(uint32x4_t a, uint32x4_t b) { |
| return vqaddq_u32(a, b); |
| } |
| |
| // CHECK-LABEL: @test_vqaddq_u64( |
| // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> |
| // CHECK: [[VQADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqadd.v2i64(<2 x i64> %a, <2 x i64> %b) |
| // CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VQADDQ_V2_I]] to <16 x i8> |
| // CHECK: ret <2 x i64> [[VQADDQ_V2_I]] |
| uint64x2_t test_vqaddq_u64(uint64x2_t a, uint64x2_t b) { |
| return vqaddq_u64(a, b); |
| } |
| |
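| // Saturating subtract (vqsub/vqsubq): lane-wise subtraction clamped to the |
| // element type's range, e.g. vqsub_u8 on lanes holding 0 and 1 yields 0. |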
| // CHECK-LABEL: @test_vqsub_s8( |
| // CHECK: [[VQSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqsub.v8i8(<8 x i8> %a, <8 x i8> %b) |
| // CHECK: ret <8 x i8> [[VQSUB_V_I]] |
| int8x8_t test_vqsub_s8(int8x8_t a, int8x8_t b) { |
| return vqsub_s8(a, b); |
| } |
| |
| // CHECK-LABEL: @test_vqsub_s16( |
| // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> |
| // CHECK: [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> %a, <4 x i16> %b) |
| // CHECK: [[VQSUB_V3_I:%.*]] = bitcast <4 x i16> [[VQSUB_V2_I]] to <8 x i8> |
| // CHECK: ret <4 x i16> [[VQSUB_V2_I]] |
| int16x4_t test_vqsub_s16(int16x4_t a, int16x4_t b) { |
| return vqsub_s16(a, b); |
| } |
| |
| // CHECK-LABEL: @test_vqsub_s32( |
| // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> |
| // CHECK: [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqsub.v2i32(<2 x i32> %a, <2 x i32> %b) |
| // CHECK: [[VQSUB_V3_I:%.*]] = bitcast <2 x i32> [[VQSUB_V2_I]] to <8 x i8> |
| // CHECK: ret <2 x i32> [[VQSUB_V2_I]] |
| int32x2_t test_vqsub_s32(int32x2_t a, int32x2_t b) { |
| return vqsub_s32(a, b); |
| } |
| |
| // CHECK-LABEL: @test_vqsub_s64( |
| // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> |
| // CHECK: [[VQSUB_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqsub.v1i64(<1 x i64> %a, <1 x i64> %b) |
| // CHECK: [[VQSUB_V3_I:%.*]] = bitcast <1 x i64> [[VQSUB_V2_I]] to <8 x i8> |
| // CHECK: ret <1 x i64> [[VQSUB_V2_I]] |
| int64x1_t test_vqsub_s64(int64x1_t a, int64x1_t b) { |
| return vqsub_s64(a, b); |
| } |
| |
| // CHECK-LABEL: @test_vqsub_u8( |
| // CHECK: [[VQSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqsub.v8i8(<8 x i8> %a, <8 x i8> %b) |
| // CHECK: ret <8 x i8> [[VQSUB_V_I]] |
| uint8x8_t test_vqsub_u8(uint8x8_t a, uint8x8_t b) { |
| return vqsub_u8(a, b); |
| } |
| |
| // CHECK-LABEL: @test_vqsub_u16( |
| // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> |
| // CHECK: [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqsub.v4i16(<4 x i16> %a, <4 x i16> %b) |
| // CHECK: [[VQSUB_V3_I:%.*]] = bitcast <4 x i16> [[VQSUB_V2_I]] to <8 x i8> |
| // CHECK: ret <4 x i16> [[VQSUB_V2_I]] |
| uint16x4_t test_vqsub_u16(uint16x4_t a, uint16x4_t b) { |
| return vqsub_u16(a, b); |
| } |
| |
| // CHECK-LABEL: @test_vqsub_u32( |
| // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> |
| // CHECK: [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqsub.v2i32(<2 x i32> %a, <2 x i32> %b) |
| // CHECK: [[VQSUB_V3_I:%.*]] = bitcast <2 x i32> [[VQSUB_V2_I]] to <8 x i8> |
| // CHECK: ret <2 x i32> [[VQSUB_V2_I]] |
| uint32x2_t test_vqsub_u32(uint32x2_t a, uint32x2_t b) { |
| return vqsub_u32(a, b); |
| } |
| |
| // CHECK-LABEL: @test_vqsub_u64( |
| // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> |
| // CHECK: [[VQSUB_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqsub.v1i64(<1 x i64> %a, <1 x i64> %b) |
| // CHECK: [[VQSUB_V3_I:%.*]] = bitcast <1 x i64> [[VQSUB_V2_I]] to <8 x i8> |
| // CHECK: ret <1 x i64> [[VQSUB_V2_I]] |
| uint64x1_t test_vqsub_u64(uint64x1_t a, uint64x1_t b) { |
| return vqsub_u64(a, b); |
| } |
| |
| // CHECK-LABEL: @test_vqsubq_s8( |
| // CHECK: [[VQSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqsub.v16i8(<16 x i8> %a, <16 x i8> %b) |
| // CHECK: ret <16 x i8> [[VQSUBQ_V_I]] |
| int8x16_t test_vqsubq_s8(int8x16_t a, int8x16_t b) { |
| return vqsubq_s8(a, b); |
| } |
| |
| // CHECK-LABEL: @test_vqsubq_s16( |
| // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> |
| // CHECK: [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqsub.v8i16(<8 x i16> %a, <8 x i16> %b) |
| // CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSUBQ_V2_I]] to <16 x i8> |
| // CHECK: ret <8 x i16> [[VQSUBQ_V2_I]] |
| int16x8_t test_vqsubq_s16(int16x8_t a, int16x8_t b) { |
| return vqsubq_s16(a, b); |
| } |
| |
| // CHECK-LABEL: @test_vqsubq_s32( |
| // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> |
| // CHECK: [[VQSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %b) |
| // CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSUBQ_V2_I]] to <16 x i8> |
| // CHECK: ret <4 x i32> [[VQSUBQ_V2_I]] |
| int32x4_t test_vqsubq_s32(int32x4_t a, int32x4_t b) { |
| return vqsubq_s32(a, b); |
| } |
| |
| // CHECK-LABEL: @test_vqsubq_s64( |
| // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> |
| // CHECK: [[VQSUBQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %b) |
| // CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSUBQ_V2_I]] to <16 x i8> |
| // CHECK: ret <2 x i64> [[VQSUBQ_V2_I]] |
| int64x2_t test_vqsubq_s64(int64x2_t a, int64x2_t b) { |
| return vqsubq_s64(a, b); |
| } |
| |
| // CHECK-LABEL: @test_vqsubq_u8( |
| // CHECK: [[VQSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqsub.v16i8(<16 x i8> %a, <16 x i8> %b) |
| // CHECK: ret <16 x i8> [[VQSUBQ_V_I]] |
| uint8x16_t test_vqsubq_u8(uint8x16_t a, uint8x16_t b) { |
| return vqsubq_u8(a, b); |
| } |
| |
| // CHECK-LABEL: @test_vqsubq_u16( |
| // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> |
| // CHECK: [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqsub.v8i16(<8 x i16> %a, <8 x i16> %b) |
| // CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSUBQ_V2_I]] to <16 x i8> |
| // CHECK: ret <8 x i16> [[VQSUBQ_V2_I]] |
| uint16x8_t test_vqsubq_u16(uint16x8_t a, uint16x8_t b) { |
| return vqsubq_u16(a, b); |
| } |
| |
| // CHECK-LABEL: @test_vqsubq_u32( |
| // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> |
| // CHECK: [[VQSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqsub.v4i32(<4 x i32> %a, <4 x i32> %b) |
| // CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSUBQ_V2_I]] to <16 x i8> |
| // CHECK: ret <4 x i32> [[VQSUBQ_V2_I]] |
| uint32x4_t test_vqsubq_u32(uint32x4_t a, uint32x4_t b) { |
| return vqsubq_u32(a, b); |
| } |
| |
| // CHECK-LABEL: @test_vqsubq_u64( |
| // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> |
| // CHECK: [[VQSUBQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqsub.v2i64(<2 x i64> %a, <2 x i64> %b) |
| // CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSUBQ_V2_I]] to <16 x i8> |
| // CHECK: ret <2 x i64> [[VQSUBQ_V2_I]] |
| uint64x2_t test_vqsubq_u64(uint64x2_t a, uint64x2_t b) { |
| return vqsubq_u64(a, b); |
| } |
| |
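| // Shift left by register (vshl/vshlq): each lane of the first operand is shifted |
| // by the signed value in the corresponding lane of the second operand; negative |
| // counts shift right, which is why the unsigned variants still take a signed |
| // shift vector (e.g. vshl_u8 takes an int8x8_t second argument). |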
| // CHECK-LABEL: @test_vshl_s8( |
| // CHECK: [[VSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sshl.v8i8(<8 x i8> %a, <8 x i8> %b) |
| // CHECK: ret <8 x i8> [[VSHL_V_I]] |
| int8x8_t test_vshl_s8(int8x8_t a, int8x8_t b) { |
| return vshl_s8(a, b); |
| } |
| |
| // CHECK-LABEL: @test_vshl_s16( |
| // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> |
| // CHECK: [[VSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sshl.v4i16(<4 x i16> %a, <4 x i16> %b) |
| // CHECK: [[VSHL_V3_I:%.*]] = bitcast <4 x i16> [[VSHL_V2_I]] to <8 x i8> |
| // CHECK: ret <4 x i16> [[VSHL_V2_I]] |
| int16x4_t test_vshl_s16(int16x4_t a, int16x4_t b) { |
| return vshl_s16(a, b); |
| } |
| |
| // CHECK-LABEL: @test_vshl_s32( |
| // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> |
| // CHECK: [[VSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sshl.v2i32(<2 x i32> %a, <2 x i32> %b) |
| // CHECK: [[VSHL_V3_I:%.*]] = bitcast <2 x i32> [[VSHL_V2_I]] to <8 x i8> |
| // CHECK: ret <2 x i32> [[VSHL_V2_I]] |
| int32x2_t test_vshl_s32(int32x2_t a, int32x2_t b) { |
| return vshl_s32(a, b); |
| } |
| |
| // CHECK-LABEL: @test_vshl_s64( |
| // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> |
| // CHECK: [[VSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sshl.v1i64(<1 x i64> %a, <1 x i64> %b) |
| // CHECK: [[VSHL_V3_I:%.*]] = bitcast <1 x i64> [[VSHL_V2_I]] to <8 x i8> |
| // CHECK: ret <1 x i64> [[VSHL_V2_I]] |
| int64x1_t test_vshl_s64(int64x1_t a, int64x1_t b) { |
| return vshl_s64(a, b); |
| } |
| |
| // CHECK-LABEL: @test_vshl_u8( |
| // CHECK: [[VSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.ushl.v8i8(<8 x i8> %a, <8 x i8> %b) |
| // CHECK: ret <8 x i8> [[VSHL_V_I]] |
| uint8x8_t test_vshl_u8(uint8x8_t a, int8x8_t b) { |
| return vshl_u8(a, b); |
| } |
| |
| // CHECK-LABEL: @test_vshl_u16( |
| // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> |
| // CHECK: [[VSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.ushl.v4i16(<4 x i16> %a, <4 x i16> %b) |
| // CHECK: [[VSHL_V3_I:%.*]] = bitcast <4 x i16> [[VSHL_V2_I]] to <8 x i8> |
| // CHECK: ret <4 x i16> [[VSHL_V2_I]] |
| uint16x4_t test_vshl_u16(uint16x4_t a, int16x4_t b) { |
| return vshl_u16(a, b); |
| } |
| |
| // CHECK-LABEL: @test_vshl_u32( |
| // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> |
| // CHECK: [[VSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.ushl.v2i32(<2 x i32> %a, <2 x i32> %b) |
| // CHECK: [[VSHL_V3_I:%.*]] = bitcast <2 x i32> [[VSHL_V2_I]] to <8 x i8> |
| // CHECK: ret <2 x i32> [[VSHL_V2_I]] |
| uint32x2_t test_vshl_u32(uint32x2_t a, int32x2_t b) { |
| return vshl_u32(a, b); |
| } |
| |
| // CHECK-LABEL: @test_vshl_u64( |
| // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> |
| // CHECK: [[VSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.ushl.v1i64(<1 x i64> %a, <1 x i64> %b) |
| // CHECK: [[VSHL_V3_I:%.*]] = bitcast <1 x i64> [[VSHL_V2_I]] to <8 x i8> |
| // CHECK: ret <1 x i64> [[VSHL_V2_I]] |
| uint64x1_t test_vshl_u64(uint64x1_t a, int64x1_t b) { |
| return vshl_u64(a, b); |
| } |
| |
| // CHECK-LABEL: @test_vshlq_s8( |
| // CHECK: [[VSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> %a, <16 x i8> %b) |
| // CHECK: ret <16 x i8> [[VSHLQ_V_I]] |
| int8x16_t test_vshlq_s8(int8x16_t a, int8x16_t b) { |
| return vshlq_s8(a, b); |
| } |
| |
| // CHECK-LABEL: @test_vshlq_s16( |
| // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> |
| // CHECK: [[VSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sshl.v8i16(<8 x i16> %a, <8 x i16> %b) |
| // CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VSHLQ_V2_I]] to <16 x i8> |
| // CHECK: ret <8 x i16> [[VSHLQ_V2_I]] |
| int16x8_t test_vshlq_s16(int16x8_t a, int16x8_t b) { |
| return vshlq_s16(a, b); |
| } |
| |
| // CHECK-LABEL: @test_vshlq_s32( |
| // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> |
| // CHECK: [[VSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> %a, <4 x i32> %b) |
| // CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VSHLQ_V2_I]] to <16 x i8> |
| // CHECK: ret <4 x i32> [[VSHLQ_V2_I]] |
| int32x4_t test_vshlq_s32(int32x4_t a, int32x4_t b) { |
| return vshlq_s32(a, b); |
| } |
| |
| // CHECK-LABEL: @test_vshlq_s64( |
| // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> |
| // CHECK: [[VSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> %a, <2 x i64> %b) |
| // CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VSHLQ_V2_I]] to <16 x i8> |
| // CHECK: ret <2 x i64> [[VSHLQ_V2_I]] |
| int64x2_t test_vshlq_s64(int64x2_t a, int64x2_t b) { |
| return vshlq_s64(a, b); |
| } |
| |
| // CHECK-LABEL: @test_vshlq_u8( |
| // CHECK: [[VSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.ushl.v16i8(<16 x i8> %a, <16 x i8> %b) |
| // CHECK: ret <16 x i8> [[VSHLQ_V_I]] |
| uint8x16_t test_vshlq_u8(uint8x16_t a, int8x16_t b) { |
| return vshlq_u8(a, b); |
| } |
| |
| // CHECK-LABEL: @test_vshlq_u16( |
| // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> |
| // CHECK: [[VSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> %a, <8 x i16> %b) |
| // CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VSHLQ_V2_I]] to <16 x i8> |
| // CHECK: ret <8 x i16> [[VSHLQ_V2_I]] |
| uint16x8_t test_vshlq_u16(uint16x8_t a, int16x8_t b) { |
| return vshlq_u16(a, b); |
| } |
| |
| // CHECK-LABEL: @test_vshlq_u32( |
| // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> |
| // CHECK: [[VSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> %a, <4 x i32> %b) |
| // CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VSHLQ_V2_I]] to <16 x i8> |
| // CHECK: ret <4 x i32> [[VSHLQ_V2_I]] |
| uint32x4_t test_vshlq_u32(uint32x4_t a, int32x4_t b) { |
| return vshlq_u32(a, b); |
| } |
| |
| // CHECK-LABEL: @test_vshlq_u64( |
| // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> |
| // CHECK: [[VSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64> %a, <2 x i64> %b) |
| // CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VSHLQ_V2_I]] to <16 x i8> |
| // CHECK: ret <2 x i64> [[VSHLQ_V2_I]] |
| uint64x2_t test_vshlq_u64(uint64x2_t a, int64x2_t b) { |
| return vshlq_u64(a, b); |
| } |
| |
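| // Saturating shift left by register (vqshl/vqshlq): like vshl, but results that |
| // overflow the element type saturate to its minimum or maximum value. |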
| // CHECK-LABEL: @test_vqshl_s8( |
| // CHECK: [[VQSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> %a, <8 x i8> %b) |
| // CHECK: ret <8 x i8> [[VQSHL_V_I]] |
| int8x8_t test_vqshl_s8(int8x8_t a, int8x8_t b) { |
| return vqshl_s8(a, b); |
| } |
| |
| // CHECK-LABEL: @test_vqshl_s16( |
| // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> |
| // CHECK: [[VQSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> %a, <4 x i16> %b) |
| // CHECK: [[VQSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQSHL_V2_I]] to <8 x i8> |
| // CHECK: ret <4 x i16> [[VQSHL_V2_I]] |
| int16x4_t test_vqshl_s16(int16x4_t a, int16x4_t b) { |
| return vqshl_s16(a, b); |
| } |
| |
| // CHECK-LABEL: @test_vqshl_s32( |
| // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> |
| // CHECK: [[VQSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> %a, <2 x i32> %b) |
| // CHECK: [[VQSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQSHL_V2_I]] to <8 x i8> |
| // CHECK: ret <2 x i32> [[VQSHL_V2_I]] |
| int32x2_t test_vqshl_s32(int32x2_t a, int32x2_t b) { |
| return vqshl_s32(a, b); |
| } |
| |
| // CHECK-LABEL: @test_vqshl_s64( |
| // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> |
| // CHECK: [[VQSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> %a, <1 x i64> %b) |
| // CHECK: [[VQSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQSHL_V2_I]] to <8 x i8> |
| // CHECK: ret <1 x i64> [[VQSHL_V2_I]] |
| int64x1_t test_vqshl_s64(int64x1_t a, int64x1_t b) { |
| return vqshl_s64(a, b); |
| } |
| |
| // CHECK-LABEL: @test_vqshl_u8( |
| // CHECK: [[VQSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %a, <8 x i8> %b) |
| // CHECK: ret <8 x i8> [[VQSHL_V_I]] |
| uint8x8_t test_vqshl_u8(uint8x8_t a, int8x8_t b) { |
| return vqshl_u8(a, b); |
| } |
| |
| // CHECK-LABEL: @test_vqshl_u16( |
| // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> |
| // CHECK: [[VQSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> %a, <4 x i16> %b) |
| // CHECK: [[VQSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQSHL_V2_I]] to <8 x i8> |
| // CHECK: ret <4 x i16> [[VQSHL_V2_I]] |
| uint16x4_t test_vqshl_u16(uint16x4_t a, int16x4_t b) { |
| return vqshl_u16(a, b); |
| } |
| |
| // CHECK-LABEL: @test_vqshl_u32( |
| // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> |
| // CHECK: [[VQSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> %a, <2 x i32> %b) |
| // CHECK: [[VQSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQSHL_V2_I]] to <8 x i8> |
| // CHECK: ret <2 x i32> [[VQSHL_V2_I]] |
| uint32x2_t test_vqshl_u32(uint32x2_t a, int32x2_t b) { |
| return vqshl_u32(a, b); |
| } |
| |
| // CHECK-LABEL: @test_vqshl_u64( |
| // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> |
| // CHECK: [[VQSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> %a, <1 x i64> %b) |
| // CHECK: [[VQSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQSHL_V2_I]] to <8 x i8> |
| // CHECK: ret <1 x i64> [[VQSHL_V2_I]] |
| uint64x1_t test_vqshl_u64(uint64x1_t a, int64x1_t b) { |
| return vqshl_u64(a, b); |
| } |
| |
| // CHECK-LABEL: @test_vqshlq_s8( |
| // CHECK: [[VQSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> %a, <16 x i8> %b) |
| // CHECK: ret <16 x i8> [[VQSHLQ_V_I]] |
| int8x16_t test_vqshlq_s8(int8x16_t a, int8x16_t b) { |
| return vqshlq_s8(a, b); |
| } |
| |
| // CHECK-LABEL: @test_vqshlq_s16( |
| // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> |
| // CHECK: [[VQSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> %a, <8 x i16> %b) |
| // CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSHLQ_V2_I]] to <16 x i8> |
| // CHECK: ret <8 x i16> [[VQSHLQ_V2_I]] |
| int16x8_t test_vqshlq_s16(int16x8_t a, int16x8_t b) { |
| return vqshlq_s16(a, b); |
| } |
| |
| // CHECK-LABEL: @test_vqshlq_s32( |
| // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> |
| // CHECK: [[VQSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> %a, <4 x i32> %b) |
| // CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSHLQ_V2_I]] to <16 x i8> |
| // CHECK: ret <4 x i32> [[VQSHLQ_V2_I]] |
| int32x4_t test_vqshlq_s32(int32x4_t a, int32x4_t b) { |
| return vqshlq_s32(a, b); |
| } |
| |
| // CHECK-LABEL: @test_vqshlq_s64( |
| // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> |
| // CHECK: [[VQSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> %a, <2 x i64> %b) |
| // CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSHLQ_V2_I]] to <16 x i8> |
| // CHECK: ret <2 x i64> [[VQSHLQ_V2_I]] |
| int64x2_t test_vqshlq_s64(int64x2_t a, int64x2_t b) { |
| return vqshlq_s64(a, b); |
| } |
| |
| // CHECK-LABEL: @test_vqshlq_u8( |
| // CHECK: [[VQSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> %a, <16 x i8> %b) |
| // CHECK: ret <16 x i8> [[VQSHLQ_V_I]] |
| uint8x16_t test_vqshlq_u8(uint8x16_t a, int8x16_t b) { |
| return vqshlq_u8(a, b); |
| } |
| |
| // CHECK-LABEL: @test_vqshlq_u16( |
| // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> |
| // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> |
|