| // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py |
| // REQUIRES: powerpc-registered-target |
| |
| // RUN: %clang -S -emit-llvm -target powerpc64-unknown-linux-gnu -mcpu=pwr8 -DNO_WARN_X86_INTRINSICS %s \ |
| // RUN: -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK-P8,CHECK,CHECK-BE |
| // RUN: %clang -S -emit-llvm -target powerpc64le-unknown-linux-gnu -mcpu=pwr8 -DNO_WARN_X86_INTRINSICS %s \ |
| // RUN: -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK-P8,CHECK,CHECK-LE |
| // RUN: %clang -S -emit-llvm -target powerpc64-unknown-linux-gnu -mcpu=pwr9 -DNO_WARN_X86_INTRINSICS %s \ |
| // RUN: -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK-P9,CHECK,CHECK-BE |
| // RUN: %clang -S -emit-llvm -target powerpc64le-unknown-linux-gnu -mcpu=pwr9 -DNO_WARN_X86_INTRINSICS %s \ |
| // RUN: -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK-P9,CHECK,CHECK-LE |
| |
| // RUN: %clang -S -emit-llvm -target powerpc64-unknown-freebsd13.0 -mcpu=pwr8 -DNO_WARN_X86_INTRINSICS %s \ |
| // RUN: -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK-P8,CHECK,CHECK-BE |
| // RUN: %clang -S -emit-llvm -target powerpc64le-unknown-freebsd13.0 -mcpu=pwr8 -DNO_WARN_X86_INTRINSICS %s \ |
| // RUN: -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK-P8,CHECK,CHECK-LE |
| // RUN: %clang -S -emit-llvm -target powerpc64-unknown-freebsd13.0 -mcpu=pwr9 -DNO_WARN_X86_INTRINSICS %s \ |
| // RUN: -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK-P9,CHECK,CHECK-BE |
| // RUN: %clang -S -emit-llvm -target powerpc64le-unknown-freebsd13.0 -mcpu=pwr9 -DNO_WARN_X86_INTRINSICS %s \ |
| // RUN: -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK-P9,CHECK,CHECK-LE |
| |
| #include <mmintrin.h> |
| |
| unsigned long long int ull1, ull2; |
| int i1, i2; |
| short s[4]; |
| signed char c[8]; |
| long long int ll1; |
| __m64 m1, m2, res; |
| |
| void __attribute__((noinline)) |
| test_add() { /* Calls every _mm_add_ and _mm_adds_ wrapper from mmintrin.h once; inputs are the globals m1 and m2, each result is stored to the global res. */ |
| res = _mm_add_pi32(m1, m2); /* pwr9 directives expect vec_add on int vector[4]; pwr8 directives expect two scalar "add nsw i32" */ |
| res = _mm_add_pi16(m1, m2); /* directives expect vec_add on short vector[8] */ |
| res = _mm_add_pi8(m1, m2); /* directives expect vec_add on signed char vector[16] */ |
| res = _mm_adds_pu16(m1, m2); /* directives expect vec_adds on unsigned short vector[8] */ |
| res = _mm_adds_pu8(m1, m2); /* directives expect vec_adds on unsigned char vector[16] */ |
| res = _mm_adds_pi16(m1, m2); /* directives expect vec_adds on short vector[8] */ |
| res = _mm_adds_pi8(m1, m2); /* directives expect vec_adds on signed char vector[16] */ |
| } /* The FileCheck "define" directives that follow list these wrappers in this same call order. NOTE(review): presumably reordering the calls changes emission order and breaks them; confirm before editing. */ |
| |
| // CHECK-LABEL: @test_add |
| |
| // CHECK: define available_externally i64 @_mm_add_pi32 |
| |
| // CHECK-P9: [[REG1:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long) |
| // CHECK-P9-NEXT: [[REG2:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG1]] to <4 x i32> |
| // CHECK-P9-NEXT: store <4 x i32> [[REG2]], <4 x i32>* [[REG3:[0-9a-zA-Z_%.]+]], align 16 |
| // CHECK-P9-NEXT: [[REG4:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8 |
| // CHECK-P9-NEXT: [[REG5:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long) |
| // CHECK-P9-NEXT: [[REG6:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG5]] to <4 x i32> |
| // CHECK-P9-NEXT: store <4 x i32> [[REG6]], <4 x i32>* [[REG7:[0-9a-zA-Z_%.]+]], align 16 |
| // CHECK-P9-NEXT: [[REG8:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG3]], align 16 |
| // CHECK-P9-NEXT: [[REG9:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG7]], align 16 |
| // CHECK-P9-NEXT: [[REG10:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_add(int vector[4], int vector[4])(<4 x i32> [[REG8]], <4 x i32> [[REG9]]) |
| // CHECK-P9-NEXT: store <4 x i32> [[REG10]], <4 x i32>* [[REG11:[0-9a-zA-Z_%.]+]], align 16 |
| // CHECK-P9-NEXT: [[REG12:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG11]], align 16 |
| // CHECK-P9-NEXT: [[REG13:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG12]] to <2 x i64> |
| // CHECK-P9-NEXT: [[REG14:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG13]], i32 0 |
| // CHECK-P9-NEXT: ret i64 [[REG14]] |
| |
| // CHECK-P8: [[REG15:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0 |
| // CHECK-P8-NEXT: [[REG16:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG15]] |
| // CHECK-P8: [[REG17:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0 |
| // CHECK-P8-NEXT: [[REG18:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG17]] |
| // CHECK-P8-NEXT: add nsw i32 [[REG16]], [[REG18]] |
| // CHECK-P8: [[REG19:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1 |
| // CHECK-P8-NEXT: [[REG20:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG19]] |
| // CHECK-P8: [[REG21:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1 |
| // CHECK-P8-NEXT: [[REG22:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG21]] |
| // CHECK-P8-NEXT: add nsw i32 [[REG20]], [[REG22]] |
| |
| // CHECK: define available_externally i64 @_mm_add_pi16 |
| // CHECK: [[REG23:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats |
| // CHECK-NEXT: [[REG24:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG23]] to <8 x i16> |
| // CHECK-NEXT: store <8 x i16> [[REG24]], <8 x i16>* [[REG25:[0-9a-zA-Z_%.]+]] |
| // CHECK: [[REG26:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats |
| // CHECK-NEXT: [[REG27:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG26]] to <8 x i16> |
| // CHECK-NEXT: store <8 x i16> [[REG27]], <8 x i16>* [[REG28:[0-9a-zA-Z_%.]+]] |
| // CHECK-NEXT: [[REG29:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG25]], align 16 |
| // CHECK-NEXT: [[REG30:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG28]], align 16 |
| // CHECK-NEXT: call <8 x i16> @vec_add(short vector[8], short vector[8])(<8 x i16> [[REG29]], <8 x i16> [[REG30]]) |
| |
| // CHECK: define available_externally i64 @_mm_add_pi8 |
| // CHECK: [[REG31:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats |
| // CHECK-NEXT: [[REG32:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG31]] to <16 x i8> |
| // CHECK-NEXT: store <16 x i8> [[REG32]], <16 x i8>* [[REG33:[0-9a-zA-Z_%.]+]] |
| // CHECK: [[REG34:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats |
| // CHECK-NEXT: [[REG35:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG34]] to <16 x i8> |
| // CHECK-NEXT: store <16 x i8> [[REG35]], <16 x i8>* [[REG36:[0-9a-zA-Z_%.]+]] |
| // CHECK-NEXT: [[REG37:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG33]], align 16 |
| // CHECK-NEXT: [[REG38:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG36]], align 16 |
| // CHECK-NEXT: call <16 x i8> @vec_add(signed char vector[16], signed char vector[16])(<16 x i8> [[REG37]], <16 x i8> [[REG38]]) |
| |
| // CHECK: define available_externally i64 @_mm_adds_pu16 |
| // CHECK: [[REG39:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats |
| // CHECK-NEXT: [[REG40:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG39]] to <8 x i16> |
| // CHECK-NEXT: store <8 x i16> [[REG40]], <8 x i16>* [[REG41:[0-9a-zA-Z_%.]+]] |
| // CHECK: [[REG42:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats |
| // CHECK-NEXT: [[REG43:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG42]] to <8 x i16> |
| // CHECK-NEXT: store <8 x i16> [[REG43]], <8 x i16>* [[REG44:[0-9a-zA-Z_%.]+]] |
| // CHECK-NEXT: [[REG45:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG41]], align 16 |
| // CHECK-NEXT: [[REG46:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG44]], align 16 |
| // CHECK-NEXT: call <8 x i16> @vec_adds(unsigned short vector[8], unsigned short vector[8])(<8 x i16> [[REG45]], <8 x i16> [[REG46]]) |
| |
| // CHECK: define available_externally i64 @_mm_adds_pu8 |
| // CHECK: [[REG47:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats |
| // CHECK-NEXT: [[REG48:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG47]] to <16 x i8> |
| // CHECK-NEXT: store <16 x i8> [[REG48]], <16 x i8>* [[REG49:[0-9a-zA-Z_%.]+]] |
| // CHECK: [[REG50:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats |
| // CHECK-NEXT: [[REG51:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG50]] to <16 x i8> |
| // CHECK-NEXT: store <16 x i8> [[REG51]], <16 x i8>* [[REG52:[0-9a-zA-Z_%.]+]] |
| // CHECK-NEXT: [[REG53:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG49]], align 16 |
| // CHECK-NEXT: [[REG54:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG52]], align 16 |
| // CHECK-NEXT: call <16 x i8> @vec_adds(unsigned char vector[16], unsigned char vector[16])(<16 x i8> [[REG53]], <16 x i8> [[REG54]]) |
| |
| // CHECK: define available_externally i64 @_mm_adds_pi16 |
| // CHECK: [[REG55:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats |
| // CHECK-NEXT: [[REG56:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG55]] to <8 x i16> |
| // CHECK-NEXT: store <8 x i16> [[REG56]], <8 x i16>* [[REG57:[0-9a-zA-Z_%.]+]] |
| // CHECK: [[REG58:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats |
| // CHECK-NEXT: [[REG59:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG58]] to <8 x i16> |
| // CHECK-NEXT: store <8 x i16> [[REG59]], <8 x i16>* [[REG60:[0-9a-zA-Z_%.]+]] |
| // CHECK-NEXT: [[REG61:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG57]], align 16 |
| // CHECK-NEXT: [[REG62:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG60]], align 16 |
| // CHECK-NEXT: call <8 x i16> @vec_adds(short vector[8], short vector[8])(<8 x i16> [[REG61]], <8 x i16> [[REG62]]) |
| |
| // CHECK: define available_externally i64 @_mm_adds_pi8 |
| // CHECK: [[REG63:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats |
| // CHECK-NEXT: [[REG64:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG63]] to <16 x i8> |
| // CHECK-NEXT: store <16 x i8> [[REG64]], <16 x i8>* [[REG65:[0-9a-zA-Z_%.]+]] |
| // CHECK: [[REG66:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats |
| // CHECK-NEXT: [[REG67:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG66]] to <16 x i8> |
| // CHECK-NEXT: store <16 x i8> [[REG67]], <16 x i8>* [[REG68:[0-9a-zA-Z_%.]+]] |
| // CHECK-NEXT: [[REG69:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG65]], align 16 |
| // CHECK-NEXT: [[REG70:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG68]], align 16 |
| // CHECK-NEXT: call <16 x i8> @vec_adds(signed char vector[16], signed char vector[16])(<16 x i8> [[REG69]], <16 x i8> [[REG70]]) |
| |
| void __attribute__((noinline)) |
| test_alt_name_add() { /* Calls each _m_padd alias once on the globals m1 and m2, storing to the global res; the directives that follow expect each alias to call one _mm_ wrapper and return its value. */ |
| res = _m_paddb(m1, m2); /* expected to call _mm_add_pi8 */ |
| res = _m_paddd(m1, m2); /* expected to call _mm_add_pi32 */ |
| res = _m_paddsb(m1, m2); /* expected to call _mm_adds_pi8 */ |
| res = _m_paddsw(m1, m2); /* expected to call _mm_adds_pi16 */ |
| res = _m_paddusb(m1, m2); /* expected to call _mm_adds_pu8 */ |
| res = _m_paddusw(m1, m2); /* expected to call _mm_adds_pu16 */ |
| res = _m_paddw(m1, m2); /* expected to call _mm_add_pi16 */ |
| } /* The "define" directives that follow list the aliases in this same call order. */ |
| |
| // CHECK-LABEL: @test_alt_name_add |
| |
| // CHECK: define available_externally i64 @_m_paddb |
| // CHECK: [[REG71:[0-9a-zA-Z_%.]+]] = call i64 @_mm_add_pi8 |
| // CHECK-NEXT: ret i64 [[REG71]] |
| |
| // CHECK: define available_externally i64 @_m_paddd |
| // CHECK: [[REG72:[0-9a-zA-Z_%.]+]] = call i64 @_mm_add_pi32 |
| // CHECK-NEXT: ret i64 [[REG72]] |
| |
| // CHECK: define available_externally i64 @_m_paddsb |
| // CHECK: [[REG73:[0-9a-zA-Z_%.]+]] = call i64 @_mm_adds_pi8 |
| // CHECK-NEXT: ret i64 [[REG73]] |
| |
| // CHECK: define available_externally i64 @_m_paddsw |
| // CHECK: [[REG74:[0-9a-zA-Z_%.]+]] = call i64 @_mm_adds_pi16 |
| // CHECK-NEXT: ret i64 [[REG74]] |
| |
| // CHECK: define available_externally i64 @_m_paddusb |
| // CHECK: [[REG75:[0-9a-zA-Z_%.]+]] = call i64 @_mm_adds_pu8 |
| // CHECK-NEXT: ret i64 [[REG75]] |
| |
| // CHECK: define available_externally i64 @_m_paddusw |
| // CHECK: [[REG76:[0-9a-zA-Z_%.]+]] = call i64 @_mm_adds_pu16 |
| // CHECK-NEXT: ret i64 [[REG76]] |
| |
| // CHECK: define available_externally i64 @_m_paddw |
| // CHECK: [[REG77:[0-9a-zA-Z_%.]+]] = call i64 @_mm_add_pi16 |
| // CHECK-NEXT: ret i64 [[REG77]] |
| |
| void __attribute__((noinline)) |
| test_cmp() { /* Calls every _mm_cmpeq_ and _mm_cmpgt_ wrapper once on the globals m1 and m2, storing each result to the global res. */ |
| res = _mm_cmpeq_pi32(m1, m2); /* pwr9 directives expect vec_cmpeq on int vector[4]; pwr8 directives expect "icmp eq i32" feeding a select of -1 or 0 */ |
| res = _mm_cmpeq_pi16(m1, m2); /* directives expect vec_cmpeq on short vector[8] */ |
| res = _mm_cmpeq_pi8(m1, m2); /* directives expect an inline asm "cmpb" call whose i64 result is stored, reloaded and returned */ |
| res = _mm_cmpgt_pi32(m1, m2); /* pwr9 directives expect vec_cmpgt on int vector[4]; pwr8 directives expect "icmp sgt i32" feeding a select of -1 or 0 */ |
| res = _mm_cmpgt_pi16(m1, m2); /* directives expect vec_cmpgt on short vector[8] */ |
| res = _mm_cmpgt_pi8(m1, m2); /* directives expect vec_cmpgt on signed char vector[16] */ |
| } /* The "define" directives that follow list these wrappers in this same call order. */ |
| |
| // CHECK-LABEL: @test_cmp |
| |
| // CHECK: define available_externally i64 @_mm_cmpeq_pi32 |
| |
| // CHECK-P9: [[REG78:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long) |
| // CHECK-P9-NEXT: [[REG79:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG78]] to <4 x i32> |
| // CHECK-P9-NEXT: store <4 x i32> [[REG79]], <4 x i32>* [[REG80:[0-9a-zA-Z_%.]+]] |
| // CHECK-P9: [[REG81:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long) |
| // CHECK-P9-NEXT: [[REG82:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG81]] to <4 x i32> |
| // CHECK-P9-NEXT: store <4 x i32> [[REG82]], <4 x i32>* [[REG83:[0-9a-zA-Z_%.]+]] |
| // CHECK-P9-NEXT: [[REG84:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG80]] |
| // CHECK-P9-NEXT: [[REG85:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG83]] |
| // CHECK-P9-NEXT: call <4 x i32> @vec_cmpeq(int vector[4], int vector[4])(<4 x i32> [[REG84]], <4 x i32> [[REG85]]) |
| |
| // CHECK-P8-COUNT-2: {{[0-9a-zA-Z_%.]+}} = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}} |
| // CHECK-P8: [[REG86:[0-9a-zA-Z_%.]+]] = icmp eq i32 {{[0-9a-zA-Z_%.]+}}, {{[0-9a-zA-Z_%.]+}} |
| // CHECK-P8: [[REG87:[0-9a-zA-Z_%.]+]] = select i1 [[REG86]], i32 -1, i32 0 |
| // CHECK-P8: {{[0-9a-zA-Z_%.]+}} = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}} |
| // CHECK-P8-NEXT: store i32 [[REG87]], i32* {{[0-9a-zA-Z_%.]+}} |
| // CHECK-P8-COUNT-2: {{[0-9a-zA-Z_%.]+}} = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}} |
| // CHECK-P8: store i32 {{[0-9a-zA-Z_%.]+}}, i32* {{[0-9a-zA-Z_%.]+}} |
| |
| // CHECK: define available_externally i64 @_mm_cmpeq_pi16 |
| // CHECK: [[REG88:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long) |
| // CHECK-NEXT: [[REG89:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG88]] to <8 x i16> |
| // CHECK-NEXT: store <8 x i16> [[REG89]], <8 x i16>* [[REG90:[0-9a-zA-Z_%.]+]] |
| // CHECK: [[REG91:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long) |
| // CHECK-NEXT: [[REG92:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG91]] to <8 x i16> |
| // CHECK-NEXT: store <8 x i16> [[REG92]], <8 x i16>* [[REG93:[0-9a-zA-Z_%.]+]] |
| // CHECK-NEXT: [[REG94:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG90]], align 16 |
| // CHECK-NEXT: [[REG95:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG93]], align 16 |
| // CHECK-NEXT: call <8 x i16> @vec_cmpeq(short vector[8], short vector[8])(<8 x i16> [[REG94]], <8 x i16> [[REG95]]) |
| |
| // CHECK: define available_externally i64 @_mm_cmpeq_pi8 |
| // CHECK: call i64 asm "cmpb $0,$1,$2;\0A", "=r,r,r" |
| // CHECK-NEXT: store i64 {{[0-9a-zA-Z_%.]+}}, i64* [[REG96:[0-9a-zA-Z_%.]+]], align 8 |
| // CHECK-NEXT: [[REG97:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG96]], align 8 |
| // CHECK-NEXT: ret i64 [[REG97]] |
| |
| // CHECK: define available_externally i64 @_mm_cmpgt_pi32 |
| |
| // CHECK-P9: [[REG98:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long) |
| // CHECK-P9-NEXT: [[REG99:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG98]] to <4 x i32> |
| // CHECK-P9-NEXT: store <4 x i32> [[REG99]], <4 x i32>* [[REG100:[0-9a-zA-Z_%.]+]] |
| // CHECK-P9: [[REG101:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long) |
| // CHECK-P9-NEXT: [[REG102:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG101]] to <4 x i32> |
| // CHECK-P9-NEXT: store <4 x i32> [[REG102]], <4 x i32>* [[REG103:[0-9a-zA-Z_%.]+]] |
| // CHECK-P9-NEXT: [[REG104:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG100]] |
| // CHECK-P9-NEXT: [[REG105:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG103]] |
| // CHECK-P9-NEXT: call <4 x i32> @vec_cmpgt(int vector[4], int vector[4])(<4 x i32> [[REG104]], <4 x i32> [[REG105]]) |
| |
| // CHECK-P8-COUNT-2: {{[0-9a-zA-Z_%.]+}} = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}} |
| // CHECK-P8: [[REG106:[0-9a-zA-Z_%.]+]] = icmp sgt i32 {{[0-9a-zA-Z_%.]+}}, {{[0-9a-zA-Z_%.]+}} |
| // CHECK-P8: [[REG107:[0-9a-zA-Z_%.]+]] = select i1 [[REG106]], i32 -1, i32 0 |
| // CHECK-P8: {{[0-9a-zA-Z_%.]+}} = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}} |
| // CHECK-P8-NEXT: store i32 [[REG107]], i32* {{[0-9a-zA-Z_%.]+}} |
| // CHECK-P8-COUNT-2: {{[0-9a-zA-Z_%.]+}} = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}} |
| // CHECK-P8: store i32 {{[0-9a-zA-Z_%.]+}}, i32* {{[0-9a-zA-Z_%.]+}} |
| |
| // CHECK: define available_externally i64 @_mm_cmpgt_pi16 |
| // CHECK: [[REG108:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long) |
| // CHECK-NEXT: [[REG109:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG108]] to <8 x i16> |
| // CHECK-NEXT: store <8 x i16> [[REG109]], <8 x i16>* [[REG110:[0-9a-zA-Z_%.]+]] |
| // CHECK: [[REG111:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long) |
| // CHECK-NEXT: [[REG112:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG111]] to <8 x i16> |
| // CHECK-NEXT: store <8 x i16> [[REG112]], <8 x i16>* [[REG113:[0-9a-zA-Z_%.]+]] |
| // CHECK-NEXT: [[REG114:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG110]] |
| // CHECK-NEXT: [[REG115:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG113]] |
| // CHECK-NEXT: call <8 x i16> @vec_cmpgt(short vector[8], short vector[8])(<8 x i16> [[REG114]], <8 x i16> [[REG115]]) |
| |
| // CHECK: define available_externally i64 @_mm_cmpgt_pi8 |
| // CHECK: [[REG116:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long) |
| // CHECK-NEXT: [[REG117:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG116]] to <16 x i8> |
| // CHECK-NEXT: store <16 x i8> [[REG117]], <16 x i8>* [[REG118:[0-9a-zA-Z_%.]+]] |
| // CHECK: [[REG119:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long) |
| // CHECK-NEXT: [[REG120:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG119]] to <16 x i8> |
| // CHECK-NEXT: store <16 x i8> [[REG120]], <16 x i8>* [[REG121:[0-9a-zA-Z_%.]+]] |
| // CHECK-NEXT: [[REG122:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG118]] |
| // CHECK-NEXT: [[REG123:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG121]] |
| // CHECK-NEXT: call <16 x i8> @vec_cmpgt(signed char vector[16], signed char vector[16])(<16 x i8> [[REG122]], <16 x i8> [[REG123]]) |
| |
| void __attribute__((noinline)) |
| test_alt_name_cmp() { /* Calls each _m_pcmpeq and _m_pcmpgt alias once on the globals m1 and m2, storing to the global res; the directives that follow expect each alias to call one _mm_ wrapper and return its value. */ |
| res = _m_pcmpeqb(m1, m2); /* expected to call _mm_cmpeq_pi8 */ |
| res = _m_pcmpeqd(m1, m2); /* expected to call _mm_cmpeq_pi32 */ |
| res = _m_pcmpeqw(m1, m2); /* expected to call _mm_cmpeq_pi16 */ |
| res = _m_pcmpgtb(m1, m2); /* expected to call _mm_cmpgt_pi8 */ |
| res = _m_pcmpgtd(m1, m2); /* expected to call _mm_cmpgt_pi32 */ |
| res = _m_pcmpgtw(m1, m2); /* expected to call _mm_cmpgt_pi16 */ |
| } /* The "define" directives that follow list the aliases in this same call order. */ |
| |
| // CHECK-LABEL: @test_alt_name_cmp |
| |
| // CHECK: define available_externally i64 @_m_pcmpeqb |
| // CHECK: [[REG124:[0-9a-zA-Z_%.]+]] = call i64 @_mm_cmpeq_pi8 |
| // CHECK-NEXT: ret i64 [[REG124]] |
| |
| // CHECK: define available_externally i64 @_m_pcmpeqd |
| // CHECK: [[REG125:[0-9a-zA-Z_%.]+]] = call i64 @_mm_cmpeq_pi32 |
| // CHECK-NEXT: ret i64 [[REG125]] |
| |
| // CHECK: define available_externally i64 @_m_pcmpeqw |
| // CHECK: [[REG126:[0-9a-zA-Z_%.]+]] = call i64 @_mm_cmpeq_pi16 |
| // CHECK-NEXT: ret i64 [[REG126]] |
| |
| // CHECK: define available_externally i64 @_m_pcmpgtb |
| // CHECK: [[REG127:[0-9a-zA-Z_%.]+]] = call i64 @_mm_cmpgt_pi8 |
| // CHECK-NEXT: ret i64 [[REG127]] |
| |
| // CHECK: define available_externally i64 @_m_pcmpgtd |
| // CHECK: [[REG128:[0-9a-zA-Z_%.]+]] = call i64 @_mm_cmpgt_pi32 |
| // CHECK-NEXT: ret i64 [[REG128]] |
| |
| // CHECK: define available_externally i64 @_m_pcmpgtw |
| // CHECK: [[REG129:[0-9a-zA-Z_%.]+]] = call i64 @_mm_cmpgt_pi16 |
| // CHECK-NEXT: ret i64 [[REG129]] |
| |
| void __attribute__((noinline)) |
| test_convert() { /* Calls the four _mm_cvt conversion wrappers once each, moving values between the globals m1, ll1 and i1. */ |
| ll1 = _mm_cvtm64_si64(m1); /* directives expect an i64 load that is returned directly */ |
| m1 = _mm_cvtsi32_si64(i1); /* directives expect an i32 load, a zext to i64, then the return */ |
| m1 = _mm_cvtsi64_m64(ll1); /* directives expect an i64 load that is returned directly */ |
| i1 = _mm_cvtsi64_si32(m1); /* directives expect an i64 load truncated to i32; the definition returns "signext i32" */ |
| } /* The "define" directives that follow list these wrappers in this same call order. */ |
| |
| // CHECK-LABEL: @test_convert |
| |
| // CHECK: define available_externally i64 @_mm_cvtm64_si64 |
| // CHECK: [[REG130:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8 |
| // CHECK-NEXT: ret i64 [[REG130]] |
| |
| // CHECK: define available_externally i64 @_mm_cvtsi32_si64 |
| // CHECK: [[REG131:[0-9a-zA-Z_%.]+]] = load i32, i32* {{[0-9a-zA-Z_%.]+}} |
| // CHECK-NEXT: [[REG132:[0-9a-zA-Z_%.]+]] = zext i32 [[REG131]] to i64 |
| // CHECK-NEXT: ret i64 [[REG132]] |
| |
| // CHECK: define available_externally i64 @_mm_cvtsi64_m64 |
| // CHECK: [[REG133:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8 |
| // CHECK-NEXT: ret i64 [[REG133]] |
| |
| // CHECK: define available_externally signext i32 @_mm_cvtsi64_si32 |
| // CHECK: [[REG134:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8 |
| // CHECK-NEXT: [[REG135:[0-9a-zA-Z_%.]+]] = trunc i64 [[REG134]] to i32 |
| // CHECK-NEXT: ret i32 [[REG135]] |
| |
| void __attribute__((noinline)) |
| test_alt_name_convert() { /* Calls the _m_from_ and _m_to_ conversion aliases once each, moving values between the globals m1, ll1 and i1. */ |
| m1 = _m_from_int(i1); /* expected to call _mm_cvtsi32_si64 and return its value */ |
| m1 = _m_from_int64(ll1); /* directives expect an i64 load that is returned directly, with no forwarding call checked */ |
| i1 = _m_to_int(m1); /* expected to call _mm_cvtsi64_si32 (signext i32) and return its value */ |
| ll1 = _m_to_int64(m1); /* directives expect an i64 load that is returned directly, with no forwarding call checked */ |
| } /* The "define" directives that follow list the aliases in this same call order. */ |
| |
| // CHECK-LABEL: @test_alt_name_convert |
| |
| // CHECK: define available_externally i64 @_m_from_int |
| // CHECK: [[REG136:[0-9a-zA-Z_%.]+]] = call i64 @_mm_cvtsi32_si64 |
| // CHECK-NEXT: ret i64 [[REG136]] |
| |
| // CHECK: define available_externally i64 @_m_from_int64 |
| // CHECK: [[REG137:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}} |
| // CHECK-NEXT: ret i64 [[REG137]] |
| |
| // CHECK: define available_externally signext i32 @_m_to_int |
| // CHECK: [[REG138:[0-9a-zA-Z_%.]+]] = call signext i32 @_mm_cvtsi64_si32 |
| // CHECK-NEXT: ret i32 [[REG138]] |
| |
| // CHECK: define available_externally i64 @_m_to_int64 |
| // CHECK: [[REG139:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}} |
| // CHECK-NEXT: ret i64 [[REG139]] |
| |
| void __attribute__((noinline)) |
| test_empty() { /* Calls both spellings of the empty intrinsic; the directives that follow expect each definition to be an entry block followed immediately by "ret void". */ |
| _mm_empty(); |
| _m_empty(); |
| } |
| |
| // CHECK-LABEL: @test_empty |
| |
| // CHECK: define available_externally void @_mm_empty |
| // CHECK-NEXT: entry |
| // CHECK-NEXT: ret void |
| |
| // CHECK: define available_externally void @_m_empty |
| // CHECK-NEXT: entry |
| // CHECK-NEXT: ret void |
| |
| void __attribute__((noinline)) |
| test_logic() { /* Calls the four _si64 logic wrappers once each on the globals m1 and m2, storing each result to the global res. */ |
| res = _mm_and_si64(m1, m2); /* directives expect a single "and i64" that is returned */ |
| res = _mm_andnot_si64(m1, m2); /* directives expect "xor i64 ..., -1" whose result is the first operand of an "and i64" that is returned */ |
| res = _mm_or_si64(m1, m2); /* directives expect a single "or i64" that is returned */ |
| res = _mm_xor_si64(m1, m2); /* directives expect a single "xor i64" that is returned */ |
| } /* The "define" directives that follow list these wrappers in this same call order. */ |
| |
| // CHECK-LABEL: @test_logic |
| |
| // CHECK: define available_externally i64 @_mm_and_si64 |
| // CHECK: [[REG140:[0-9a-zA-Z_%.]+]] = and i64 {{[0-9a-zA-Z_%.]+}}, {{[0-9a-zA-Z_%.]+}} |
| // CHECK-NEXT: ret i64 [[REG140]] |
| |
| // CHECK: define available_externally i64 @_mm_andnot_si64 |
| // CHECK: [[REG141:[0-9a-zA-Z_%.]+]] = xor i64 {{[0-9a-zA-Z_%.]+}}, -1 |
| // CHECK: [[REG142:[0-9a-zA-Z_%.]+]] = and i64 [[REG141]], {{[0-9a-zA-Z_%.]+}} |
| // CHECK-NEXT: ret i64 [[REG142]] |
| |
| // CHECK: define available_externally i64 @_mm_or_si64 |
| // CHECK: [[REG143:[0-9a-zA-Z_%.]+]] = or i64 {{[0-9a-zA-Z_%.]+}}, {{[0-9a-zA-Z_%.]+}} |
| // CHECK-NEXT: ret i64 [[REG143]] |
| |
| // CHECK: define available_externally i64 @_mm_xor_si64 |
| // CHECK: [[REG144:[0-9a-zA-Z_%.]+]] = xor i64 {{[0-9a-zA-Z_%.]+}}, {{[0-9a-zA-Z_%.]+}} |
| // CHECK-NEXT: ret i64 [[REG144]] |
| |
| void __attribute__((noinline)) |
| test_alt_name_logic() { /* Calls each logic alias once on the globals m1 and m2, storing to the global res; the directives that follow expect each alias to call one _mm_ wrapper and return its value. */ |
| res = _m_pand(m1, m2); /* expected to call _mm_and_si64 */ |
| res = _m_pandn(m1, m2); /* expected to call _mm_andnot_si64 */ |
| res = _m_por(m1, m2); /* expected to call _mm_or_si64 */ |
| res = _m_pxor(m1, m2); /* expected to call _mm_xor_si64 */ |
| } /* The "define" directives that follow list the aliases in this same call order. */ |
| |
| // CHECK-LABEL: @test_alt_name_logic |
| |
| // CHECK: define available_externally i64 @_m_pand |
| // CHECK: [[REG145:[0-9a-zA-Z_%.]+]] = call i64 @_mm_and_si64 |
| // CHECK-NEXT: ret i64 [[REG145]] |
| |
| // CHECK: define available_externally i64 @_m_pandn |
| // CHECK: [[REG146:[0-9a-zA-Z_%.]+]] = call i64 @_mm_andnot_si64 |
| // CHECK-NEXT: ret i64 [[REG146]] |
| |
| // CHECK: define available_externally i64 @_m_por |
| // CHECK: [[REG147:[0-9a-zA-Z_%.]+]] = call i64 @_mm_or_si64 |
| // CHECK-NEXT: ret i64 [[REG147]] |
| |
| // CHECK: define available_externally i64 @_m_pxor |
| // CHECK: [[REG148:[0-9a-zA-Z_%.]+]] = call i64 @_mm_xor_si64 |
| // CHECK-NEXT: ret i64 [[REG148]] |
| |
| void __attribute__((noinline)) |
| test_madd() { /* Calls the multiply-add wrapper and its alias once each on the globals m1 and m2, storing to the global res. */ |
| res = _mm_madd_pi16(m1, m2); /* directives expect vec_vmsumshm on two <8 x i16> operands plus a <4 x i32> third operand loaded from a slot that was stored with zeroinitializer */ |
| res = _m_pmaddwd(m1, m2); /* expected to call _mm_madd_pi16 and return its value */ |
| } |
| |
| // CHECK-LABEL: @test_madd |
| |
| // CHECK: define available_externally i64 @_mm_madd_pi16 |
| // CHECK: store <4 x i32> zeroinitializer, <4 x i32>* [[REG149:[0-9a-zA-Z_%.]+]], align 16 |
| // CHECK: [[REG150:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 {{[0-9a-zA-Z_%.]+}}) |
| // CHECK-NEXT: [[REG151:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG150]] to <8 x i16> |
| // CHECK-NEXT: store <8 x i16> [[REG151]], <8 x i16>* [[REG152:[0-9a-zA-Z_%.]+]], align 16 |
| // CHECK: [[REG153:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 {{[0-9a-zA-Z_%.]+}}) |
| // CHECK-NEXT: [[REG154:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG153]] to <8 x i16> |
| // CHECK-NEXT: store <8 x i16> [[REG154]], <8 x i16>* [[REG155:[0-9a-zA-Z_%.]+]], align 16 |
| // CHECK-NEXT: [[REG156:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG152]], align 16 |
| // CHECK-NEXT: [[REG157:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG155]], align 16 |
| // CHECK-NEXT: [[REG158:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG149]], align 16 |
| // CHECK-NEXT: [[REG159:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_vmsumshm(<8 x i16> [[REG156]], <8 x i16> [[REG157]], <4 x i32> [[REG158]]) |
| // CHECK-NEXT: store <4 x i32> [[REG159]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16 |
| |
| // CHECK: define available_externally i64 @_m_pmaddwd |
| // CHECK: [[REG160:[0-9a-zA-Z_%.]+]] = call i64 @_mm_madd_pi16 |
| // CHECK-NEXT: ret i64 [[REG160]] |
| |
| void __attribute__((noinline)) |
| test_mul() { /* Calls the two _pi16 multiply wrappers once each on the globals m1 and m2, storing to the global res. */ |
| res = _mm_mulhi_pi16(m1, m2); /* directives expect vec_vmulesh, then vec_vmulosh, then vec_perm using a <16 x i8> mask constant that differs between the BE and LE prefixes */ |
| res = _mm_mullo_pi16(m1, m2); /* directives expect a plain "mul <8 x i16>" whose result is stored */ |
| } /* The "define" directives that follow list these wrappers in this same call order. */ |
| |
| // CHECK-LABEL: @test_mul |
| |
| // CHECK: define available_externally i64 @_mm_mulhi_pi16 |
| // CHECK-BE: store <16 x i8> <i8 0, i8 1, i8 16, i8 17, i8 4, i8 5, i8 20, i8 21, i8 0, i8 1, i8 16, i8 17, i8 4, i8 5, i8 20, i8 21>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16 |
| // CHECK-LE: store <16 x i8> <i8 2, i8 3, i8 18, i8 19, i8 6, i8 7, i8 22, i8 23, i8 10, i8 11, i8 26, i8 27, i8 14, i8 15, i8 30, i8 31>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16 |
| // CHECK: [[REG161:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long) |
| // CHECK: store <8 x i16> {{[0-9a-zA-Z_%.]+}}, <8 x i16>* [[REG162:[0-9a-zA-Z_%.]+]], align 16 |
| // CHECK-NEXT: [[REG163:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8 |
| // CHECK-NEXT: [[REG164:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long) |
| // CHECK-NEXT: [[REG165:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG164]] to <8 x i16> |
| // CHECK-NEXT: store <8 x i16> [[REG165]], <8 x i16>* [[REG166:[0-9a-zA-Z_%.]+]], align 16 |
| // CHECK-NEXT: [[REG167:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG162]], align 16 |
| // CHECK-NEXT: [[REG168:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG166]], align 16 |
| // CHECK-NEXT: [[REG169:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_vmulesh(<8 x i16> [[REG167]], <8 x i16> [[REG168]]) |
| // CHECK-NEXT: store <4 x i32> [[REG169]], <4 x i32>* [[REG170:[0-9a-zA-Z_%.]+]], align 16 |
| // CHECK-NEXT: [[REG171:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG162]], align 16 |
| // CHECK-NEXT: [[REG172:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG166]], align 16 |
| // CHECK-NEXT: [[REG173:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_vmulosh(<8 x i16> [[REG171]], <8 x i16> [[REG172]]) |
| // CHECK-NEXT: store <4 x i32> [[REG173]], <4 x i32>* [[REG174:[0-9a-zA-Z_%.]+]], align 16 |
| // CHECK-NEXT: [[REG175:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG170]], align 16 |
| // CHECK-NEXT: [[REG176:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG174]], align 16 |
| // CHECK-NEXT: [[REG177:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16 |
| // CHECK-NEXT: call <4 x i32> @vec_perm(int vector[4], int vector[4], unsigned char vector[16])(<4 x i32> [[REG175]], <4 x i32> [[REG176]], <16 x i8> [[REG177]]) |
| |
| // CHECK: define available_externally i64 @_mm_mullo_pi16 |
| // CHECK: [[REG178:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long) |
| // CHECK-NEXT: [[REG179:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG178]] to <8 x i16> |
| // CHECK-NEXT: store <8 x i16> [[REG179]], <8 x i16>* [[REG180:[0-9a-zA-Z_%.]+]], align 16 |
| // CHECK-NEXT: [[REG181:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8 |
| // CHECK-NEXT: [[REG182:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long) |
| // CHECK-NEXT: [[REG183:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG182]] to <8 x i16> |
| // CHECK-NEXT: store <8 x i16> [[REG183]], <8 x i16>* [[REG184:[0-9a-zA-Z_%.]+]], align 16 |
| // CHECK-NEXT: [[REG185:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG180]], align 16 |
| // CHECK-NEXT: [[REG186:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG184]], align 16 |
| // CHECK-NEXT: [[REG187:[0-9a-zA-Z_%.]+]] = mul <8 x i16> [[REG185]], [[REG186]] |
| // CHECK-NEXT: store <8 x i16> [[REG187]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16 |
| |
| void __attribute__((noinline)) |
| test_alt_name_mul() { /* Calls the two multiply aliases once each on the globals m1 and m2, storing to the global res; the directives that follow expect each alias to call one _mm_ wrapper and return its value. */ |
| res = _m_pmulhw(m1, m2); /* expected to call _mm_mulhi_pi16 */ |
| res = _m_pmullw(m1, m2); /* expected to call _mm_mullo_pi16 */ |
| } |
| |
| // CHECK-LABEL: @test_alt_name_mul |
| |
| // CHECK: define available_externally i64 @_m_pmulhw |
| // CHECK: [[REG188:[0-9a-zA-Z_%.]+]] = call i64 @_mm_mulhi_pi16 |
| // CHECK-NEXT: ret i64 [[REG188]] |
| |
| // CHECK: define available_externally i64 @_m_pmullw |
| // CHECK: [[REG189:[0-9a-zA-Z_%.]+]] = call i64 @_mm_mullo_pi16 |
| // CHECK-NEXT: ret i64 [[REG189]] |
| |
| void __attribute__((noinline)) |
| test_packs() { /* Calls the three _mm_packs_ wrappers once each; unlike the other tests here, the arguments are the unsigned long long globals ull1 and ull2 cast to __m64. Results go to the global res. */ |
| res = _mm_packs_pu16((__m64)ull1, (__m64)ull2); /* directives expect vec_cmplt, vec_packs on unsigned short vector[8], vec_pack on bool vector[8], then vec_sel with a zeroinitializer operand */ |
| res = _mm_packs_pi16((__m64)ull1, (__m64)ull2); /* directives expect vec_packs on short vector[8], both operands loaded from the same slot */ |
| res = _mm_packs_pi32((__m64)ull1, (__m64)ull2); /* directives expect vec_packs on int vector[4], both operands loaded from the same slot */ |
| } /* For each wrapper the directives also expect the two i64 argument slots to be reloaded in a different order under the LE and BE prefixes. */ |
| |
| // CHECK-LABEL: @test_packs |
| |
| // CHECK: define available_externally i64 @_mm_packs_pu16(i64 [[REG190:[0-9a-zA-Z_%.]+]], i64 [[REG191:[0-9a-zA-Z_%.]+]]) |
| // CHECK: store i64 [[REG190]], i64* [[REG192:[0-9a-zA-Z_%.]+]], align 8 |
| // CHECK-NEXT: store i64 [[REG191]], i64* [[REG193:[0-9a-zA-Z_%.]+]], align 8 |
| // CHECK-LE: load i64, i64* [[REG192]], align 8 |
| // CHECK: load i64, i64* [[REG193]], align 8 |
| // CHECK-BE: load i64, i64* [[REG192]], align 8 |
| // CHECK: [[REG194:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_cmplt |
| // CHECK-NEXT: store <8 x i16> [[REG194]], <8 x i16>* [[REG195:[0-9a-zA-Z_%.]+]], align 16 |
| // CHECK-NEXT: [[REG196:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG197:[0-9a-zA-Z_%.]+]], align 16 |
| // CHECK-NEXT: [[REG198:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG197]], align 16 |
| // CHECK-NEXT: [[REG199:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_packs(unsigned short vector[8], unsigned short vector[8])(<8 x i16> [[REG196]], <8 x i16> [[REG198]]) |
| // CHECK-NEXT: store <16 x i8> [[REG199]], <16 x i8>* [[REG200:[0-9a-zA-Z_%.]+]], align 16 |
| // CHECK-NEXT: [[REG201:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG195]], align 16 |
| // CHECK-NEXT: [[REG202:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG195]], align 16 |
| // CHECK-NEXT: [[REG203:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_pack(bool vector[8], bool vector[8])(<8 x i16> [[REG201]], <8 x i16> [[REG202]]) |
| // CHECK-NEXT: store <16 x i8> [[REG203]], <16 x i8>* [[REG204:[0-9a-zA-Z_%.]+]], align 16 |
| // CHECK-NEXT: [[REG205:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG200]], align 16 |
| // CHECK-NEXT: [[REG206:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG204]], align 16 |
| // CHECK-NEXT: call <16 x i8> @vec_sel(unsigned char vector[16], unsigned char vector[16], bool vector[16])(<16 x i8> [[REG205]], <16 x i8> zeroinitializer, <16 x i8> [[REG206]]) |
| |
| // CHECK: define available_externally i64 @_mm_packs_pi16(i64 [[REG207:[0-9a-zA-Z_%.]+]], i64 [[REG208:[0-9a-zA-Z_%.]+]]) |
| // CHECK: store i64 [[REG207]], i64* [[REG209:[0-9a-zA-Z_%.]+]], align 8 |
| // CHECK-NEXT: store i64 [[REG208]], i64* [[REG210:[0-9a-zA-Z_%.]+]], align 8 |
| // CHECK-LE: load i64, i64* [[REG209]], align 8 |
| // CHECK: load i64, i64* [[REG210]], align 8 |
| // CHECK-BE: load i64, i64* [[REG209]], align 8 |
| // CHECK: [[REG211:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG212:[0-9a-zA-Z_%.]+]], align 16 |
| // CHECK-NEXT: [[REG213:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG212]], align 16 |
| // CHECK-NEXT: call <16 x i8> @vec_packs(short vector[8], short vector[8])(<8 x i16> [[REG211]], <8 x i16> [[REG213]]) |
| |
| // CHECK: define available_externally i64 @_mm_packs_pi32(i64 [[REG214:[0-9a-zA-Z_%.]+]], i64 [[REG215:[0-9a-zA-Z_%.]+]]) |
| // CHECK: store i64 [[REG214]], i64* [[REG216:[0-9a-zA-Z_%.]+]], align 8 |
| // CHECK-NEXT: store i64 [[REG215]], i64* [[REG217:[0-9a-zA-Z_%.]+]], align 8 |
| // CHECK-LE: load i64, i64* [[REG216]], align 8 |
| // CHECK: load i64, i64* [[REG217]], align 8 |
| // CHECK-BE: load i64, i64* [[REG216]], align 8 |
| // CHECK: [[REG218:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG219:[0-9a-zA-Z_%.]+]], align 16 |
| // CHECK-NEXT: [[REG220:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG219]], align 16 |
| // CHECK-NEXT: call <8 x i16> @vec_packs(int vector[4], int vector[4])(<4 x i32> [[REG218]], <4 x i32> [[REG220]]) |
| |
| void __attribute__((noinline)) |
| test_alt_name_packs() { /* Calls each _m_pack* alias on m1, m2 (declared elsewhere in the file) and stores into res; the FileCheck patterns that follow are written in this same call order. */ |
| res = _m_packssdw(m1, m2); /* patterns expect the alias to call _mm_packs_pi32 and return its result */ |
| res = _m_packsswb(m1, m2); /* patterns expect the alias to call _mm_packs_pi16 and return its result */ |
| res = _m_packuswb(m1, m2); /* patterns expect the alias to call _mm_packs_pu16 and return its result */ |
| } |
| |
| // CHECK-LABEL: @test_alt_name_packs |
| |
| // CHECK: define available_externally i64 @_m_packssdw |
| // CHECK: [[REG221:[0-9a-zA-Z_%.]+]] = call i64 @_mm_packs_pi32 |
| // CHECK-NEXT: ret i64 [[REG221]] |
| |
| // CHECK: define available_externally i64 @_m_packsswb |
| // CHECK: [[REG222:[0-9a-zA-Z_%.]+]] = call i64 @_mm_packs_pi16 |
| // CHECK-NEXT: ret i64 [[REG222]] |
| |
| // CHECK: define available_externally i64 @_m_packuswb |
| // CHECK: [[REG223:[0-9a-zA-Z_%.]+]] = call i64 @_mm_packs_pu16 |
| // CHECK-NEXT: ret i64 [[REG223]] |
| |
| void __attribute__((noinline)) |
| test_set() { /* Calls the _mm_set_pi* intrinsics with constant arguments; unlike most drivers here the result is written to m1, not res. Keep the call order: the patterns that follow are written in the same order. */ |
| m1 = _mm_set_pi32(2134, -128); /* patterns expect i32 stores to elements 0 and 1 of a [2 x i32] */ |
| m1 = _mm_set_pi16(2134, -128, 1234, 6354); /* patterns expect i16 stores to elements 0..3 of a [4 x i16] */ |
| m1 = _mm_set_pi8(-128, 10, 0, 123, -1, -5, 127, 5); /* patterns expect i8 stores to elements 0..7 of an [8 x i8] */ |
| } |
| |
| // CHECK-LABEL: @test_set |
| |
| // CHECK: define available_externally i64 @_mm_set_pi32 |
| // CHECK: [[REG224:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0 |
| // CHECK-NEXT: store i32 {{[0-9a-zA-Z_%.]+}}, i32* [[REG224]] |
| // CHECK: [[REG225:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1 |
| // CHECK-NEXT: store i32 {{[0-9a-zA-Z_%.]+}}, i32* [[REG225]] |
| |
| // CHECK: define available_externally i64 @_mm_set_pi16 |
| // CHECK: [[REG226:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0 |
| // CHECK-NEXT: store i16 {{[0-9a-zA-Z_%.]+}}, i16* [[REG226]] |
| // CHECK: [[REG227:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1 |
| // CHECK-NEXT: store i16 {{[0-9a-zA-Z_%.]+}}, i16* [[REG227]] |
| // CHECK: [[REG228:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 2 |
| // CHECK-NEXT: store i16 {{[0-9a-zA-Z_%.]+}}, i16* [[REG228]] |
| // CHECK: [[REG229:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 3 |
| // CHECK-NEXT: store i16 {{[0-9a-zA-Z_%.]+}}, i16* [[REG229]] |
| |
| // CHECK: define available_externally i64 @_mm_set_pi8 |
| // CHECK: [[REG230:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [8 x i8], [8 x i8]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0 |
| // CHECK-NEXT: store i8 {{[0-9a-zA-Z_%.]+}}, i8* [[REG230]] |
| // CHECK: [[REG231:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [8 x i8], [8 x i8]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1 |
| // CHECK-NEXT: store i8 {{[0-9a-zA-Z_%.]+}}, i8* [[REG231]] |
| // CHECK: [[REG232:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [8 x i8], [8 x i8]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 2 |
| // CHECK-NEXT: store i8 {{[0-9a-zA-Z_%.]+}}, i8* [[REG232]] |
| // CHECK: [[REG233:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [8 x i8], [8 x i8]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 3 |
| // CHECK-NEXT: store i8 {{[0-9a-zA-Z_%.]+}}, i8* [[REG233]] |
| // CHECK: [[REG234:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [8 x i8], [8 x i8]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 4 |
| // CHECK-NEXT: store i8 {{[0-9a-zA-Z_%.]+}}, i8* [[REG234]] |
| // CHECK: [[REG235:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [8 x i8], [8 x i8]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 5 |
| // CHECK-NEXT: store i8 {{[0-9a-zA-Z_%.]+}}, i8* [[REG235]] |
| // CHECK: [[REG236:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [8 x i8], [8 x i8]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 6 |
| // CHECK-NEXT: store i8 {{[0-9a-zA-Z_%.]+}}, i8* [[REG236]] |
| // CHECK: [[REG237:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [8 x i8], [8 x i8]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 7 |
| // CHECK-NEXT: store i8 {{[0-9a-zA-Z_%.]+}}, i8* [[REG237]] |
| |
| void __attribute__((noinline)) |
| test_set1() { /* Calls the _mm_set1_pi* broadcast intrinsics on i1, s[0] and c[0] (declared elsewhere in the file) and stores into res; the patterns that follow are written in this call order. */ |
| res = _mm_set1_pi32(i1); /* patterns expect two i32 element stores into a [2 x i32] */ |
| res = _mm_set1_pi16(s[0]); /* pwr9 patterns expect vec_splats(short); pwr8 patterns expect four i16 element stores */ |
| res = _mm_set1_pi8(c[0]); /* patterns expect vec_splats(signed char), then extraction of element 0 as i64 */ |
| } |
| |
| // CHECK-LABEL: @test_set1 |
| |
| // CHECK: define available_externally i64 @_mm_set1_pi32 |
| // CHECK: [[REG244:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0 |
| // CHECK-NEXT: store i32 {{[0-9a-zA-Z_%.]+}}, i32* [[REG244]], align 8 |
| // CHECK-NEXT: [[REG245:[0-9a-zA-Z_%.]+]] = load i32, i32* {{[0-9a-zA-Z_%.]+}}, align 4 |
| // CHECK: [[REG246:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1 |
| // CHECK-NEXT: store i32 {{[0-9a-zA-Z_%.]+}}, i32* [[REG246]], align 4 |
| |
| // CHECK: define available_externally i64 @_mm_set1_pi16 |
| |
| // CHECK-P9: [[REG247:[0-9a-zA-Z_%.]+]] = load i16, i16* {{[0-9a-zA-Z_%.]+}}, align 2 |
| // CHECK-P9-NEXT: [[REG248:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_splats(short)(i16 signext [[REG247]]) |
| // CHECK-P9-NEXT: store <8 x i16> [[REG248]], <8 x i16>* [[REG249:[0-9a-zA-Z_%.]+]], align 16 |
| // CHECK-P9-NEXT: [[REG250:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG249]], align 16 |
| // CHECK-P9-NEXT: [[REG251:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG250]] to <2 x i64> |
| // CHECK-P9-NEXT: [[REG252:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG251]], i32 0 |
| // CHECK-P9-NEXT: ret i64 [[REG252]] |
| |
| // CHECK-P8: [[REG253:[0-9a-zA-Z_%.]+]] = load i16, i16* [[REG254:[0-9a-zA-Z_%.]+]], align 2 |
| // CHECK-P8: [[REG255:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0 |
| // CHECK-P8-NEXT: store i16 [[REG253]], i16* [[REG255]], align 8 |
| // CHECK-P8-NEXT: [[REG250:[0-9a-zA-Z_%.]+]] = load i16, i16* [[REG254]], align 2 |
| // CHECK-P8: [[REG256:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1 |
| // CHECK-P8-NEXT: store i16 [[REG250]], i16* [[REG256]], align 2 |
| // CHECK-P8-NEXT: [[REG251:[0-9a-zA-Z_%.]+]] = load i16, i16* [[REG254]], align 2 |
| // CHECK-P8: [[REG257:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 2 |
| // CHECK-P8-NEXT: store i16 [[REG251]], i16* [[REG257]], align 4 |
| // CHECK-P8-NEXT: [[REG258:[0-9a-zA-Z_%.]+]] = load i16, i16* [[REG254]], align 2 |
| // CHECK-P8: [[REG259:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 3 |
| // CHECK-P8-NEXT: store i16 [[REG258]], i16* [[REG259]], align 2 |
| |
| // CHECK: define available_externally i64 @_mm_set1_pi8 |
| // CHECK: [[REG260:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_splats(signed char)(i8 signext {{[0-9a-zA-Z_%.]+}}) |
| // CHECK-NEXT: store <16 x i8> [[REG260]], <16 x i8>* [[REG261:[0-9a-zA-Z_%.]+]], align 16 |
| // CHECK-NEXT: [[REG262:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG261]], align 16 |
| // CHECK-NEXT: [[REG263:[0-9a-zA-Z_%.]+]] = bitcast <16 x i8> [[REG262]] to <2 x i64> |
| // CHECK-NEXT: [[REG264:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG263]], i32 0 |
| // CHECK-NEXT: ret i64 [[REG264]] |
| |
| void __attribute__((noinline)) |
| test_setr() { /* Calls the _mm_setr_pi* intrinsics on values declared elsewhere in the file and stores into res; the patterns that follow are written in this call order. */ |
| res = _mm_setr_pi32(i1, i2); /* patterns expect direct i32 stores to elements 0 and 1 of a [2 x i32] */ |
| res = _mm_setr_pi16(s[0], s[1], s[2], s[3]); /* patterns expect a call to _mm_set_pi16 whose result is returned */ |
| res = _mm_setr_pi8(c[0], c[1], c[2], c[3], c[4], c[5], c[6], c[7]); /* patterns expect a call to _mm_set_pi8 whose result is returned */ |
| } |
| |
| // CHECK-LABEL: @test_setr |
| |
| // CHECK: define available_externally i64 @_mm_setr_pi32 |
| // CHECK: [[REG265:[0-9a-zA-Z_%.]+]] = load i32, i32* {{[0-9a-zA-Z_%.]+}}, align 4 |
| // CHECK: [[REG266:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0 |
| // CHECK-NEXT: store i32 [[REG265:[0-9a-zA-Z_%.]+]], i32* [[REG266]], align 8 |
| // CHECK-NEXT: [[REG267:[0-9a-zA-Z_%.]+]] = load i32, i32* {{[0-9a-zA-Z_%.]+}}, align 4 |
| // CHECK: [[REG268:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1 |
| // CHECK-NEXT: store i32 [[REG267]], i32* [[REG268]], align 4 |
| |
| // CHECK: define available_externally i64 @_mm_setr_pi16 |
| // CHECK: [[REG269:[0-9a-zA-Z_%.]+]] = call i64 @_mm_set_pi16 |
| // CHECK-NEXT: ret i64 [[REG269]] |
| |
| // CHECK: define available_externally i64 @_mm_setr_pi8 |
| // CHECK: [[REG270:[0-9a-zA-Z_%.]+]] = call i64 @_mm_set_pi8 |
| // CHECK-NEXT: ret i64 [[REG270]] |
| |
| void __attribute__((noinline)) |
| test_setzero() { /* Calls _mm_setzero_si64 once and stores the result in res. */ |
| res = _mm_setzero_si64(); /* patterns expect the intrinsic body to be just "ret i64 0" in the entry block */ |
| } |
| |
| // CHECK-LABEL: @test_setzero |
| |
| // CHECK: define available_externally i64 @_mm_setzero_si64 |
| // CHECK: entry |
| // CHECK-NEXT: ret i64 0 |
| |
| void __attribute__((noinline)) |
| test_sll() { /* Calls the left-shift intrinsics (_mm_sll_* with a 64-bit count m2, _mm_slli_* with an int count i1) and stores into res; the patterns that follow are written in this call order. */ |
| res = _mm_sll_pi16(m1, m2); /* patterns expect an "icmp ule ..., 15" guard: vec_sl on the taken path, store of 0 otherwise */ |
| res = _mm_sll_pi32(m1, m2); /* patterns expect a scalar shl i32 on element 0 and on element 1 */ |
| res = _mm_sll_si64(m1, m2); /* patterns expect a single shl i64 that is returned */ |
| res = _mm_slli_pi16(m1, i1); /* patterns expect sext of the count to i64, then a call to _mm_sll_pi16 */ |
| res = _mm_slli_pi32(m1, i1); /* patterns expect sext of the count to i64, then a call to _mm_sll_pi32 */ |
| res = _mm_slli_si64(m1, i1); /* patterns expect zext of the count to i64, then shl i64 */ |
| } |
| |
| // CHECK-LABEL: @test_sll |
| |
| // CHECK: define available_externally i64 @_mm_sll_pi16 |
| // CHECK: [[REG271:[0-9a-zA-Z_%.]+]] = icmp ule i64 {{[0-9a-zA-Z_%.]+}}, 15 |
| // CHECK-NEXT: br i1 [[REG271]], label %[[REG272:[0-9a-zA-Z_.]+]], label %[[REG273:[0-9a-zA-Z_.]+]] |
| // CHECK: [[REG272]] |
| // CHECK: call <2 x i64> @vec_splats(unsigned long long) |
| // CHECK: trunc i64 {{[0-9a-zA-Z_%.]+}} to i16 |
| // CHECK-NEXT: call <8 x i16> @vec_splats(unsigned short) |
| // CHECK: call <8 x i16> @vec_sl(short vector[8], unsigned short vector[8]) |
| // CHECK: store i64 [[REG274:[0-9a-zA-Z_%.]+]], i64* [[REG275:[0-9a-zA-Z_%.]+]], align 8 |
| // CHECK-NEXT: br label %[[REG276:[0-9a-zA-Z_%.]+]] |
| // CHECK: [[REG273]] |
| // CHECK-NEXT: store i64 0, i64* [[REG275]], align 8 |
| // CHECK-NEXT: br label %[[REG276]] |
| // CHECK: [[REG276]] |
| // CHECK-NEXT: [[REG277:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG275]], align 8 |
| // CHECK-NEXT: ret i64 [[REG277]] |
| |
| // CHECK: define available_externally i64 @_mm_sll_pi32 |
| // CHECK: [[REG278:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0 |
| // CHECK-NEXT: [[REG279:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG278]] |
| // CHECK-NEXT: [[REG280:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}} |
| // CHECK-NEXT: [[REG281:[0-9a-zA-Z_%.]+]] = trunc i64 [[REG280]] to i32 |
| // CHECK-NEXT: [[REG282:[0-9a-zA-Z_%.]+]] = shl i32 [[REG279]], [[REG281]] |
| // CHECK: [[REG283:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0 |
| // CHECK-NEXT: store i32 [[REG282]], i32* [[REG283]] |
| // CHECK: [[REG284:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1 |
| // CHECK-NEXT: [[REG285:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG284]], align 4 |
| // CHECK: trunc i64 {{[0-9a-zA-Z_%.]+}} to i32 |
| // CHECK-NEXT: [[REG286:[0-9a-zA-Z_%.]+]] = shl i32 [[REG285]], {{[0-9a-zA-Z_%.]+}} |
| // CHECK: [[REG287:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1 |
| // CHECK-NEXT: store i32 [[REG286]], i32* [[REG287]], align 4 |
| |
| // CHECK: define available_externally i64 @_mm_sll_si64 |
| // CHECK: [[REG288:[0-9a-zA-Z_%.]+]] = shl i64 {{[0-9a-zA-Z_%.]+}}, {{[0-9a-zA-Z_%.]+}} |
| // CHECK-NEXT: ret i64 [[REG288]] |
| |
| // CHECK: define available_externally i64 @_mm_slli_pi16 |
| // CHECK: [[REG289:[0-9a-zA-Z_%.]+]] = sext i32 {{[0-9a-zA-Z_%.]+}} to i64 |
| // CHECK-NEXT: [[REG290:[0-9a-zA-Z_%.]+]] = call i64 @_mm_sll_pi16(i64 {{[0-9a-zA-Z_%.]+}}, i64 [[REG289]]) |
| // CHECK-NEXT: ret i64 [[REG290]] |
| |
| // CHECK: define available_externally i64 @_mm_slli_pi32 |
| // CHECK: [[REG291:[0-9a-zA-Z_%.]+]] = sext i32 {{[0-9a-zA-Z_%.]+}} to i64 |
| // CHECK-NEXT: [[REG292:[0-9a-zA-Z_%.]+]] = call i64 @_mm_sll_pi32(i64 {{[0-9a-zA-Z_%.]+}}, i64 [[REG291]]) |
| // CHECK-NEXT: ret i64 [[REG292]] |
| |
| // CHECK: define available_externally i64 @_mm_slli_si64 |
| // CHECK: [[REG293:[0-9a-zA-Z_%.]+]] = zext i32 {{[0-9a-zA-Z_%.]+}} to i64 |
| // CHECK-NEXT: [[REG294:[0-9a-zA-Z_%.]+]] = shl i64 {{[0-9a-zA-Z_%.]+}}, [[REG293]] |
| // CHECK-NEXT: ret i64 [[REG294]] |
| |
| void __attribute__((noinline)) |
| test_alt_name_sll() { /* Calls each _m_psll* alias and stores into res; the patterns that follow expect every alias to forward to one _mm_sll* intrinsic and are written in this call order. */ |
| res = _m_pslld(m1, m2); /* expected to call _mm_sll_pi32 */ |
| res = _m_pslldi(m1, i1); /* expected to call _mm_slli_pi32 */ |
| res = _m_psllq(m1, m2); /* expected to call _mm_sll_si64 */ |
| res = _m_psllqi(m1, i1); /* expected to call _mm_slli_si64 */ |
| res = _m_psllw(m1, m2); /* expected to call _mm_sll_pi16 */ |
| res = _m_psllwi(m1, i1); /* expected to call _mm_slli_pi16 */ |
| } |
| |
| // CHECK-LABEL: @test_alt_name_sll |
| |
| // CHECK: define available_externally i64 @_m_pslld |
| // CHECK: [[REG295:[0-9a-zA-Z_%.]+]] = call i64 @_mm_sll_pi32 |
| // CHECK-NEXT: ret i64 [[REG295]] |
| |
| // CHECK: define available_externally i64 @_m_pslldi |
| // CHECK: [[REG296:[0-9a-zA-Z_%.]+]] = call i64 @_mm_slli_pi32 |
| // CHECK-NEXT: ret i64 [[REG296]] |
| |
| // CHECK: define available_externally i64 @_m_psllq |
| // CHECK: [[REG297:[0-9a-zA-Z_%.]+]] = call i64 @_mm_sll_si64 |
| // CHECK-NEXT: ret i64 [[REG297]] |
| |
| // CHECK: define available_externally i64 @_m_psllqi |
| // CHECK: [[REG298:[0-9a-zA-Z_%.]+]] = call i64 @_mm_slli_si64 |
| // CHECK-NEXT: ret i64 [[REG298]] |
| |
| // CHECK: define available_externally i64 @_m_psllw |
| // CHECK: [[REG299:[0-9a-zA-Z_%.]+]] = call i64 @_mm_sll_pi16 |
| // CHECK-NEXT: ret i64 [[REG299]] |
| |
| // CHECK: define available_externally i64 @_m_psllwi |
| // CHECK: [[REG300:[0-9a-zA-Z_%.]+]] = call i64 @_mm_slli_pi16 |
| // CHECK-NEXT: ret i64 [[REG300]] |
| |
| void __attribute__((noinline)) |
| test_sra() { /* Calls the arithmetic right-shift intrinsics and stores into res; the patterns that follow are written in this call order. */ |
| res = _mm_sra_pi32(m1, m2); /* patterns expect a scalar ashr i32 on element 0 and on element 1 */ |
| res = _mm_sra_pi16(m1, m2); /* patterns expect an "icmp ule ..., 15" guard: vec_sra on the taken path, store of 0 otherwise */ |
| res = _mm_srai_pi32(m1, i1); /* patterns expect sext of the count to i64, then a call to _mm_sra_pi32 */ |
| res = _mm_srai_pi16(m1, i1); /* patterns expect sext of the count to i64, then a call to _mm_sra_pi16 */ |
| } |
| |
| // CHECK-LABEL: @test_sra |
| |
| // CHECK: define available_externally i64 @_mm_sra_pi32 |
| // CHECK: [[REG301:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0 |
| // CHECK-NEXT: [[REG302:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG301]], align 8 |
| // CHECK-NEXT: [[REG303:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8 |
| // CHECK-NEXT: [[REG304:[0-9a-zA-Z_%.]+]] = trunc i64 [[REG303]] to i32 |
| // CHECK-NEXT: [[REG305:[0-9a-zA-Z_%.]+]] = ashr i32 [[REG302]], [[REG304]] |
| // CHECK: [[REG306:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0 |
| // CHECK-NEXT: store i32 [[REG305]], i32* [[REG306]], align 8 |
| // CHECK: [[REG307:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1 |
| // CHECK-NEXT: [[REG308:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG307]], align 4 |
| // CHECK-NEXT: [[REG309:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8 |
| // CHECK-NEXT: [[REG310:[0-9a-zA-Z_%.]+]] = trunc i64 [[REG309]] to i32 |
| // CHECK-NEXT: [[REG311:[0-9a-zA-Z_%.]+]] = ashr i32 [[REG308]], [[REG310]] |
| // CHECK: [[REG312:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1 |
| // CHECK-NEXT: store i32 [[REG311]], i32* [[REG312]], align 4 |
| |
| // CHECK: define available_externally i64 @_mm_sra_pi16 |
| // CHECK: [[REG313:[0-9a-zA-Z_%.]+]] = icmp ule i64 {{[0-9a-zA-Z_%.]+}}, 15 |
| // CHECK-NEXT: br i1 [[REG313]], label %[[REG314:[0-9a-zA-Z_%.]+]], label %[[REG315:[0-9a-zA-Z_%.]+]] |
| // CHECK: [[REG314]] |
| // CHECK: [[REG316:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 {{[0-9a-zA-Z_%.]+}}) |
| // CHECK-NEXT: [[REG317:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG316]] to <8 x i16> |
| // CHECK-NEXT: store <8 x i16> [[REG317]], <8 x i16>* [[REG318:[0-9a-zA-Z_%.]+]], align 16 |
| // CHECK-NEXT: [[REG319:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8 |
| // CHECK-NEXT: [[REG320:[0-9a-zA-Z_%.]+]] = trunc i64 [[REG319]] to i16 |
| // CHECK-NEXT: [[REG321:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_splats(unsigned short)(i16 zeroext [[REG320]]) |
| // CHECK-NEXT: store <8 x i16> [[REG321]], <8 x i16>* [[REG322:[0-9a-zA-Z_%.]+]], align 16 |
| // CHECK-NEXT: [[REG323:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG318]], align 16 |
| // CHECK-NEXT: [[REG324:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG322]], align 16 |
| // CHECK-NEXT: call <8 x i16> @vec_sra(short vector[8], unsigned short vector[8])(<8 x i16> [[REG323]], <8 x i16> [[REG324]]) |
| // CHECK: store i64 {{[0-9a-zA-Z_%.]+}}, i64* [[REG325:[0-9a-zA-Z_%.]+]], align 8 |
| // CHECK-NEXT: br label %[[REG326:[0-9a-zA-Z_%.]+]] |
| // CHECK: [[REG315]] |
| // CHECK-NEXT: store i64 0, i64* [[REG325]], align 8 |
| // CHECK-NEXT: br label %[[REG326]] |
| // CHECK: [[REG326]] |
| // CHECK-NEXT: [[REG327:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG325]], align 8 |
| // CHECK-NEXT: ret i64 [[REG327]] |
| |
| // CHECK: define available_externally i64 @_mm_srai_pi32 |
| // CHECK: [[REG328:[0-9a-zA-Z_%.]+]] = sext i32 {{[0-9a-zA-Z_%.]+}} to i64 |
| // CHECK-NEXT: [[REG329:[0-9a-zA-Z_%.]+]] = call i64 @_mm_sra_pi32(i64 {{[0-9a-zA-Z_%.]+}}, i64 [[REG328]]) |
| // CHECK-NEXT: ret i64 [[REG329]] |
| |
| // CHECK: define available_externally i64 @_mm_srai_pi16 |
| // CHECK: [[REG330:[0-9a-zA-Z_%.]+]] = sext i32 {{[0-9a-zA-Z_%.]+}} to i64 |
| // CHECK-NEXT: [[REG331:[0-9a-zA-Z_%.]+]] = call i64 @_mm_sra_pi16(i64 {{[0-9a-zA-Z_%.]+}}, i64 [[REG330]]) |
| // CHECK-NEXT: ret i64 [[REG331]] |
| |
| void __attribute__((noinline)) |
| test_alt_name_sra() { /* Calls each _m_psra* alias and stores into res; the patterns that follow expect every alias to forward to one _mm_sra* intrinsic and are written in this call order. */ |
| res = _m_psrad(m1, m2); /* expected to call _mm_sra_pi32 */ |
| res = _m_psraw(m1, m2); /* expected to call _mm_sra_pi16 */ |
| res = _m_psradi(m1, i1); /* expected to call _mm_srai_pi32 */ |
| res = _m_psrawi(m1, i1); /* expected to call _mm_srai_pi16 */ |
| } |
| |
| // CHECK-LABEL: @test_alt_name_sra |
| |
| // CHECK: define available_externally i64 @_m_psrad |
| // CHECK: [[REG332:[0-9a-zA-Z_%.]+]] = call i64 @_mm_sra_pi32 |
| // CHECK-NEXT: ret i64 [[REG332]] |
| |
| // CHECK: define available_externally i64 @_m_psraw |
| // CHECK: [[REG333:[0-9a-zA-Z_%.]+]] = call i64 @_mm_sra_pi16 |
| // CHECK-NEXT: ret i64 [[REG333]] |
| |
| // CHECK: define available_externally i64 @_m_psradi |
| // CHECK: [[REG334:[0-9a-zA-Z_%.]+]] = call i64 @_mm_srai_pi32 |
| // CHECK-NEXT: ret i64 [[REG334]] |
| |
| // CHECK: define available_externally i64 @_m_psrawi |
| // CHECK: [[REG335:[0-9a-zA-Z_%.]+]] = call i64 @_mm_srai_pi16 |
| // CHECK-NEXT: ret i64 [[REG335]] |
| |
| void __attribute__((noinline)) |
| test_srl() { /* Calls the logical right-shift intrinsics and stores into res; the patterns that follow are written in this call order. */ |
| res = _mm_srl_si64(m1, m2); /* patterns expect a single lshr i64 that is returned */ |
| res = _mm_srl_pi32(m1, m2); /* patterns expect a scalar lshr i32 on element 0 and on element 1 */ |
| res = _mm_srl_pi16(m1, m2); /* patterns expect an "icmp ule ..., 15" guard: vec_sr on the taken path, store of 0 otherwise */ |
| res = _mm_srli_si64(m1, i1); /* patterns expect zext of the count to i64, then lshr i64 */ |
| res = _mm_srli_pi32(m1, i1); /* patterns expect sext of the count to i64, then a call to _mm_srl_pi32 */ |
| res = _mm_srli_pi16(m1, i1); /* patterns expect sext of the count to i64, then a call to _mm_srl_pi16 */ |
| } |
| |
| // CHECK-LABEL: @test_srl |
| |
| // CHECK: define available_externally i64 @_mm_srl_si64 |
| // CHECK: [[REG336:[0-9a-zA-Z_%.]+]] = lshr i64 {{[0-9a-zA-Z_%.]+}}, {{[0-9a-zA-Z_%.]+}} |
| // CHECK-NEXT: ret i64 [[REG336]] |
| |
| // CHECK: define available_externally i64 @_mm_srl_pi32 |
| // CHECK: [[REG337:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0 |
| // CHECK-NEXT: [[REG338:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG337]], align 8 |
| // CHECK-NEXT: [[REG339:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8 |
| // CHECK-NEXT: [[REG340:[0-9a-zA-Z_%.]+]] = trunc i64 [[REG339]] to i32 |
| // CHECK-NEXT: [[REG341:[0-9a-zA-Z_%.]+]] = lshr i32 [[REG338]], [[REG340]] |
| // CHECK: [[REG342:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0 |
| // CHECK-NEXT: store i32 [[REG341]], i32* [[REG342]], align 8 |
| // CHECK: [[REG343:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1 |
| // CHECK-NEXT: [[REG344:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG343]], align 4 |
| // CHECK-NEXT: [[REG345:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8 |
| // CHECK-NEXT: [[REG346:[0-9a-zA-Z_%.]+]] = trunc i64 [[REG345]] to i32 |
| // CHECK-NEXT: [[REG347:[0-9a-zA-Z_%.]+]] = lshr i32 [[REG344]], [[REG346]] |
| // CHECK: [[REG348:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1 |
| // CHECK-NEXT: store i32 [[REG347]], i32* [[REG348]], align 4 |
| |
| // CHECK: define available_externally i64 @_mm_srl_pi16 |
| // CHECK: [[REG349:[0-9a-zA-Z_%.]+]] = icmp ule i64 {{[0-9a-zA-Z_%.]+}}, 15 |
| // CHECK-NEXT: br i1 [[REG349]], label %[[REG350:[0-9a-zA-Z_%.]+]], label %[[REG351:[0-9a-zA-Z_%.]+]] |
| // CHECK: [[REG350]] |
| // CHECK: [[REG352:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 {{[0-9a-zA-Z_%.]+}}) |
| // CHECK-NEXT: [[REG353:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG352]] to <8 x i16> |
| // CHECK-NEXT: store <8 x i16> [[REG353]], <8 x i16>* [[REG354:[0-9a-zA-Z_%.]+]], align 16 |
| // CHECK-NEXT: [[REG355:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8 |
| // CHECK-NEXT: [[REG356:[0-9a-zA-Z_%.]+]] = trunc i64 [[REG355]] to i16 |
| // CHECK-NEXT: [[REG357:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_splats(unsigned short)(i16 zeroext [[REG356]]) |
| // CHECK-NEXT: store <8 x i16> [[REG357]], <8 x i16>* [[REG358:[0-9a-zA-Z_%.]+]], align 16 |
| // CHECK-NEXT: [[REG359:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG354]], align 16 |
| // CHECK-NEXT: [[REG360:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG358]], align 16 |
| // CHECK-NEXT: call <8 x i16> @vec_sr(unsigned short vector[8], unsigned short vector[8])(<8 x i16> [[REG359]], <8 x i16> [[REG360]]) |
| // CHECK: store i64 {{[0-9a-zA-Z_%.]+}}, i64* [[REG361:[0-9a-zA-Z_%.]+]], align 8 |
| // CHECK-NEXT: br label %[[REG362:[0-9a-zA-Z_%.]+]] |
| // CHECK: [[REG351]] |
| // CHECK-NEXT: store i64 0, i64* {{[0-9a-zA-Z_%.]+}}, align 8 |
| // CHECK-NEXT: br label %[[REG362]] |
| // CHECK: [[REG362]] |
| // CHECK-NEXT: [[REG363:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG361]], align 8 |
| // CHECK-NEXT: ret i64 [[REG363]] |
| |
| // CHECK: define available_externally i64 @_mm_srli_si64 |
| // CHECK: [[REG364:[0-9a-zA-Z_%.]+]] = zext i32 {{[0-9a-zA-Z_%.]+}} to i64 |
| // CHECK-NEXT: [[REG365:[0-9a-zA-Z_%.]+]] = lshr i64 {{[0-9a-zA-Z_%.]+}}, [[REG364]] |
| // CHECK-NEXT: ret i64 [[REG365]] |
| |
| // CHECK: define available_externally i64 @_mm_srli_pi32 |
| // CHECK: [[REG366:[0-9a-zA-Z_%.]+]] = sext i32 {{[0-9a-zA-Z_%.]+}} to i64 |
| // CHECK-NEXT: [[REG367:[0-9a-zA-Z_%.]+]] = call i64 @_mm_srl_pi32(i64 {{[0-9a-zA-Z_%.]+}}, i64 [[REG366]]) |
| // CHECK-NEXT: ret i64 [[REG367]] |
| |
| // CHECK: define available_externally i64 @_mm_srli_pi16 |
| // CHECK: [[REG366:[0-9a-zA-Z_%.]+]] = sext i32 {{[0-9a-zA-Z_%.]+}} to i64 |
| // CHECK-NEXT: [[REG368:[0-9a-zA-Z_%.]+]] = call i64 @_mm_srl_pi16(i64 {{[0-9a-zA-Z_%.]+}}, i64 [[REG366]]) |
| // CHECK-NEXT: ret i64 [[REG368]] |
| |
| void __attribute__((noinline)) |
| test_alt_name_srl() { /* Calls each _m_psrl* alias and stores into res; the patterns that follow expect every alias to forward to one _mm_srl* intrinsic and are written in this call order. */ |
| res = _m_psrlq(m1, m2); /* expected to call _mm_srl_si64 */ |
| res = _m_psrld(m1, m2); /* expected to call _mm_srl_pi32 */ |
| res = _m_psrlw(m1, m2); /* expected to call _mm_srl_pi16 */ |
| res = _m_psrlqi(m1, i1); /* expected to call _mm_srli_si64 */ |
| res = _m_psrldi(m1, i1); /* expected to call _mm_srli_pi32 */ |
| res = _m_psrlwi(m1, i1); /* expected to call _mm_srli_pi16 */ |
| } |
| |
| // CHECK-LABEL: @test_alt_name_srl |
| |
| // CHECK: define available_externally i64 @_m_psrlq |
| // CHECK: [[REG369:[0-9a-zA-Z_%.]+]] = call i64 @_mm_srl_si64 |
| // CHECK-NEXT: ret i64 [[REG369]] |
| |
| // CHECK: define available_externally i64 @_m_psrld |
| // CHECK: [[REG370:[0-9a-zA-Z_%.]+]] = call i64 @_mm_srl_pi32 |
| // CHECK-NEXT: ret i64 [[REG370]] |
| |
| // CHECK: define available_externally i64 @_m_psrlw |
| // CHECK: [[REG371:[0-9a-zA-Z_%.]+]] = call i64 @_mm_srl_pi16 |
| // CHECK-NEXT: ret i64 [[REG371]] |
| |
| // CHECK: define available_externally i64 @_m_psrlqi |
| // CHECK: [[REG372:[0-9a-zA-Z_%.]+]] = call i64 @_mm_srli_si64 |
| // CHECK-NEXT: ret i64 [[REG372]] |
| |
| // CHECK: define available_externally i64 @_m_psrldi |
| // CHECK: [[REG373:[0-9a-zA-Z_%.]+]] = call i64 @_mm_srli_pi32 |
| // CHECK-NEXT: ret i64 [[REG373]] |
| |
| // CHECK: define available_externally i64 @_m_psrlwi |
| // CHECK: [[REG374:[0-9a-zA-Z_%.]+]] = call i64 @_mm_srli_pi16 |
| // CHECK-NEXT: ret i64 [[REG374]] |
| |
| void __attribute__((noinline)) |
| test_sub() { /* Calls the wrapping (_mm_sub_*) and saturating (_mm_subs_*) subtract intrinsics on m1, m2 and stores into res; the patterns that follow are written in this call order. */ |
| res = _mm_sub_pi32(m1, m2); /* pwr8 patterns expect two scalar "sub nsw i32"; pwr9 patterns expect vec_sub on int vector[4] */ |
| res = _mm_sub_pi16(m1, m2); /* patterns expect vec_sub on short vector[8] */ |
| res = _mm_sub_pi8(m1, m2); /* patterns expect vec_sub on signed char vector[16] */ |
| res = _mm_subs_pi16(m1, m2); /* patterns expect vec_subs on short vector[8] */ |
| res = _mm_subs_pi8(m1, m2); /* patterns expect vec_subs on signed char vector[16] */ |
| res = _mm_subs_pu16(m1, m2); /* patterns expect vec_subs on unsigned short vector[8] */ |
| res = _mm_subs_pu8(m1, m2); /* patterns expect vec_subs on unsigned char vector[16] */ |
| } |
| |
| // CHECK-LABEL: @test_sub |
| |
| // CHECK: define available_externally i64 @_mm_sub_pi32 |
| |
| // CHECK-P8: [[REG375:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0 |
| // CHECK-P8-NEXT: [[REG376:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG375]], align 8 |
| // CHECK-P8: [[REG377:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0 |
| // CHECK-P8-NEXT: [[REG378:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG377]], align 8 |
| // CHECK-P8-NEXT: sub nsw i32 [[REG376]], [[REG378]] |
| // CHECK-P8: [[REG379:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1 |
| // CHECK-P8-NEXT: [[REG380:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG379]], align 4 |
| // CHECK-P8: [[REG381:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1 |
| // CHECK-P8-NEXT: [[REG382:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG381]], align 4 |
| // CHECK-P8-NEXT: sub nsw i32 [[REG380]], [[REG382]] |
| |
| // CHECK-P9: [[REG383:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 {{[0-9a-zA-Z_%.]+}}) |
| // CHECK-P9-NEXT: [[REG384:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG383]] to <4 x i32> |
| // CHECK-P9-NEXT: store <4 x i32> [[REG384]], <4 x i32>* [[REG385:[0-9a-zA-Z_%.]+]], align 16 |
| // CHECK-P9: [[REG386:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 {{[0-9a-zA-Z_%.]+}}) |
| // CHECK-P9-NEXT: [[REG387:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG386]] to <4 x i32> |
| // CHECK-P9-NEXT: store <4 x i32> [[REG387]], <4 x i32>* [[REG388:[0-9a-zA-Z_%.]+]], align 16 |
| // CHECK-P9-NEXT: [[REG389:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG385]], align 16 |
| // CHECK-P9-NEXT: [[REG390:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG388]], align 16 |
| // CHECK-P9-NEXT: call <4 x i32> @vec_sub(int vector[4], int vector[4])(<4 x i32> [[REG389]], <4 x i32> [[REG390]]) |
| |
| // CHECK: define available_externally i64 @_mm_sub_pi16 |
| // CHECK: [[REG391:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 {{[0-9a-zA-Z_%.]+}}) |
| // CHECK-NEXT: [[REG392:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG391]] to <8 x i16> |
| // CHECK-NEXT: store <8 x i16> [[REG392]], <8 x i16>* [[REG393:[0-9a-zA-Z_%.]+]], align 16 |
| // CHECK: [[REG394:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 {{[0-9a-zA-Z_%.]+}}) |
| // CHECK-NEXT: [[REG395:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG394]] to <8 x i16> |
| // CHECK-NEXT: store <8 x i16> [[REG395]], <8 x i16>* [[REG396:[0-9a-zA-Z_%.]+]], align 16 |
| // CHECK-NEXT: [[REG397:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG393]], align 16 |
| // CHECK-NEXT: [[REG398:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG396]], align 16 |
| // CHECK-NEXT: call <8 x i16> @vec_sub(short vector[8], short vector[8])(<8 x i16> [[REG397]], <8 x i16> [[REG398]]) |
| |
| // CHECK: define available_externally i64 @_mm_sub_pi8 |
| // CHECK: [[REG399:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 {{[0-9a-zA-Z_%.]+}}) |
| // CHECK-NEXT: [[REG400:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG399]] to <16 x i8> |
| // CHECK-NEXT: store <16 x i8> [[REG400]], <16 x i8>* [[REG401:[0-9a-zA-Z_%.]+]], align 16 |
| // CHECK: [[REG402:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 {{[0-9a-zA-Z_%.]+}}) |
| // CHECK-NEXT: [[REG403:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG402]] to <16 x i8> |
| // CHECK-NEXT: store <16 x i8> [[REG403]], <16 x i8>* [[REG404:[0-9a-zA-Z_%.]+]], align 16 |
| // CHECK-NEXT: [[REG405:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG401]], align 16 |
| // CHECK-NEXT: [[REG406:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG404]], align 16 |
| // CHECK-NEXT: call <16 x i8> @vec_sub(signed char vector[16], signed char vector[16])(<16 x i8> [[REG405]], <16 x i8> [[REG406]]) |
| |
| // CHECK: define available_externally i64 @_mm_subs_pi16 |
| // CHECK: [[REG407:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 {{[0-9a-zA-Z_%.]+}}) |
| // CHECK-NEXT: [[REG408:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG407]] to <8 x i16> |
| // CHECK-NEXT: store <8 x i16> [[REG408]], <8 x i16>* [[REG409:[0-9a-zA-Z_%.]+]], align 16 |
| // CHECK: [[REG410:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 {{[0-9a-zA-Z_%.]+}}) |
| // CHECK-NEXT: [[REG411:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG410]] to <8 x i16> |
| // CHECK-NEXT: store <8 x i16> [[REG411]], <8 x i16>* [[REG412:[0-9a-zA-Z_%.]+]], align 16 |
| // CHECK-NEXT: [[REG413:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG409]], align 16 |
| // CHECK-NEXT: [[REG414:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG412]], align 16 |
| // CHECK-NEXT: call <8 x i16> @vec_subs(short vector[8], short vector[8])(<8 x i16> [[REG413]], <8 x i16> [[REG414]]) |
| |
| // CHECK: define available_externally i64 @_mm_subs_pi8 |
| // CHECK: [[REG415:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 {{[0-9a-zA-Z_%.]+}}) |
| // CHECK-NEXT: [[REG416:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG415]] to <16 x i8> |
| // CHECK-NEXT: store <16 x i8> [[REG416]], <16 x i8>* [[REG417:[0-9a-zA-Z_%.]+]], align 16 |
| // CHECK: [[REG418:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 {{[0-9a-zA-Z_%.]+}}) |
| // CHECK-NEXT: [[REG419:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG418]] to <16 x i8> |
| // CHECK-NEXT: store <16 x i8> [[REG419]], <16 x i8>* [[REG420:[0-9a-zA-Z_%.]+]], align 16 |
| // CHECK-NEXT: [[REG421:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG417]], align 16 |
| // CHECK-NEXT: [[REG422:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG420]], align 16 |
| // CHECK-NEXT: call <16 x i8> @vec_subs(signed char vector[16], signed char vector[16])(<16 x i8> [[REG421]], <16 x i8> [[REG422]]) |
| |
| // CHECK: define available_externally i64 @_mm_subs_pu16 |
| // CHECK: [[REG423:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 {{[0-9a-zA-Z_%.]+}}) |
| // CHECK-NEXT: [[REG424:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG423]] to <8 x i16> |
| // CHECK-NEXT: store <8 x i16> [[REG424]], <8 x i16>* [[REG425:[0-9a-zA-Z_%.]+]], align 16 |
| // CHECK: [[REG426:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 {{[0-9a-zA-Z_%.]+}}) |
| // CHECK-NEXT: [[REG427:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG426]] to <8 x i16> |
| // CHECK-NEXT: store <8 x i16> [[REG427]], <8 x i16>* [[REG428:[0-9a-zA-Z_%.]+]], align 16 |
| // CHECK-NEXT: [[REG429:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG425]], align 16 |
| // CHECK-NEXT: [[REG430:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG428]], align 16 |
| // CHECK-NEXT: call <8 x i16> @vec_subs(unsigned short vector[8], unsigned short vector[8])(<8 x i16> [[REG429]], <8 x i16> [[REG430]]) |
| |
| // CHECK: define available_externally i64 @_mm_subs_pu8 |
| // CHECK: [[REG431:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 {{[0-9a-zA-Z_%.]+}}) |
| // CHECK-NEXT: [[REG432:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG431]] to <16 x i8> |
| // CHECK-NEXT: store <16 x i8> [[REG432]], <16 x i8>* [[REG433:[0-9a-zA-Z_%.]+]], align 16 |
| // CHECK: [[REG434:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 {{[0-9a-zA-Z_%.]+}}) |
| // CHECK-NEXT: [[REG435:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG434]] to <16 x i8> |
| // CHECK-NEXT: store <16 x i8> [[REG435]], <16 x i8>* [[REG436:[0-9a-zA-Z_%.]+]], align 16 |
| // CHECK-NEXT: [[REG437:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG433]], align 16 |
| // CHECK-NEXT: [[REG438:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG436]], align 16 |
| // CHECK-NEXT: call <16 x i8> @vec_subs(unsigned char vector[16], unsigned char vector[16])(<16 x i8> [[REG437]], <16 x i8> [[REG438]]) |
| |
// Exercises the alternate-name (_m_psub*) subtraction intrinsics.
// Each statement calls one intrinsic on the file-level operands m1 and m2
// (declared outside this chunk) and stores the result in the file-level
// variable res, so every wrapper is referenced and shows up in the emitted IR.
// The FileCheck expectations that follow this function expect each wrapper
// to call its _mm_* counterpart and return that call's result:
//   _m_psubd   -> _mm_sub_pi32     _m_psubw   -> _mm_sub_pi16
//   _m_psubb   -> _mm_sub_pi8      _m_psubsw  -> _mm_subs_pi16
//   _m_psubsb  -> _mm_subs_pi8     _m_psubusw -> _mm_subs_pu16
//   _m_psubusb -> _mm_subs_pu8
// NOTE(review): the expectations are listed in the same order as the calls
// below; presumably the two must be kept in sync if a call is added or moved.
| void __attribute__((noinline)) |
| test_alt_name_sub() { |
| res = _m_psubd(m1, m2); |
| res = _m_psubw(m1, m2); |
| res = _m_psubb(m1, m2); |
| res = _m_psubsw(m1, m2); |
| res = _m_psubsb(m1, m2); |
| res = _m_psubusw(m1, m2); |
| res = _m_psubusb(m1, m2); |
| } |
| |
| // CHECK-LABEL: @test_alt_name_sub |
| |
| // CHECK: define available_externally i64 @_m_psubd |
| // CHECK: [[REG439:[0-9a-zA-Z_%.]+]] = call i64 @_mm_sub_pi32 |
| // CHECK-NEXT: ret i64 [[REG439]] |
| |
| // CHECK: define available_externally i64 @_m_psubw |
| // CHECK: [[REG440:[0-9a-zA-Z_%.]+]] = call i64 @_mm_sub_pi16 |
| // CHECK-NEXT: ret i64 [[REG440]] |
| |
| // CHECK: define available_externally i64 @_m_psubb |
| // CHECK: [[REG441:[0-9a-zA-Z_%.]+]] = call i64 @_mm_sub_pi8 |
| // CHECK-NEXT: ret i64 [[REG441]] |
| |
| // CHECK: define available_externally i64 @_m_psubsw |
| // CHECK: [[REG442:[0-9a-zA-Z_%.]+]] = call i64 @_mm_subs_pi16 |
| // CHECK-NEXT: ret i64 [[REG442]] |
| |
| // CHECK: define available_externally i64 @_m_psubsb |
| // CHECK: [[REG443:[0-9a-zA-Z_%.]+]] = call i64 @_mm_subs_pi8 |
| // CHECK-NEXT: ret i64 [[REG443]] |
| |
| // CHECK: define available_externally i64 @_m_psubusw |
| // CHECK: [[REG444:[0-9a-zA-Z_%.]+]] = call i64 @_mm_subs_pu16 |
| // CHECK-NEXT: ret i64 [[REG444]] |
| |
| // CHECK: define available_externally i64 @_m_psubusb |
| // CHECK: [[REG445:[0-9a-zA-Z_%.]+]] = call i64 @_mm_subs_pu8 |
| // CHECK-NEXT: ret i64 [[REG445]] |
| |
// Exercises the _mm_unpackhi_* and _mm_unpacklo_* intrinsics for 32-, 16-
// and 8-bit elements. Each statement calls one intrinsic on the file-level
// operands m1 and m2 (declared outside this chunk) and stores the result in
// the file-level variable res.
// The FileCheck expectations that follow this function look for:
//   - pi32 / pi16 variants: element-wise getelementptr + load + store
//     sequences over [2 x i32] / [4 x i16] arrays;
//   - pi8 variants: two vec_splats calls whose results are bitcast to
//     <16 x i8> and then passed to vec_mergel.
// NOTE(review): the expectations are listed in the same order as the calls
// below; presumably the two must be kept in sync if a call is added or moved.
| void __attribute__((noinline)) |
| test_unpack() { |
| res = _mm_unpackhi_pi32(m1, m2); |
| res = _mm_unpackhi_pi16(m1, m2); |
| res = _mm_unpackhi_pi8(m1, m2); |
| res = _mm_unpacklo_pi32(m1, m2); |
| res = _mm_unpacklo_pi16(m1, m2); |
| res = _mm_unpacklo_pi8(m1, m2); |
| } |
| |
| // CHECK-LABEL: @test_unpack |
| |
| // CHECK: define available_externally i64 @_mm_unpackhi_pi32 |
| // CHECK: [[REG446:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1 |
| // CHECK-NEXT: [[REG447:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG446]], align 4 |
| // CHECK-NEXT: [[REG448:[0-9a-zA-Z_%.]+]] = bitcast {{[0-9a-zA-Z_%.]+}}* [[REG449:[0-9a-zA-Z_%.]+]] to [2 x i32]* |
| // CHECK-NEXT: [[REG450:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* [[REG448]], i64 0, i64 0 |
| // CHECK-NEXT: store i32 [[REG447]], i32* [[REG450]], align 8 |
| // CHECK: [[REG451:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1 |
| // CHECK-NEXT: [[REG452:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG451]], align 4 |
| // CHECK-NEXT: [[REG453:[0-9a-zA-Z_%.]+]] = bitcast {{[0-9a-zA-Z_%.]+}}* [[REG449]] to [2 x i32]* |
| // CHECK-NEXT: [[REG454:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* [[REG453]], i64 0, i64 1 |
| // CHECK-NEXT: store i32 [[REG452]], i32* [[REG454]], align 4 |
| |
| // CHECK: define available_externally i64 @_mm_unpackhi_pi16 |
| // CHECK: [[REG455:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 2 |
| // CHECK-NEXT: [[REG456:[0-9a-zA-Z_%.]+]] = load i16, i16* [[REG455]], align 4 |
| // CHECK: [[REG457:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0 |
| // CHECK-NEXT: store i16 [[REG456]], i16* [[REG457]], align 8 |
| // CHECK: [[REG458:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 2 |
| // CHECK-NEXT: [[REG459:[0-9a-zA-Z_%.]+]] = load i16, i16* [[REG458]], align 4 |
| // CHECK: [[REG460:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1 |
| // CHECK-NEXT: store i16 [[REG459]], i16* [[REG460]], align 2 |
| // CHECK: [[REG461:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 3 |
| // CHECK-NEXT: [[REG462:[0-9a-zA-Z_%.]+]] = load i16, i16* [[REG461]], align 2 |
| // CHECK: [[REG463:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 2 |
| // CHECK-NEXT: store i16 [[REG462]], i16* [[REG463]], align 4 |
| // CHECK: [[REG464:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 3 |
| // CHECK-NEXT: [[REG465:[0-9a-zA-Z_%.]+]] = load i16, i16* [[REG464]], align 2 |
| // CHECK: [[REG466:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 3 |
| // CHECK-NEXT: store i16 [[REG465]], i16* [[REG466]], align 2 |
| |
| // CHECK: define available_externally i64 @_mm_unpackhi_pi8 |
| // CHECK: [[REG467:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long) |
| // CHECK-NEXT: [[REG468:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG467]] to <16 x i8> |
| // CHECK-NEXT: store <16 x i8> [[REG468]], <16 x i8>* [[REG469:[0-9a-zA-Z_%.]+]], align 16 |
| // CHECK-NEXT: [[REG470:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8 |
| // CHECK-NEXT: [[REG471:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long) |
| // CHECK-NEXT: [[REG472:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG471]] to <16 x i8> |
| // CHECK-NEXT: store <16 x i8> [[REG472]], <16 x i8>* [[REG473:[0-9a-zA-Z_%.]+]], align 16 |
| // CHECK-NEXT: [[REG474:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG469]], align 16 |
| // CHECK-NEXT: [[REG475:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG473]], align 16 |
| // CHECK-NEXT: call <16 x i8> @vec_mergel(unsigned char vector[16], unsigned char vector[16])(<16 x i8> [[REG474]], <16 x i8> [[REG475]]) |
| |
| // CHECK: define available_externally i64 @_mm_unpacklo_pi32 |
| // CHECK: [[REG476:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0 |
| // CHECK-NEXT: [[REG477:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG476]], align 8 |
| // CHECK: [[REG478:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0 |
| // CHECK-NEXT: store i32 [[REG477]], i32* [[REG478]], align 8 |
| // CHECK: [[REG479:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0 |
| // CHECK-NEXT: [[REG480:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG479]], align 8 |
| // CHECK: [[REG481:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1 |
| // CHECK-NEXT: store i32 [[REG480]], i32* [[REG481]], align 4 |
| |
| // CHECK: define available_externally i64 @_mm_unpacklo_pi16 |
| // CHECK: [[REG482:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0 |
| // CHECK-NEXT: [[REG483:[0-9a-zA-Z_%.]+]] = load i16, i16* [[REG482]], align 8 |
| // CHECK: [[REG484:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0 |
| // CHECK-NEXT: store i16 [[REG483]], i16* [[REG484]], align 8 |
| // CHECK: [[REG485:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0 |
| // CHECK-NEXT: [[REG486:[0-9a-zA-Z_%.]+]] = load i16, i16* [[REG485]], align 8 |
| // CHECK: [[REG487:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1 |
| // CHECK-NEXT: store i16 [[REG486]], i16* [[REG487]], align 2 |
| // CHECK: [[REG488:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1 |
| // CHECK-NEXT: [[REG489:[0-9a-zA-Z_%.]+]] = load i16, i16* [[REG488]], align 2 |
| // CHECK: [[REG490:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 2 |
| // CHECK-NEXT: store i16 [[REG489]], i16* [[REG490]], align 4 |
| // CHECK: [[REG491:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1 |
| // CHECK-NEXT: [[REG492:[0-9a-zA-Z_%.]+]] = load i16, i16* [[REG491]], align 2 |
| // CHECK: [[REG493:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 3 |
| // CHECK-NEXT: store i16 [[REG492]], i16* [[REG493]], align 2 |
| |
| // CHECK: define available_externally i64 @_mm_unpacklo_pi8 |
| // CHECK: [[REG494:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long) |
| // CHECK-NEXT: [[REG495:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG494]] to <16 x i8> |
| // CHECK-NEXT: store <16 x i8> [[REG495]], <16 x i8>* [[REG496:[0-9a-zA-Z_%.]+]], align 16 |
| // CHECK-NEXT: [[REG497:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8 |
| // CHECK-NEXT: [[REG498:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long) |
| // CHECK-NEXT: [[REG499:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG498]] to <16 x i8> |
| // CHECK-NEXT: store <16 x i8> [[REG499]], <16 x i8>* [[REG500:[0-9a-zA-Z_%.]+]], align 16 |
| // CHECK-NEXT: [[REG501:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG496]], align 16 |
| // CHECK-NEXT: [[REG502:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG500]], align 16 |
| // CHECK-NEXT: [[REG503:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_mergel(unsigned char vector[16], unsigned char vector[16]) |
| |
// Exercises the alternate-name (_m_punpck*) unpack intrinsics.
// Each statement calls one intrinsic on the file-level operands m1 and m2
// (declared outside this chunk) and stores the result in the file-level
// variable res.
// The FileCheck expectations that follow this function expect each wrapper
// to call its _mm_unpack* counterpart and return that call's result:
//   _m_punpckhdq -> _mm_unpackhi_pi32   _m_punpckhwd -> _mm_unpackhi_pi16
//   _m_punpckhbw -> _mm_unpackhi_pi8    _m_punpckldq -> _mm_unpacklo_pi32
//   _m_punpcklwd -> _mm_unpacklo_pi16   _m_punpcklbw -> _mm_unpacklo_pi8
// NOTE(review): the expectations are listed in the same order as the calls
// below; presumably the two must be kept in sync if a call is added or moved.
| void __attribute__((noinline)) |
| test_alt_name_unpack() { |
| res = _m_punpckhdq(m1, m2); |
| res = _m_punpckhwd(m1, m2); |
| res = _m_punpckhbw(m1, m2); |
| res = _m_punpckldq(m1, m2); |
| res = _m_punpcklwd(m1, m2); |
| res = _m_punpcklbw(m1, m2); |
| } |
| |
| // CHECK-LABEL: @test_alt_name_unpack |
| |
| // CHECK: define available_externally i64 @_m_punpckhdq |
| // CHECK: [[REG238:[0-9a-zA-Z_%.]+]] = call i64 @_mm_unpackhi_pi32 |
| // CHECK-NEXT: ret i64 [[REG238]] |
| |
| // CHECK: define available_externally i64 @_m_punpckhwd |
| // CHECK: [[REG239:[0-9a-zA-Z_%.]+]] = call i64 @_mm_unpackhi_pi16 |
| // CHECK-NEXT: ret i64 [[REG239]] |
| |
| // CHECK: define available_externally i64 @_m_punpckhbw |
| // CHECK: [[REG240:[0-9a-zA-Z_%.]+]] = call i64 @_mm_unpackhi_pi8 |
| // CHECK-NEXT: ret i64 [[REG240]] |
| |
| // CHECK: define available_externally i64 @_m_punpckldq |
| // CHECK: [[REG241:[0-9a-zA-Z_%.]+]] = call i64 @_mm_unpacklo_pi32 |
| // CHECK-NEXT: ret i64 [[REG241]] |
| |
| // CHECK: define available_externally i64 @_m_punpcklwd |
| // CHECK: [[REG242:[0-9a-zA-Z_%.]+]] = call i64 @_mm_unpacklo_pi16 |
| // CHECK-NEXT: ret i64 [[REG242]] |
| |
| // CHECK: define available_externally i64 @_m_punpcklbw |
| // CHECK: [[REG243:[0-9a-zA-Z_%.]+]] = call i64 @_mm_unpacklo_pi8 |
| // CHECK-NEXT: ret i64 [[REG243]] |