! RUN: %flang_fc1 -flang-experimental-hlfir -triple powerpc64le-unknown-unknown -target-cpu pwr10 -emit-llvm %s -o - | FileCheck --check-prefixes="LLVMIR" %s
! REQUIRES: target=powerpc{{.*}}
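! This file checks the lowering of the PowerPC MMA assemble/build/disassemble
! intrinsic module subroutines to their corresponding LLVM intrinsics, as
! verified against the generated LLVM IR below.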
! mma_assemble_acc
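! The calls below are expected to lower to @llvm.ppc.mma.assemble.acc, which
! takes four <16 x i8> operands and returns the <512 x i1> accumulator; vector
! arguments whose element type is not a 1-byte integer are first bitcast to
! <16 x i8>, as the checks show.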
subroutine test_assemble_acc_i1()
use, intrinsic :: mma
implicit none
vector(integer(1)) vi10, vi11, vi12, vi13
__vector_quad :: cq
call mma_assemble_acc(cq, vi10, vi11, vi12, vi13)
end subroutine test_assemble_acc_i1
! CHECK-LABEL: @test_assemble_acc_i1
! LLVMIR: %1 = alloca <512 x i1>, i64 1, align 64
! LLVMIR: %2 = alloca <16 x i8>, i64 1, align 16
! LLVMIR: %3 = alloca <16 x i8>, i64 1, align 16
! LLVMIR: %4 = alloca <16 x i8>, i64 1, align 16
! LLVMIR: %5 = alloca <16 x i8>, i64 1, align 16
! LLVMIR: %6 = load <16 x i8>, ptr %2, align 16
! LLVMIR: %7 = load <16 x i8>, ptr %3, align 16
! LLVMIR: %8 = load <16 x i8>, ptr %4, align 16
! LLVMIR: %9 = load <16 x i8>, ptr %5, align 16
! LLVMIR: %10 = call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %6, <16 x i8> %7, <16 x i8> %8, <16 x i8> %9)
! LLVMIR: store <512 x i1> %10, ptr %1, align 64
subroutine test_assemble_acc_i2()
use, intrinsic :: mma
implicit none
vector(integer(2)) vi10, vi11, vi12, vi13
__vector_quad :: cq
call mma_assemble_acc(cq, vi10, vi11, vi12, vi13)
end subroutine test_assemble_acc_i2
! CHECK-LABEL: @test_assemble_acc_i2
! LLVMIR: %1 = alloca <512 x i1>, i64 1, align 64
! LLVMIR: %2 = alloca <8 x i16>, i64 1, align 16
! LLVMIR: %3 = alloca <8 x i16>, i64 1, align 16
! LLVMIR: %4 = alloca <8 x i16>, i64 1, align 16
! LLVMIR: %5 = alloca <8 x i16>, i64 1, align 16
! LLVMIR: %6 = load <8 x i16>, ptr %2, align 16
! LLVMIR: %7 = load <8 x i16>, ptr %3, align 16
! LLVMIR: %8 = load <8 x i16>, ptr %4, align 16
! LLVMIR: %9 = load <8 x i16>, ptr %5, align 16
! LLVMIR: %10 = bitcast <8 x i16> %6 to <16 x i8>
! LLVMIR: %11 = bitcast <8 x i16> %7 to <16 x i8>
! LLVMIR: %12 = bitcast <8 x i16> %8 to <16 x i8>
! LLVMIR: %13 = bitcast <8 x i16> %9 to <16 x i8>
! LLVMIR: %14 = call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %10, <16 x i8> %11, <16 x i8> %12, <16 x i8> %13)
! LLVMIR: store <512 x i1> %14, ptr %1, align 64
subroutine test_assemble_acc_i4()
use, intrinsic :: mma
implicit none
vector(integer(4)) vi10, vi11, vi12, vi13
__vector_quad :: cq
call mma_assemble_acc(cq, vi10, vi11, vi12, vi13)
end subroutine test_assemble_acc_i4
! CHECK-LABEL: @test_assemble_acc_i4
! LLVMIR: %1 = alloca <512 x i1>, i64 1, align 64
! LLVMIR: %2 = alloca <4 x i32>, i64 1, align 16
! LLVMIR: %3 = alloca <4 x i32>, i64 1, align 16
! LLVMIR: %4 = alloca <4 x i32>, i64 1, align 16
! LLVMIR: %5 = alloca <4 x i32>, i64 1, align 16
! LLVMIR: %6 = load <4 x i32>, ptr %2, align 16
! LLVMIR: %7 = load <4 x i32>, ptr %3, align 16
! LLVMIR: %8 = load <4 x i32>, ptr %4, align 16
! LLVMIR: %9 = load <4 x i32>, ptr %5, align 16
! LLVMIR: %10 = bitcast <4 x i32> %6 to <16 x i8>
! LLVMIR: %11 = bitcast <4 x i32> %7 to <16 x i8>
! LLVMIR: %12 = bitcast <4 x i32> %8 to <16 x i8>
! LLVMIR: %13 = bitcast <4 x i32> %9 to <16 x i8>
! LLVMIR: %14 = call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %10, <16 x i8> %11, <16 x i8> %12, <16 x i8> %13)
! LLVMIR: store <512 x i1> %14, ptr %1, align 64
subroutine test_assemble_acc_i8()
use, intrinsic :: mma
implicit none
vector(integer(8)) vi10, vi11, vi12, vi13
__vector_quad :: cq
call mma_assemble_acc(cq, vi10, vi11, vi12, vi13)
end subroutine test_assemble_acc_i8
! CHECK-LABEL: @test_assemble_acc_i8
! LLVMIR: %1 = alloca <512 x i1>, i64 1, align 64
! LLVMIR: %2 = alloca <2 x i64>, i64 1, align 16
! LLVMIR: %3 = alloca <2 x i64>, i64 1, align 16
! LLVMIR: %4 = alloca <2 x i64>, i64 1, align 16
! LLVMIR: %5 = alloca <2 x i64>, i64 1, align 16
! LLVMIR: %6 = load <2 x i64>, ptr %2, align 16
! LLVMIR: %7 = load <2 x i64>, ptr %3, align 16
! LLVMIR: %8 = load <2 x i64>, ptr %4, align 16
! LLVMIR: %9 = load <2 x i64>, ptr %5, align 16
! LLVMIR: %10 = bitcast <2 x i64> %6 to <16 x i8>
! LLVMIR: %11 = bitcast <2 x i64> %7 to <16 x i8>
! LLVMIR: %12 = bitcast <2 x i64> %8 to <16 x i8>
! LLVMIR: %13 = bitcast <2 x i64> %9 to <16 x i8>
! LLVMIR: %14 = call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %10, <16 x i8> %11, <16 x i8> %12, <16 x i8> %13)
! LLVMIR: store <512 x i1> %14, ptr %1, align 64
subroutine test_assemble_acc_u1()
use, intrinsic :: mma
implicit none
vector(unsigned(1)) vi10, vi11, vi12, vi13
__vector_quad :: cq
call mma_assemble_acc(cq, vi10, vi11, vi12, vi13)
end subroutine test_assemble_acc_u1
! CHECK-LABEL: @test_assemble_acc_u1
! LLVMIR: %1 = alloca <512 x i1>, i64 1, align 64
! LLVMIR: %2 = alloca <16 x i8>, i64 1, align 16
! LLVMIR: %3 = alloca <16 x i8>, i64 1, align 16
! LLVMIR: %4 = alloca <16 x i8>, i64 1, align 16
! LLVMIR: %5 = alloca <16 x i8>, i64 1, align 16
! LLVMIR: %6 = load <16 x i8>, ptr %2, align 16
! LLVMIR: %7 = load <16 x i8>, ptr %3, align 16
! LLVMIR: %8 = load <16 x i8>, ptr %4, align 16
! LLVMIR: %9 = load <16 x i8>, ptr %5, align 16
! LLVMIR: %10 = call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %6, <16 x i8> %7, <16 x i8> %8, <16 x i8> %9)
! LLVMIR: store <512 x i1> %10, ptr %1, align 64
subroutine test_assemble_acc_u2()
use, intrinsic :: mma
implicit none
vector(unsigned(2)) vi10, vi11, vi12, vi13
__vector_quad :: cq
call mma_assemble_acc(cq, vi10, vi11, vi12, vi13)
end subroutine test_assemble_acc_u2
! CHECK-LABEL: @test_assemble_acc_u2
! LLVMIR: %1 = alloca <512 x i1>, i64 1, align 64
! LLVMIR: %2 = alloca <8 x i16>, i64 1, align 16
! LLVMIR: %3 = alloca <8 x i16>, i64 1, align 16
! LLVMIR: %4 = alloca <8 x i16>, i64 1, align 16
! LLVMIR: %5 = alloca <8 x i16>, i64 1, align 16
! LLVMIR: %6 = load <8 x i16>, ptr %2, align 16
! LLVMIR: %7 = load <8 x i16>, ptr %3, align 16
! LLVMIR: %8 = load <8 x i16>, ptr %4, align 16
! LLVMIR: %9 = load <8 x i16>, ptr %5, align 16
! LLVMIR: %10 = bitcast <8 x i16> %6 to <16 x i8>
! LLVMIR: %11 = bitcast <8 x i16> %7 to <16 x i8>
! LLVMIR: %12 = bitcast <8 x i16> %8 to <16 x i8>
! LLVMIR: %13 = bitcast <8 x i16> %9 to <16 x i8>
! LLVMIR: %14 = call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %10, <16 x i8> %11, <16 x i8> %12, <16 x i8> %13)
! LLVMIR: store <512 x i1> %14, ptr %1, align 64
subroutine test_assemble_acc_u4()
use, intrinsic :: mma
implicit none
vector(unsigned(4)) vi10, vi11, vi12, vi13
__vector_quad :: cq
call mma_assemble_acc(cq, vi10, vi11, vi12, vi13)
end subroutine test_assemble_acc_u4
! CHECK-LABEL: @test_assemble_acc_u4
! LLVMIR: %1 = alloca <512 x i1>, i64 1, align 64
! LLVMIR: %2 = alloca <4 x i32>, i64 1, align 16
! LLVMIR: %3 = alloca <4 x i32>, i64 1, align 16
! LLVMIR: %4 = alloca <4 x i32>, i64 1, align 16
! LLVMIR: %5 = alloca <4 x i32>, i64 1, align 16
! LLVMIR: %6 = load <4 x i32>, ptr %2, align 16
! LLVMIR: %7 = load <4 x i32>, ptr %3, align 16
! LLVMIR: %8 = load <4 x i32>, ptr %4, align 16
! LLVMIR: %9 = load <4 x i32>, ptr %5, align 16
! LLVMIR: %10 = bitcast <4 x i32> %6 to <16 x i8>
! LLVMIR: %11 = bitcast <4 x i32> %7 to <16 x i8>
! LLVMIR: %12 = bitcast <4 x i32> %8 to <16 x i8>
! LLVMIR: %13 = bitcast <4 x i32> %9 to <16 x i8>
! LLVMIR: %14 = call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %10, <16 x i8> %11, <16 x i8> %12, <16 x i8> %13)
! LLVMIR: store <512 x i1> %14, ptr %1, align 64
subroutine test_assemble_acc_u8()
use, intrinsic :: mma
implicit none
vector(unsigned(8)) vi10, vi11, vi12, vi13
__vector_quad :: cq
call mma_assemble_acc(cq, vi10, vi11, vi12, vi13)
end subroutine test_assemble_acc_u8
! CHECK-LABEL: @test_assemble_acc_u8
! LLVMIR: %1 = alloca <512 x i1>, i64 1, align 64
! LLVMIR: %2 = alloca <2 x i64>, i64 1, align 16
! LLVMIR: %3 = alloca <2 x i64>, i64 1, align 16
! LLVMIR: %4 = alloca <2 x i64>, i64 1, align 16
! LLVMIR: %5 = alloca <2 x i64>, i64 1, align 16
! LLVMIR: %6 = load <2 x i64>, ptr %2, align 16
! LLVMIR: %7 = load <2 x i64>, ptr %3, align 16
! LLVMIR: %8 = load <2 x i64>, ptr %4, align 16
! LLVMIR: %9 = load <2 x i64>, ptr %5, align 16
! LLVMIR: %10 = bitcast <2 x i64> %6 to <16 x i8>
! LLVMIR: %11 = bitcast <2 x i64> %7 to <16 x i8>
! LLVMIR: %12 = bitcast <2 x i64> %8 to <16 x i8>
! LLVMIR: %13 = bitcast <2 x i64> %9 to <16 x i8>
! LLVMIR: %14 = call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %10, <16 x i8> %11, <16 x i8> %12, <16 x i8> %13)
! LLVMIR: store <512 x i1> %14, ptr %1, align 64
subroutine test_assemble_acc_r4()
use, intrinsic :: mma
implicit none
vector(real(4)) vi10, vi11, vi12, vi13
__vector_quad :: cq
call mma_assemble_acc(cq, vi10, vi11, vi12, vi13)
end subroutine test_assemble_acc_r4
! CHECK-LABEL: @test_assemble_acc_r4
! LLVMIR: %1 = alloca <512 x i1>, i64 1, align 64
! LLVMIR: %2 = alloca <4 x float>, i64 1, align 16
! LLVMIR: %3 = alloca <4 x float>, i64 1, align 16
! LLVMIR: %4 = alloca <4 x float>, i64 1, align 16
! LLVMIR: %5 = alloca <4 x float>, i64 1, align 16
! LLVMIR: %6 = load <4 x float>, ptr %2, align 16
! LLVMIR: %7 = load <4 x float>, ptr %3, align 16
! LLVMIR: %8 = load <4 x float>, ptr %4, align 16
! LLVMIR: %9 = load <4 x float>, ptr %5, align 16
! LLVMIR: %10 = bitcast <4 x float> %6 to <16 x i8>
! LLVMIR: %11 = bitcast <4 x float> %7 to <16 x i8>
! LLVMIR: %12 = bitcast <4 x float> %8 to <16 x i8>
! LLVMIR: %13 = bitcast <4 x float> %9 to <16 x i8>
! LLVMIR: %14 = call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %10, <16 x i8> %11, <16 x i8> %12, <16 x i8> %13)
! LLVMIR: store <512 x i1> %14, ptr %1, align 64
subroutine test_assemble_acc_r8()
use, intrinsic :: mma
implicit none
vector(real(8)) vi10, vi11, vi12, vi13
__vector_quad :: cq
call mma_assemble_acc(cq, vi10, vi11, vi12, vi13)
end subroutine test_assemble_acc_r8
!CHECK-LABEL: @test_assemble_acc_r8
!LLVMIR: %1 = alloca <512 x i1>, i64 1, align 64
!LLVMIR: %2 = alloca <2 x double>, i64 1, align 16
!LLVMIR: %3 = alloca <2 x double>, i64 1, align 16
!LLVMIR: %4 = alloca <2 x double>, i64 1, align 16
!LLVMIR: %5 = alloca <2 x double>, i64 1, align 16
!LLVMIR: %6 = load <2 x double>, ptr %2, align 16
!LLVMIR: %7 = load <2 x double>, ptr %3, align 16
!LLVMIR: %8 = load <2 x double>, ptr %4, align 16
!LLVMIR: %9 = load <2 x double>, ptr %5, align 16
!LLVMIR: %10 = bitcast <2 x double> %6 to <16 x i8>
!LLVMIR: %11 = bitcast <2 x double> %7 to <16 x i8>
!LLVMIR: %12 = bitcast <2 x double> %8 to <16 x i8>
!LLVMIR: %13 = bitcast <2 x double> %9 to <16 x i8>
!LLVMIR: %14 = call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %10, <16 x i8> %11, <16 x i8> %12, <16 x i8> %13)
!LLVMIR: store <512 x i1> %14, ptr %1, align 64
! mma_assemble_pair
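! The calls below are expected to lower to @llvm.ppc.vsx.assemble.pair, which
! takes two <16 x i8> operands and returns the <256 x i1> vector pair, again
! bitcasting non-i8 vector arguments to <16 x i8> first.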
subroutine test_mma_assemble_pair_i1()
use, intrinsic :: mma
implicit none
vector(integer(1)) vi10, vi11
__vector_pair :: vp
call mma_assemble_pair(vp, vi10, vi11)
end subroutine test_mma_assemble_pair_i1
!LLVMIR: @test_mma_assemble_pair_i1_
!LLVMIR: %1 = alloca <16 x i8>, i64 1, align 16
!LLVMIR: %2 = alloca <16 x i8>, i64 1, align 16
!LLVMIR: %3 = alloca <256 x i1>, i64 1, align 32
!LLVMIR: %4 = load <16 x i8>, ptr %1, align 16
!LLVMIR: %5 = load <16 x i8>, ptr %2, align 16
!LLVMIR: %6 = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> %4, <16 x i8> %5)
!LLVMIR: store <256 x i1> %6, ptr %3, align 32
subroutine test_mma_assemble_pair_i2()
use, intrinsic :: mma
implicit none
vector(integer(2)) vi10, vi11
__vector_pair :: vp
call mma_assemble_pair(vp, vi10, vi11)
end subroutine test_mma_assemble_pair_i2
!LLVMIR: @test_mma_assemble_pair_i2_
!LLVMIR: %1 = alloca <8 x i16>, i64 1, align 16
!LLVMIR: %2 = alloca <8 x i16>, i64 1, align 16
!LLVMIR: %3 = alloca <256 x i1>, i64 1, align 32
!LLVMIR: %4 = load <8 x i16>, ptr %1, align 16
!LLVMIR: %5 = load <8 x i16>, ptr %2, align 16
!LLVMIR: %6 = bitcast <8 x i16> %4 to <16 x i8>
!LLVMIR: %7 = bitcast <8 x i16> %5 to <16 x i8>
!LLVMIR: %8 = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> %6, <16 x i8> %7)
!LLVMIR: store <256 x i1> %8, ptr %3, align 32
subroutine test_mma_assemble_pair_i4()
use, intrinsic :: mma
implicit none
vector(integer(4)) vi10, vi11
__vector_pair :: vp
call mma_assemble_pair(vp, vi10, vi11)
end subroutine test_mma_assemble_pair_i4
!LLVMIR: @test_mma_assemble_pair_i4_
!LLVMIR: %1 = alloca <4 x i32>, i64 1, align 16
!LLVMIR: %2 = alloca <4 x i32>, i64 1, align 16
!LLVMIR: %3 = alloca <256 x i1>, i64 1, align 32
!LLVMIR: %4 = load <4 x i32>, ptr %1, align 16
!LLVMIR: %5 = load <4 x i32>, ptr %2, align 16
!LLVMIR: %6 = bitcast <4 x i32> %4 to <16 x i8>
!LLVMIR: %7 = bitcast <4 x i32> %5 to <16 x i8>
!LLVMIR: %8 = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> %6, <16 x i8> %7)
!LLVMIR: store <256 x i1> %8, ptr %3, align 32
subroutine test_mma_assemble_pair_i8()
use, intrinsic :: mma
implicit none
vector(integer(8)) vi10, vi11
__vector_pair :: vp
call mma_assemble_pair(vp, vi10, vi11)
end subroutine test_mma_assemble_pair_i8
!LLVMIR: @test_mma_assemble_pair_i8_
!LLVMIR: %1 = alloca <2 x i64>, i64 1, align 16
!LLVMIR: %2 = alloca <2 x i64>, i64 1, align 16
!LLVMIR: %3 = alloca <256 x i1>, i64 1, align 32
!LLVMIR: %4 = load <2 x i64>, ptr %1, align 16
!LLVMIR: %5 = load <2 x i64>, ptr %2, align 16
!LLVMIR: %6 = bitcast <2 x i64> %4 to <16 x i8>
!LLVMIR: %7 = bitcast <2 x i64> %5 to <16 x i8>
!LLVMIR: %8 = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> %6, <16 x i8> %7)
!LLVMIR: store <256 x i1> %8, ptr %3, align 32
subroutine test_mma_assemble_pair_u1()
use, intrinsic :: mma
implicit none
vector(unsigned(1)) vi10, vi11
__vector_pair :: vp
call mma_assemble_pair(vp, vi10, vi11)
end subroutine test_mma_assemble_pair_u1
!LLVMIR: @test_mma_assemble_pair_u1_
!LLVMIR: %1 = alloca <16 x i8>, i64 1, align 16
!LLVMIR: %2 = alloca <16 x i8>, i64 1, align 16
!LLVMIR: %3 = alloca <256 x i1>, i64 1, align 32
!LLVMIR: %4 = load <16 x i8>, ptr %1, align 16
!LLVMIR: %5 = load <16 x i8>, ptr %2, align 16
!LLVMIR: %6 = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> %4, <16 x i8> %5)
!LLVMIR: store <256 x i1> %6, ptr %3, align 32
subroutine test_mma_assemble_pair_u2()
use, intrinsic :: mma
implicit none
vector(unsigned(2)) vi10, vi11
__vector_pair :: vp
call mma_assemble_pair(vp, vi10, vi11)
end subroutine test_mma_assemble_pair_u2
!LLVMIR: @test_mma_assemble_pair_u2_
!LLVMIR: %1 = alloca <8 x i16>, i64 1, align 16
!LLVMIR: %2 = alloca <8 x i16>, i64 1, align 16
!LLVMIR: %3 = alloca <256 x i1>, i64 1, align 32
!LLVMIR: %4 = load <8 x i16>, ptr %1, align 16
!LLVMIR: %5 = load <8 x i16>, ptr %2, align 16
!LLVMIR: %6 = bitcast <8 x i16> %4 to <16 x i8>
!LLVMIR: %7 = bitcast <8 x i16> %5 to <16 x i8>
!LLVMIR: %8 = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> %6, <16 x i8> %7)
!LLVMIR: store <256 x i1> %8, ptr %3, align 32
subroutine test_mma_assemble_pair_u4()
use, intrinsic :: mma
implicit none
vector(unsigned(4)) vi10, vi11
__vector_pair :: vp
call mma_assemble_pair(vp, vi10, vi11)
end subroutine test_mma_assemble_pair_u4
!LLVMIR: @test_mma_assemble_pair_u4_
!LLVMIR: %1 = alloca <4 x i32>, i64 1, align 16
!LLVMIR: %2 = alloca <4 x i32>, i64 1, align 16
!LLVMIR: %3 = alloca <256 x i1>, i64 1, align 32
!LLVMIR: %4 = load <4 x i32>, ptr %1, align 16
!LLVMIR: %5 = load <4 x i32>, ptr %2, align 16
!LLVMIR: %6 = bitcast <4 x i32> %4 to <16 x i8>
!LLVMIR: %7 = bitcast <4 x i32> %5 to <16 x i8>
!LLVMIR: %8 = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> %6, <16 x i8> %7)
!LLVMIR: store <256 x i1> %8, ptr %3, align 32
subroutine test_mma_assemble_pair_u8()
use, intrinsic :: mma
implicit none
vector(unsigned(8)) vi10, vi11
__vector_pair :: vp
call mma_assemble_pair(vp, vi10, vi11)
end subroutine test_mma_assemble_pair_u8
!LLVMIR: @test_mma_assemble_pair_u8_
!LLVMIR: %1 = alloca <2 x i64>, i64 1, align 16
!LLVMIR: %2 = alloca <2 x i64>, i64 1, align 16
!LLVMIR: %3 = alloca <256 x i1>, i64 1, align 32
!LLVMIR: %4 = load <2 x i64>, ptr %1, align 16
!LLVMIR: %5 = load <2 x i64>, ptr %2, align 16
!LLVMIR: %6 = bitcast <2 x i64> %4 to <16 x i8>
!LLVMIR: %7 = bitcast <2 x i64> %5 to <16 x i8>
!LLVMIR: %8 = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> %6, <16 x i8> %7)
!LLVMIR: store <256 x i1> %8, ptr %3, align 32
subroutine test_mma_assemble_pair_r4()
use, intrinsic :: mma
implicit none
vector(real(4)) vi10, vi11
__vector_pair :: vp
call mma_assemble_pair(vp, vi10, vi11)
end subroutine test_mma_assemble_pair_r4
!LLVMIR: @test_mma_assemble_pair_r4_
!LLVMIR: %1 = alloca <4 x float>, i64 1, align 16
!LLVMIR: %2 = alloca <4 x float>, i64 1, align 16
!LLVMIR: %3 = alloca <256 x i1>, i64 1, align 32
!LLVMIR: %4 = load <4 x float>, ptr %1, align 16
!LLVMIR: %5 = load <4 x float>, ptr %2, align 16
!LLVMIR: %6 = bitcast <4 x float> %4 to <16 x i8>
!LLVMIR: %7 = bitcast <4 x float> %5 to <16 x i8>
!LLVMIR: %8 = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> %6, <16 x i8> %7)
!LLVMIR: store <256 x i1> %8, ptr %3, align 32
subroutine test_mma_assemble_pair_r8()
use, intrinsic :: mma
implicit none
vector(real(8)) vi10, vi11
__vector_pair :: vp
call mma_assemble_pair(vp, vi10, vi11)
end subroutine test_mma_assemble_pair_r8
!LLVMIR: @test_mma_assemble_pair_r8_
!LLVMIR: %1 = alloca <2 x double>, i64 1, align 16
!LLVMIR: %2 = alloca <2 x double>, i64 1, align 16
!LLVMIR: %3 = alloca <256 x i1>, i64 1, align 32
!LLVMIR: %4 = load <2 x double>, ptr %1, align 16
!LLVMIR: %5 = load <2 x double>, ptr %2, align 16
!LLVMIR: %6 = bitcast <2 x double> %4 to <16 x i8>
!LLVMIR: %7 = bitcast <2 x double> %5 to <16 x i8>
!LLVMIR: %8 = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> %6, <16 x i8> %7)
!LLVMIR: store <256 x i1> %8, ptr %3, align 32
! mma_build_acc
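! mma_build_acc also lowers to @llvm.ppc.mma.assemble.acc, but passes its four
! vector arguments to the intrinsic in reverse order (vi13 .. vi10), as the
! reversed operand order in the calls below shows.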
subroutine test_mma_build_acc_i1()
use, intrinsic :: mma
implicit none
vector(integer(1)) vi10, vi11, vi12, vi13
__vector_quad :: cq
call mma_build_acc(cq, vi10, vi11, vi12, vi13)
end subroutine test_mma_build_acc_i1
!CHECK-LABEL: @test_mma_build_acc_i1
!LLVMIR: %1 = alloca <512 x i1>, i64 1, align 64
!LLVMIR: %2 = alloca <16 x i8>, i64 1, align 16
!LLVMIR: %3 = alloca <16 x i8>, i64 1, align 16
!LLVMIR: %4 = alloca <16 x i8>, i64 1, align 16
!LLVMIR: %5 = alloca <16 x i8>, i64 1, align 16
!LLVMIR: %6 = load <16 x i8>, ptr %2, align 16
!LLVMIR: %7 = load <16 x i8>, ptr %3, align 16
!LLVMIR: %8 = load <16 x i8>, ptr %4, align 16
!LLVMIR: %9 = load <16 x i8>, ptr %5, align 16
!LLVMIR: %10 = call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %9, <16 x i8> %8, <16 x i8> %7, <16 x i8> %6)
!LLVMIR: store <512 x i1> %10, ptr %1, align 64
subroutine test_mma_build_acc_i2()
use, intrinsic :: mma
implicit none
vector(integer(2)) vi10, vi11, vi12, vi13
__vector_quad :: cq
call mma_build_acc(cq, vi10, vi11, vi12, vi13)
end subroutine test_mma_build_acc_i2
!CHECK-LABEL: @test_mma_build_acc_i2
!LLVMIR: %1 = alloca <512 x i1>, i64 1, align 64
!LLVMIR: %2 = alloca <8 x i16>, i64 1, align 16
!LLVMIR: %3 = alloca <8 x i16>, i64 1, align 16
!LLVMIR: %4 = alloca <8 x i16>, i64 1, align 16
!LLVMIR: %5 = alloca <8 x i16>, i64 1, align 16
!LLVMIR: %6 = load <8 x i16>, ptr %2, align 16
!LLVMIR: %7 = load <8 x i16>, ptr %3, align 16
!LLVMIR: %8 = load <8 x i16>, ptr %4, align 16
!LLVMIR: %9 = load <8 x i16>, ptr %5, align 16
!LLVMIR: %10 = bitcast <8 x i16> %9 to <16 x i8>
!LLVMIR: %11 = bitcast <8 x i16> %8 to <16 x i8>
!LLVMIR: %12 = bitcast <8 x i16> %7 to <16 x i8>
!LLVMIR: %13 = bitcast <8 x i16> %6 to <16 x i8>
!LLVMIR: %14 = call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %10, <16 x i8> %11, <16 x i8> %12, <16 x i8> %13)
!LLVMIR: store <512 x i1> %14, ptr %1, align 64
subroutine test_mma_build_acc_i4()
use, intrinsic :: mma
implicit none
vector(integer(4)) vi10, vi11, vi12, vi13
__vector_quad :: cq
call mma_build_acc(cq, vi10, vi11, vi12, vi13)
end subroutine test_mma_build_acc_i4
!CHECK-LABEL: @test_mma_build_acc_i4
!LLVMIR: %1 = alloca <512 x i1>, i64 1, align 64
!LLVMIR: %2 = alloca <4 x i32>, i64 1, align 16
!LLVMIR: %3 = alloca <4 x i32>, i64 1, align 16
!LLVMIR: %4 = alloca <4 x i32>, i64 1, align 16
!LLVMIR: %5 = alloca <4 x i32>, i64 1, align 16
!LLVMIR: %6 = load <4 x i32>, ptr %2, align 16
!LLVMIR: %7 = load <4 x i32>, ptr %3, align 16
!LLVMIR: %8 = load <4 x i32>, ptr %4, align 16
!LLVMIR: %9 = load <4 x i32>, ptr %5, align 16
!LLVMIR: %10 = bitcast <4 x i32> %9 to <16 x i8>
!LLVMIR: %11 = bitcast <4 x i32> %8 to <16 x i8>
!LLVMIR: %12 = bitcast <4 x i32> %7 to <16 x i8>
!LLVMIR: %13 = bitcast <4 x i32> %6 to <16 x i8>
!LLVMIR: %14 = call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %10, <16 x i8> %11, <16 x i8> %12, <16 x i8> %13)
!LLVMIR: store <512 x i1> %14, ptr %1, align 64
subroutine test_mma_build_acc_i8()
use, intrinsic :: mma
implicit none
vector(integer(8)) vi10, vi11, vi12, vi13
__vector_quad :: cq
call mma_build_acc(cq, vi10, vi11, vi12, vi13)
end subroutine test_mma_build_acc_i8
!CHECK-LABEL: @test_mma_build_acc_i8
!LLVMIR: %1 = alloca <512 x i1>, i64 1, align 64
!LLVMIR: %2 = alloca <2 x i64>, i64 1, align 16
!LLVMIR: %3 = alloca <2 x i64>, i64 1, align 16
!LLVMIR: %4 = alloca <2 x i64>, i64 1, align 16
!LLVMIR: %5 = alloca <2 x i64>, i64 1, align 16
!LLVMIR: %6 = load <2 x i64>, ptr %2, align 16
!LLVMIR: %7 = load <2 x i64>, ptr %3, align 16
!LLVMIR: %8 = load <2 x i64>, ptr %4, align 16
!LLVMIR: %9 = load <2 x i64>, ptr %5, align 16
!LLVMIR: %10 = bitcast <2 x i64> %9 to <16 x i8>
!LLVMIR: %11 = bitcast <2 x i64> %8 to <16 x i8>
!LLVMIR: %12 = bitcast <2 x i64> %7 to <16 x i8>
!LLVMIR: %13 = bitcast <2 x i64> %6 to <16 x i8>
!LLVMIR: %14 = call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %10, <16 x i8> %11, <16 x i8> %12, <16 x i8> %13)
!LLVMIR: store <512 x i1> %14, ptr %1, align 64
subroutine test_mma_build_acc_u1()
use, intrinsic :: mma
implicit none
vector(unsigned(1)) vi10, vi11, vi12, vi13
__vector_quad :: cq
call mma_build_acc(cq, vi10, vi11, vi12, vi13)
end subroutine test_mma_build_acc_u1
!CHECK-LABEL: @test_mma_build_acc_u1
!LLVMIR: %1 = alloca <512 x i1>, i64 1, align 64
!LLVMIR: %2 = alloca <16 x i8>, i64 1, align 16
!LLVMIR: %3 = alloca <16 x i8>, i64 1, align 16
!LLVMIR: %4 = alloca <16 x i8>, i64 1, align 16
!LLVMIR: %5 = alloca <16 x i8>, i64 1, align 16
!LLVMIR: %6 = load <16 x i8>, ptr %2, align 16
!LLVMIR: %7 = load <16 x i8>, ptr %3, align 16
!LLVMIR: %8 = load <16 x i8>, ptr %4, align 16
!LLVMIR: %9 = load <16 x i8>, ptr %5, align 16
!LLVMIR: %10 = call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %9, <16 x i8> %8, <16 x i8> %7, <16 x i8> %6)
!LLVMIR: store <512 x i1> %10, ptr %1, align 64
subroutine test_mma_build_acc_u2()
use, intrinsic :: mma
implicit none
vector(unsigned(2)) vi10, vi11, vi12, vi13
__vector_quad :: cq
call mma_build_acc(cq, vi10, vi11, vi12, vi13)
end subroutine test_mma_build_acc_u2
!CHECK-LABEL: @test_mma_build_acc_u2
!LLVMIR: %1 = alloca <512 x i1>, i64 1, align 64
!LLVMIR: %2 = alloca <8 x i16>, i64 1, align 16
!LLVMIR: %3 = alloca <8 x i16>, i64 1, align 16
!LLVMIR: %4 = alloca <8 x i16>, i64 1, align 16
!LLVMIR: %5 = alloca <8 x i16>, i64 1, align 16
!LLVMIR: %6 = load <8 x i16>, ptr %2, align 16
!LLVMIR: %7 = load <8 x i16>, ptr %3, align 16
!LLVMIR: %8 = load <8 x i16>, ptr %4, align 16
!LLVMIR: %9 = load <8 x i16>, ptr %5, align 16
!LLVMIR: %10 = bitcast <8 x i16> %9 to <16 x i8>
!LLVMIR: %11 = bitcast <8 x i16> %8 to <16 x i8>
!LLVMIR: %12 = bitcast <8 x i16> %7 to <16 x i8>
!LLVMIR: %13 = bitcast <8 x i16> %6 to <16 x i8>
!LLVMIR: %14 = call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %10, <16 x i8> %11, <16 x i8> %12, <16 x i8> %13)
!LLVMIR: store <512 x i1> %14, ptr %1, align 64
subroutine test_mma_build_acc_u4()
use, intrinsic :: mma
implicit none
vector(unsigned(4)) vi10, vi11, vi12, vi13
__vector_quad :: cq
call mma_build_acc(cq, vi10, vi11, vi12, vi13)
end subroutine test_mma_build_acc_u4
!CHECK-LABEL: @test_mma_build_acc_u4
!LLVMIR: %1 = alloca <512 x i1>, i64 1, align 64
!LLVMIR: %2 = alloca <4 x i32>, i64 1, align 16
!LLVMIR: %3 = alloca <4 x i32>, i64 1, align 16
!LLVMIR: %4 = alloca <4 x i32>, i64 1, align 16
!LLVMIR: %5 = alloca <4 x i32>, i64 1, align 16
!LLVMIR: %6 = load <4 x i32>, ptr %2, align 16
!LLVMIR: %7 = load <4 x i32>, ptr %3, align 16
!LLVMIR: %8 = load <4 x i32>, ptr %4, align 16
!LLVMIR: %9 = load <4 x i32>, ptr %5, align 16
!LLVMIR: %10 = bitcast <4 x i32> %9 to <16 x i8>
!LLVMIR: %11 = bitcast <4 x i32> %8 to <16 x i8>
!LLVMIR: %12 = bitcast <4 x i32> %7 to <16 x i8>
!LLVMIR: %13 = bitcast <4 x i32> %6 to <16 x i8>
!LLVMIR: %14 = call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %10, <16 x i8> %11, <16 x i8> %12, <16 x i8> %13)
!LLVMIR: store <512 x i1> %14, ptr %1, align 64
subroutine test_mma_build_acc_u8()
use, intrinsic :: mma
implicit none
vector(unsigned(8)) vi10, vi11, vi12, vi13
__vector_quad :: cq
call mma_build_acc(cq, vi10, vi11, vi12, vi13)
end subroutine test_mma_build_acc_u8
!CHECK-LABEL: @test_mma_build_acc_u8
!LLVMIR: %1 = alloca <512 x i1>, i64 1, align 64
!LLVMIR: %2 = alloca <2 x i64>, i64 1, align 16
!LLVMIR: %3 = alloca <2 x i64>, i64 1, align 16
!LLVMIR: %4 = alloca <2 x i64>, i64 1, align 16
!LLVMIR: %5 = alloca <2 x i64>, i64 1, align 16
!LLVMIR: %6 = load <2 x i64>, ptr %2, align 16
!LLVMIR: %7 = load <2 x i64>, ptr %3, align 16
!LLVMIR: %8 = load <2 x i64>, ptr %4, align 16
!LLVMIR: %9 = load <2 x i64>, ptr %5, align 16
!LLVMIR: %10 = bitcast <2 x i64> %9 to <16 x i8>
!LLVMIR: %11 = bitcast <2 x i64> %8 to <16 x i8>
!LLVMIR: %12 = bitcast <2 x i64> %7 to <16 x i8>
!LLVMIR: %13 = bitcast <2 x i64> %6 to <16 x i8>
!LLVMIR: %14 = call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %10, <16 x i8> %11, <16 x i8> %12, <16 x i8> %13)
!LLVMIR: store <512 x i1> %14, ptr %1, align 64
subroutine test_mma_build_acc_r4()
use, intrinsic :: mma
implicit none
vector(real(4)) vi10, vi11, vi12, vi13
__vector_quad :: cq
call mma_build_acc(cq, vi10, vi11, vi12, vi13)
end subroutine test_mma_build_acc_r4
!CHECK-LABEL: @test_mma_build_acc_r4
!LLVMIR: %1 = alloca <512 x i1>, i64 1, align 64
!LLVMIR: %2 = alloca <4 x float>, i64 1, align 16
!LLVMIR: %3 = alloca <4 x float>, i64 1, align 16
!LLVMIR: %4 = alloca <4 x float>, i64 1, align 16
!LLVMIR: %5 = alloca <4 x float>, i64 1, align 16
!LLVMIR: %6 = load <4 x float>, ptr %2, align 16
!LLVMIR: %7 = load <4 x float>, ptr %3, align 16
!LLVMIR: %8 = load <4 x float>, ptr %4, align 16
!LLVMIR: %9 = load <4 x float>, ptr %5, align 16
!LLVMIR: %10 = bitcast <4 x float> %9 to <16 x i8>
!LLVMIR: %11 = bitcast <4 x float> %8 to <16 x i8>
!LLVMIR: %12 = bitcast <4 x float> %7 to <16 x i8>
!LLVMIR: %13 = bitcast <4 x float> %6 to <16 x i8>
!LLVMIR: %14 = call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %10, <16 x i8> %11, <16 x i8> %12, <16 x i8> %13)
!LLVMIR: store <512 x i1> %14, ptr %1, align 64
subroutine test_mma_build_acc_r8()
use, intrinsic :: mma
implicit none
vector(real(8)) vi10, vi11, vi12, vi13
__vector_quad :: cq
call mma_build_acc(cq, vi10, vi11, vi12, vi13)
end subroutine test_mma_build_acc_r8
!CHECK-LABEL: @test_mma_build_acc_r8
!LLVMIR: %1 = alloca <512 x i1>, i64 1, align 64
!LLVMIR: %2 = alloca <2 x double>, i64 1, align 16
!LLVMIR: %3 = alloca <2 x double>, i64 1, align 16
!LLVMIR: %4 = alloca <2 x double>, i64 1, align 16
!LLVMIR: %5 = alloca <2 x double>, i64 1, align 16
!LLVMIR: %6 = load <2 x double>, ptr %2, align 16
!LLVMIR: %7 = load <2 x double>, ptr %3, align 16
!LLVMIR: %8 = load <2 x double>, ptr %4, align 16
!LLVMIR: %9 = load <2 x double>, ptr %5, align 16
!LLVMIR: %10 = bitcast <2 x double> %9 to <16 x i8>
!LLVMIR: %11 = bitcast <2 x double> %8 to <16 x i8>
!LLVMIR: %12 = bitcast <2 x double> %7 to <16 x i8>
!LLVMIR: %13 = bitcast <2 x double> %6 to <16 x i8>
!LLVMIR: %14 = call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %10, <16 x i8> %11, <16 x i8> %12, <16 x i8> %13)
!LLVMIR: store <512 x i1> %14, ptr %1, align 64
! mma_disassemble_acc
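! mma_disassemble_acc is expected to lower to @llvm.ppc.mma.disassemble.acc,
! which returns a struct of four <16 x i8> values that is stored to the data
! argument.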
subroutine test_disassemble_acc()
use, intrinsic :: mma
implicit none
__vector_quad :: vq
real :: data
call mma_disassemble_acc(data, vq)
end subroutine
!CHECK-LABEL: @test_disassemble_acc_
!LLVMIR: %1 = alloca float, i64 1, align 4
!LLVMIR: %2 = alloca <512 x i1>, i64 1, align 64
!LLVMIR: %3 = load <512 x i1>, ptr %2, align 64
!LLVMIR: %4 = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> %3)
!LLVMIR: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %4, ptr %1, align 16
! mma_disassemble_pair
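! mma_disassemble_pair is expected to lower to @llvm.ppc.vsx.disassemble.pair,
! which returns a struct of two <16 x i8> values that is stored to the data
! argument.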
subroutine test_disassemble_pair()
use, intrinsic :: mma
implicit none
__vector_pair :: vp
real :: data
call mma_disassemble_pair(data, vp)
end subroutine
!CHECK-LABEL: @test_disassemble_pair_
!LLVMIR: %1 = alloca float, i64 1, align 4
!LLVMIR: %2 = alloca <256 x i1>, i64 1, align 32
!LLVMIR: %3 = load <256 x i1>, ptr %2, align 32
!LLVMIR: %4 = call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %3)
!LLVMIR: store { <16 x i8>, <16 x i8> } %4, ptr %1, align 16