| ! RUN: %flang_fc1 -flang-experimental-hlfir -triple powerpc64le-unknown-unknown -target-cpu pwr10 -emit-llvm %s -o - | FileCheck --check-prefixes="CHECK,LLVMIR" %s |
| ! REQUIRES: target=powerpc{{.*}} |
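| |
| ! Test lowering of the PowerPC MMA intrinsic module procedures mma_assemble_acc, |
| ! mma_assemble_pair, mma_build_acc, mma_disassemble_acc and mma_disassemble_pair |
| ! to the corresponding llvm.ppc.mma.* and llvm.ppc.vsx.* LLVM intrinsics. |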
| |
| ! mma_assemble_acc |
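| ! mma_assemble_acc packs four vectors into a __vector_quad. The tests below check that, |
| ! for each supported vector element type, lowering loads the four operands, bitcasts any |
| ! vector that is not already <16 x i8> to <16 x i8>, calls llvm.ppc.mma.assemble.acc, and |
| ! stores the resulting <512 x i1> accumulator. |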
| |
| subroutine test_assemble_acc_i1() |
| use, intrinsic :: mma |
| implicit none |
| vector(integer(1)) vi10, vi11, vi12, vi13 |
| __vector_quad :: cq |
| call mma_assemble_acc(cq, vi10, vi11, vi12, vi13) |
| end subroutine test_assemble_acc_i1 |
| |
| ! CHECK-LABEL: @test_assemble_acc_i1 |
| ! LLVMIR: %1 = alloca <512 x i1>, i64 1, align 64 |
| ! LLVMIR: %2 = alloca <16 x i8>, i64 1, align 16 |
| ! LLVMIR: %3 = alloca <16 x i8>, i64 1, align 16 |
| ! LLVMIR: %4 = alloca <16 x i8>, i64 1, align 16 |
| ! LLVMIR: %5 = alloca <16 x i8>, i64 1, align 16 |
| ! LLVMIR: %6 = load <16 x i8>, ptr %2, align 16 |
| ! LLVMIR: %7 = load <16 x i8>, ptr %3, align 16 |
| ! LLVMIR: %8 = load <16 x i8>, ptr %4, align 16 |
| ! LLVMIR: %9 = load <16 x i8>, ptr %5, align 16 |
| ! LLVMIR: %10 = call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %6, <16 x i8> %7, <16 x i8> %8, <16 x i8> %9) |
| ! LLVMIR: store <512 x i1> %10, ptr %1, align 64 |
| |
| subroutine test_assemble_acc_i2() |
| use, intrinsic :: mma |
| implicit none |
| vector(integer(2)) vi10, vi11, vi12, vi13 |
| __vector_quad :: cq |
| call mma_assemble_acc(cq, vi10, vi11, vi12, vi13) |
| end subroutine test_assemble_acc_i2 |
| |
| ! CHECK-LABEL: @test_assemble_acc_i2 |
| ! LLVMIR: %1 = alloca <512 x i1>, i64 1, align 64 |
| ! LLVMIR: %2 = alloca <8 x i16>, i64 1, align 16 |
| ! LLVMIR: %3 = alloca <8 x i16>, i64 1, align 16 |
| ! LLVMIR: %4 = alloca <8 x i16>, i64 1, align 16 |
| ! LLVMIR: %5 = alloca <8 x i16>, i64 1, align 16 |
| ! LLVMIR: %6 = load <8 x i16>, ptr %2, align 16 |
| ! LLVMIR: %7 = load <8 x i16>, ptr %3, align 16 |
| ! LLVMIR: %8 = load <8 x i16>, ptr %4, align 16 |
| ! LLVMIR: %9 = load <8 x i16>, ptr %5, align 16 |
| ! LLVMIR: %10 = bitcast <8 x i16> %6 to <16 x i8> |
| ! LLVMIR: %11 = bitcast <8 x i16> %7 to <16 x i8> |
| ! LLVMIR: %12 = bitcast <8 x i16> %8 to <16 x i8> |
| ! LLVMIR: %13 = bitcast <8 x i16> %9 to <16 x i8> |
| ! LLVMIR: %14 = call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %10, <16 x i8> %11, <16 x i8> %12, <16 x i8> %13) |
| ! LLVMIR: store <512 x i1> %14, ptr %1, align 64 |
| |
| subroutine test_assemble_acc_i4() |
| use, intrinsic :: mma |
| implicit none |
| vector(integer(4)) vi10, vi11, vi12, vi13 |
| __vector_quad :: cq |
| call mma_assemble_acc(cq, vi10, vi11, vi12, vi13) |
| end subroutine test_assemble_acc_i4 |
| |
| ! CHECK-LABEL: @test_assemble_acc_i4 |
| ! LLVMIR: %1 = alloca <512 x i1>, i64 1, align 64 |
| ! LLVMIR: %2 = alloca <4 x i32>, i64 1, align 16 |
| ! LLVMIR: %3 = alloca <4 x i32>, i64 1, align 16 |
| ! LLVMIR: %4 = alloca <4 x i32>, i64 1, align 16 |
| ! LLVMIR: %5 = alloca <4 x i32>, i64 1, align 16 |
| ! LLVMIR: %6 = load <4 x i32>, ptr %2, align 16 |
| ! LLVMIR: %7 = load <4 x i32>, ptr %3, align 16 |
| ! LLVMIR: %8 = load <4 x i32>, ptr %4, align 16 |
| ! LLVMIR: %9 = load <4 x i32>, ptr %5, align 16 |
| ! LLVMIR: %10 = bitcast <4 x i32> %6 to <16 x i8> |
| ! LLVMIR: %11 = bitcast <4 x i32> %7 to <16 x i8> |
| ! LLVMIR: %12 = bitcast <4 x i32> %8 to <16 x i8> |
| ! LLVMIR: %13 = bitcast <4 x i32> %9 to <16 x i8> |
| ! LLVMIR: %14 = call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %10, <16 x i8> %11, <16 x i8> %12, <16 x i8> %13) |
| ! LLVMIR: store <512 x i1> %14, ptr %1, align 64 |
| |
| subroutine test_assemble_acc_i8() |
| use, intrinsic :: mma |
| implicit none |
| vector(integer(8)) vi10, vi11, vi12, vi13 |
| __vector_quad :: cq |
| call mma_assemble_acc(cq, vi10, vi11, vi12, vi13) |
| end subroutine test_assemble_acc_i8 |
| |
| ! CHECK-LABEL: @test_assemble_acc_i8 |
| ! LLVMIR: %1 = alloca <512 x i1>, i64 1, align 64 |
| ! LLVMIR: %2 = alloca <2 x i64>, i64 1, align 16 |
| ! LLVMIR: %3 = alloca <2 x i64>, i64 1, align 16 |
| ! LLVMIR: %4 = alloca <2 x i64>, i64 1, align 16 |
| ! LLVMIR: %5 = alloca <2 x i64>, i64 1, align 16 |
| ! LLVMIR: %6 = load <2 x i64>, ptr %2, align 16 |
| ! LLVMIR: %7 = load <2 x i64>, ptr %3, align 16 |
| ! LLVMIR: %8 = load <2 x i64>, ptr %4, align 16 |
| ! LLVMIR: %9 = load <2 x i64>, ptr %5, align 16 |
| ! LLVMIR: %10 = bitcast <2 x i64> %6 to <16 x i8> |
| ! LLVMIR: %11 = bitcast <2 x i64> %7 to <16 x i8> |
| ! LLVMIR: %12 = bitcast <2 x i64> %8 to <16 x i8> |
| ! LLVMIR: %13 = bitcast <2 x i64> %9 to <16 x i8> |
| ! LLVMIR: %14 = call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %10, <16 x i8> %11, <16 x i8> %12, <16 x i8> %13) |
| ! LLVMIR: store <512 x i1> %14, ptr %1, align 64 |
| |
| subroutine test_assemble_acc_u1() |
| use, intrinsic :: mma |
| implicit none |
| vector(unsigned(1)) vi10, vi11, vi12, vi13 |
| __vector_quad :: cq |
| call mma_assemble_acc(cq, vi10, vi11, vi12, vi13) |
| end subroutine test_assemble_acc_u1 |
| |
| ! CHECK-LABEL: @test_assemble_acc_u1 |
| ! LLVMIR: %1 = alloca <512 x i1>, i64 1, align 64 |
| ! LLVMIR: %2 = alloca <16 x i8>, i64 1, align 16 |
| ! LLVMIR: %3 = alloca <16 x i8>, i64 1, align 16 |
| ! LLVMIR: %4 = alloca <16 x i8>, i64 1, align 16 |
| ! LLVMIR: %5 = alloca <16 x i8>, i64 1, align 16 |
| ! LLVMIR: %6 = load <16 x i8>, ptr %2, align 16 |
| ! LLVMIR: %7 = load <16 x i8>, ptr %3, align 16 |
| ! LLVMIR: %8 = load <16 x i8>, ptr %4, align 16 |
| ! LLVMIR: %9 = load <16 x i8>, ptr %5, align 16 |
| ! LLVMIR: %10 = call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %6, <16 x i8> %7, <16 x i8> %8, <16 x i8> %9) |
| ! LLVMIR: store <512 x i1> %10, ptr %1, align 64 |
| |
| subroutine test_assemble_acc_u2() |
| use, intrinsic :: mma |
| implicit none |
| vector(unsigned(2)) vi10, vi11, vi12, vi13 |
| __vector_quad :: cq |
| call mma_assemble_acc(cq, vi10, vi11, vi12, vi13) |
| end subroutine test_assemble_acc_u2 |
| |
| ! CHECK-LABEL: @test_assemble_acc_u2 |
| ! LLVMIR: %1 = alloca <512 x i1>, i64 1, align 64 |
| ! LLVMIR: %2 = alloca <8 x i16>, i64 1, align 16 |
| ! LLVMIR: %3 = alloca <8 x i16>, i64 1, align 16 |
| ! LLVMIR: %4 = alloca <8 x i16>, i64 1, align 16 |
| ! LLVMIR: %5 = alloca <8 x i16>, i64 1, align 16 |
| ! LLVMIR: %6 = load <8 x i16>, ptr %2, align 16 |
| ! LLVMIR: %7 = load <8 x i16>, ptr %3, align 16 |
| ! LLVMIR: %8 = load <8 x i16>, ptr %4, align 16 |
| ! LLVMIR: %9 = load <8 x i16>, ptr %5, align 16 |
| ! LLVMIR: %10 = bitcast <8 x i16> %6 to <16 x i8> |
| ! LLVMIR: %11 = bitcast <8 x i16> %7 to <16 x i8> |
| ! LLVMIR: %12 = bitcast <8 x i16> %8 to <16 x i8> |
| ! LLVMIR: %13 = bitcast <8 x i16> %9 to <16 x i8> |
| ! LLVMIR: %14 = call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %10, <16 x i8> %11, <16 x i8> %12, <16 x i8> %13) |
| ! LLVMIR: store <512 x i1> %14, ptr %1, align 64 |
| |
| subroutine test_assemble_acc_u4() |
| use, intrinsic :: mma |
| implicit none |
| vector(unsigned(4)) vi10, vi11, vi12, vi13 |
| __vector_quad :: cq |
| call mma_assemble_acc(cq, vi10, vi11, vi12, vi13) |
| end subroutine test_assemble_acc_u4 |
| |
| ! CHECK-LABEL: @test_assemble_acc_u4 |
| ! LLVMIR: %1 = alloca <512 x i1>, i64 1, align 64 |
| ! LLVMIR: %2 = alloca <4 x i32>, i64 1, align 16 |
| ! LLVMIR: %3 = alloca <4 x i32>, i64 1, align 16 |
| ! LLVMIR: %4 = alloca <4 x i32>, i64 1, align 16 |
| ! LLVMIR: %5 = alloca <4 x i32>, i64 1, align 16 |
| ! LLVMIR: %6 = load <4 x i32>, ptr %2, align 16 |
| ! LLVMIR: %7 = load <4 x i32>, ptr %3, align 16 |
| ! LLVMIR: %8 = load <4 x i32>, ptr %4, align 16 |
| ! LLVMIR: %9 = load <4 x i32>, ptr %5, align 16 |
| ! LLVMIR: %10 = bitcast <4 x i32> %6 to <16 x i8> |
| ! LLVMIR: %11 = bitcast <4 x i32> %7 to <16 x i8> |
| ! LLVMIR: %12 = bitcast <4 x i32> %8 to <16 x i8> |
| ! LLVMIR: %13 = bitcast <4 x i32> %9 to <16 x i8> |
| ! LLVMIR: %14 = call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %10, <16 x i8> %11, <16 x i8> %12, <16 x i8> %13) |
| ! LLVMIR: store <512 x i1> %14, ptr %1, align 64 |
| |
| subroutine test_assemble_acc_u8() |
| use, intrinsic :: mma |
| implicit none |
| vector(unsigned(8)) vi10, vi11, vi12, vi13 |
| __vector_quad :: cq |
| call mma_assemble_acc(cq, vi10, vi11, vi12, vi13) |
| end subroutine test_assemble_acc_u8 |
| |
| ! CHECK-LABEL: @test_assemble_acc_u8 |
| ! LLVMIR: %1 = alloca <512 x i1>, i64 1, align 64 |
| ! LLVMIR: %2 = alloca <2 x i64>, i64 1, align 16 |
| ! LLVMIR: %3 = alloca <2 x i64>, i64 1, align 16 |
| ! LLVMIR: %4 = alloca <2 x i64>, i64 1, align 16 |
| ! LLVMIR: %5 = alloca <2 x i64>, i64 1, align 16 |
| ! LLVMIR: %6 = load <2 x i64>, ptr %2, align 16 |
| ! LLVMIR: %7 = load <2 x i64>, ptr %3, align 16 |
| ! LLVMIR: %8 = load <2 x i64>, ptr %4, align 16 |
| ! LLVMIR: %9 = load <2 x i64>, ptr %5, align 16 |
| ! LLVMIR: %10 = bitcast <2 x i64> %6 to <16 x i8> |
| ! LLVMIR: %11 = bitcast <2 x i64> %7 to <16 x i8> |
| ! LLVMIR: %12 = bitcast <2 x i64> %8 to <16 x i8> |
| ! LLVMIR: %13 = bitcast <2 x i64> %9 to <16 x i8> |
| ! LLVMIR: %14 = call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %10, <16 x i8> %11, <16 x i8> %12, <16 x i8> %13) |
| ! LLVMIR: store <512 x i1> %14, ptr %1, align 64 |
| |
| subroutine test_assemble_acc_r4() |
| use, intrinsic :: mma |
| implicit none |
| vector(real(4)) vi10, vi11, vi12, vi13 |
| __vector_quad :: cq |
| call mma_assemble_acc(cq, vi10, vi11, vi12, vi13) |
| end subroutine test_assemble_acc_r4 |
| |
| ! CHECK-LABEL: @test_assemble_acc_r4 |
| ! LLVMIR: %1 = alloca <512 x i1>, i64 1, align 64 |
| ! LLVMIR: %2 = alloca <4 x float>, i64 1, align 16 |
| ! LLVMIR: %3 = alloca <4 x float>, i64 1, align 16 |
| ! LLVMIR: %4 = alloca <4 x float>, i64 1, align 16 |
| ! LLVMIR: %5 = alloca <4 x float>, i64 1, align 16 |
| ! LLVMIR: %6 = load <4 x float>, ptr %2, align 16 |
| ! LLVMIR: %7 = load <4 x float>, ptr %3, align 16 |
| ! LLVMIR: %8 = load <4 x float>, ptr %4, align 16 |
| ! LLVMIR: %9 = load <4 x float>, ptr %5, align 16 |
| ! LLVMIR: %10 = bitcast <4 x float> %6 to <16 x i8> |
| ! LLVMIR: %11 = bitcast <4 x float> %7 to <16 x i8> |
| ! LLVMIR: %12 = bitcast <4 x float> %8 to <16 x i8> |
| ! LLVMIR: %13 = bitcast <4 x float> %9 to <16 x i8> |
| ! LLVMIR: %14 = call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %10, <16 x i8> %11, <16 x i8> %12, <16 x i8> %13) |
| ! LLVMIR: store <512 x i1> %14, ptr %1, align 64 |
| |
| subroutine test_assemble_acc_r8() |
| use, intrinsic :: mma |
| implicit none |
| vector(real(8)) vi10, vi11, vi12, vi13 |
| __vector_quad :: cq |
| call mma_assemble_acc(cq, vi10, vi11, vi12, vi13) |
| end subroutine test_assemble_acc_r8 |
| |
| !CHECK-LABEL: @test_assemble_acc_r8 |
| !LLVMIR: %1 = alloca <512 x i1>, i64 1, align 64 |
| !LLVMIR: %2 = alloca <2 x double>, i64 1, align 16 |
| !LLVMIR: %3 = alloca <2 x double>, i64 1, align 16 |
| !LLVMIR: %4 = alloca <2 x double>, i64 1, align 16 |
| !LLVMIR: %5 = alloca <2 x double>, i64 1, align 16 |
| !LLVMIR: %6 = load <2 x double>, ptr %2, align 16 |
| !LLVMIR: %7 = load <2 x double>, ptr %3, align 16 |
| !LLVMIR: %8 = load <2 x double>, ptr %4, align 16 |
| !LLVMIR: %9 = load <2 x double>, ptr %5, align 16 |
| !LLVMIR: %10 = bitcast <2 x double> %6 to <16 x i8> |
| !LLVMIR: %11 = bitcast <2 x double> %7 to <16 x i8> |
| !LLVMIR: %12 = bitcast <2 x double> %8 to <16 x i8> |
| !LLVMIR: %13 = bitcast <2 x double> %9 to <16 x i8> |
| !LLVMIR: %14 = call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %10, <16 x i8> %11, <16 x i8> %12, <16 x i8> %13) |
| !LLVMIR: store <512 x i1> %14, ptr %1, align 64 |
| |
| ! mma_assemble_pair |
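| ! mma_assemble_pair packs two vectors into a __vector_pair. The tests below check that |
| ! lowering bitcasts any non <16 x i8> operand to <16 x i8>, calls |
| ! llvm.ppc.vsx.assemble.pair, and stores the resulting <256 x i1> pair. |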
| |
| subroutine test_mma_assemble_pair_i1() |
| use, intrinsic :: mma |
| implicit none |
| vector(integer(1)) vi10, vi11 |
| __vector_pair :: vp |
| call mma_assemble_pair(vp, vi10, vi11) |
| end subroutine test_mma_assemble_pair_i1 |
| |
| !LLVMIR: @test_mma_assemble_pair_i1_ |
| !LLVMIR: %1 = alloca <16 x i8>, i64 1, align 16 |
| !LLVMIR: %2 = alloca <16 x i8>, i64 1, align 16 |
| !LLVMIR: %3 = alloca <256 x i1>, i64 1, align 32 |
| !LLVMIR: %4 = load <16 x i8>, ptr %1, align 16 |
| !LLVMIR: %5 = load <16 x i8>, ptr %2, align 16 |
| !LLVMIR: %6 = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> %4, <16 x i8> %5) |
| !LLVMIR: store <256 x i1> %6, ptr %3, align 32 |
| |
| subroutine test_mma_assemble_pair_i2() |
| use, intrinsic :: mma |
| implicit none |
| vector(integer(2)) vi10, vi11 |
| __vector_pair :: vp |
| call mma_assemble_pair(vp, vi10, vi11) |
| end subroutine test_mma_assemble_pair_i2 |
| |
| !LLVMIR: @test_mma_assemble_pair_i2_ |
| !LLVMIR: %1 = alloca <8 x i16>, i64 1, align 16 |
| !LLVMIR: %2 = alloca <8 x i16>, i64 1, align 16 |
| !LLVMIR: %3 = alloca <256 x i1>, i64 1, align 32 |
| !LLVMIR: %4 = load <8 x i16>, ptr %1, align 16 |
| !LLVMIR: %5 = load <8 x i16>, ptr %2, align 16 |
| !LLVMIR: %6 = bitcast <8 x i16> %4 to <16 x i8> |
| !LLVMIR: %7 = bitcast <8 x i16> %5 to <16 x i8> |
| !LLVMIR: %8 = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> %6, <16 x i8> %7) |
| !LLVMIR: store <256 x i1> %8, ptr %3, align 32 |
| |
| subroutine test_mma_assemble_pair_i4() |
| use, intrinsic :: mma |
| implicit none |
| vector(integer(4)) vi10, vi11 |
| __vector_pair :: vp |
| call mma_assemble_pair(vp, vi10, vi11) |
| end subroutine test_mma_assemble_pair_i4 |
| |
| !LLVMIR: @test_mma_assemble_pair_i4_ |
| !LLVMIR: %1 = alloca <4 x i32>, i64 1, align 16 |
| !LLVMIR: %2 = alloca <4 x i32>, i64 1, align 16 |
| !LLVMIR: %3 = alloca <256 x i1>, i64 1, align 32 |
| !LLVMIR: %4 = load <4 x i32>, ptr %1, align 16 |
| !LLVMIR: %5 = load <4 x i32>, ptr %2, align 16 |
| !LLVMIR: %6 = bitcast <4 x i32> %4 to <16 x i8> |
| !LLVMIR: %7 = bitcast <4 x i32> %5 to <16 x i8> |
| !LLVMIR: %8 = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> %6, <16 x i8> %7) |
| !LLVMIR: store <256 x i1> %8, ptr %3, align 32 |
| |
| subroutine test_mma_assemble_pair_i8() |
| use, intrinsic :: mma |
| implicit none |
| vector(integer(8)) vi10, vi11 |
| __vector_pair :: vp |
| call mma_assemble_pair(vp, vi10, vi11) |
| end subroutine test_mma_assemble_pair_i8 |
| |
| !LLVMIR: @test_mma_assemble_pair_i8_ |
| !LLVMIR: %1 = alloca <2 x i64>, i64 1, align 16 |
| !LLVMIR: %2 = alloca <2 x i64>, i64 1, align 16 |
| !LLVMIR: %3 = alloca <256 x i1>, i64 1, align 32 |
| !LLVMIR: %4 = load <2 x i64>, ptr %1, align 16 |
| !LLVMIR: %5 = load <2 x i64>, ptr %2, align 16 |
| !LLVMIR: %6 = bitcast <2 x i64> %4 to <16 x i8> |
| !LLVMIR: %7 = bitcast <2 x i64> %5 to <16 x i8> |
| !LLVMIR: %8 = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> %6, <16 x i8> %7) |
| !LLVMIR: store <256 x i1> %8, ptr %3, align 32 |
| |
| subroutine test_mma_assemble_pair_u1() |
| use, intrinsic :: mma |
| implicit none |
| vector(unsigned(1)) vi10, vi11 |
| __vector_pair :: vp |
| call mma_assemble_pair(vp, vi10, vi11) |
| end subroutine test_mma_assemble_pair_u1 |
| |
| !LLVMIR: @test_mma_assemble_pair_u1_ |
| !LLVMIR: %1 = alloca <16 x i8>, i64 1, align 16 |
| !LLVMIR: %2 = alloca <16 x i8>, i64 1, align 16 |
| !LLVMIR: %3 = alloca <256 x i1>, i64 1, align 32 |
| !LLVMIR: %4 = load <16 x i8>, ptr %1, align 16 |
| !LLVMIR: %5 = load <16 x i8>, ptr %2, align 16 |
| !LLVMIR: %6 = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> %4, <16 x i8> %5) |
| !LLVMIR: store <256 x i1> %6, ptr %3, align 32 |
| |
| subroutine test_mma_assemble_pair_u2() |
| use, intrinsic :: mma |
| implicit none |
| vector(unsigned(2)) vi10, vi11 |
| __vector_pair :: vp |
| call mma_assemble_pair(vp, vi10, vi11) |
| end subroutine test_mma_assemble_pair_u2 |
| |
| !LLVMIR: @test_mma_assemble_pair_u2_ |
| !LLVMIR: %1 = alloca <8 x i16>, i64 1, align 16 |
| !LLVMIR: %2 = alloca <8 x i16>, i64 1, align 16 |
| !LLVMIR: %3 = alloca <256 x i1>, i64 1, align 32 |
| !LLVMIR: %4 = load <8 x i16>, ptr %1, align 16 |
| !LLVMIR: %5 = load <8 x i16>, ptr %2, align 16 |
| !LLVMIR: %6 = bitcast <8 x i16> %4 to <16 x i8> |
| !LLVMIR: %7 = bitcast <8 x i16> %5 to <16 x i8> |
| !LLVMIR: %8 = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> %6, <16 x i8> %7) |
| !LLVMIR: store <256 x i1> %8, ptr %3, align 32 |
| |
| subroutine test_mma_assemble_pair_u4() |
| use, intrinsic :: mma |
| implicit none |
| vector(unsigned(4)) vi10, vi11 |
| __vector_pair :: vp |
| call mma_assemble_pair(vp, vi10, vi11) |
| end subroutine test_mma_assemble_pair_u4 |
| |
| !LLVMIR: @test_mma_assemble_pair_u4_ |
| !LLVMIR: %1 = alloca <4 x i32>, i64 1, align 16 |
| !LLVMIR: %2 = alloca <4 x i32>, i64 1, align 16 |
| !LLVMIR: %3 = alloca <256 x i1>, i64 1, align 32 |
| !LLVMIR: %4 = load <4 x i32>, ptr %1, align 16 |
| !LLVMIR: %5 = load <4 x i32>, ptr %2, align 16 |
| !LLVMIR: %6 = bitcast <4 x i32> %4 to <16 x i8> |
| !LLVMIR: %7 = bitcast <4 x i32> %5 to <16 x i8> |
| !LLVMIR: %8 = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> %6, <16 x i8> %7) |
| !LLVMIR: store <256 x i1> %8, ptr %3, align 32 |
| |
| subroutine test_mma_assemble_pair_u8() |
| use, intrinsic :: mma |
| implicit none |
| vector(unsigned(8)) vi10, vi11 |
| __vector_pair :: vp |
| call mma_assemble_pair(vp, vi10, vi11) |
| end subroutine test_mma_assemble_pair_u8 |
| |
| !LLVMIR: @test_mma_assemble_pair_u8_ |
| !LLVMIR: %1 = alloca <2 x i64>, i64 1, align 16 |
| !LLVMIR: %2 = alloca <2 x i64>, i64 1, align 16 |
| !LLVMIR: %3 = alloca <256 x i1>, i64 1, align 32 |
| !LLVMIR: %4 = load <2 x i64>, ptr %1, align 16 |
| !LLVMIR: %5 = load <2 x i64>, ptr %2, align 16 |
| !LLVMIR: %6 = bitcast <2 x i64> %4 to <16 x i8> |
| !LLVMIR: %7 = bitcast <2 x i64> %5 to <16 x i8> |
| !LLVMIR: %8 = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> %6, <16 x i8> %7) |
| !LLVMIR: store <256 x i1> %8, ptr %3, align 32 |
| |
| subroutine test_mma_assemble_pair_r4() |
| use, intrinsic :: mma |
| implicit none |
| vector(real(4)) vi10, vi11 |
| __vector_pair :: vp |
| call mma_assemble_pair(vp, vi10, vi11) |
| end subroutine test_mma_assemble_pair_r4 |
| |
| !LLVMIR: @test_mma_assemble_pair_r4_ |
| !LLVMIR: %1 = alloca <4 x float>, i64 1, align 16 |
| !LLVMIR: %2 = alloca <4 x float>, i64 1, align 16 |
| !LLVMIR: %3 = alloca <256 x i1>, i64 1, align 32 |
| !LLVMIR: %4 = load <4 x float>, ptr %1, align 16 |
| !LLVMIR: %5 = load <4 x float>, ptr %2, align 16 |
| !LLVMIR: %6 = bitcast <4 x float> %4 to <16 x i8> |
| !LLVMIR: %7 = bitcast <4 x float> %5 to <16 x i8> |
| !LLVMIR: %8 = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> %6, <16 x i8> %7) |
| !LLVMIR: store <256 x i1> %8, ptr %3, align 32 |
| |
| subroutine test_mma_assemble_pair_r8() |
| use, intrinsic :: mma |
| implicit none |
| vector(real(8)) vi10, vi11 |
| __vector_pair :: vp |
| call mma_assemble_pair(vp, vi10, vi11) |
| end subroutine test_mma_assemble_pair_r8 |
| |
| !LLVMIR: @test_mma_assemble_pair_r8_ |
| !LLVMIR: %1 = alloca <2 x double>, i64 1, align 16 |
| !LLVMIR: %2 = alloca <2 x double>, i64 1, align 16 |
| !LLVMIR: %3 = alloca <256 x i1>, i64 1, align 32 |
| !LLVMIR: %4 = load <2 x double>, ptr %1, align 16 |
| !LLVMIR: %5 = load <2 x double>, ptr %2, align 16 |
| !LLVMIR: %6 = bitcast <2 x double> %4 to <16 x i8> |
| !LLVMIR: %7 = bitcast <2 x double> %5 to <16 x i8> |
| !LLVMIR: %8 = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> %6, <16 x i8> %7) |
| !LLVMIR: store <256 x i1> %8, ptr %3, align 32 |
| |
| ! mma_build_acc |
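| ! mma_build_acc lowers to the same llvm.ppc.mma.assemble.acc intrinsic as |
| ! mma_assemble_acc, but the checks below verify that the four vector arguments are |
| ! passed to the intrinsic in reverse order. |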
| |
| subroutine test_mma_build_acc_i1() |
| use, intrinsic :: mma |
| implicit none |
| vector(integer(1)) vi10, vi11, vi12, vi13 |
| __vector_quad :: cq |
| call mma_build_acc(cq, vi10, vi11, vi12, vi13) |
| end subroutine test_mma_build_acc_i1 |
| |
| !CHECK-LABEL: @test_mma_build_acc_i1 |
| !LLVMIR: %1 = alloca <512 x i1>, i64 1, align 64 |
| !LLVMIR: %2 = alloca <16 x i8>, i64 1, align 16 |
| !LLVMIR: %3 = alloca <16 x i8>, i64 1, align 16 |
| !LLVMIR: %4 = alloca <16 x i8>, i64 1, align 16 |
| !LLVMIR: %5 = alloca <16 x i8>, i64 1, align 16 |
| !LLVMIR: %6 = load <16 x i8>, ptr %2, align 16 |
| !LLVMIR: %7 = load <16 x i8>, ptr %3, align 16 |
| !LLVMIR: %8 = load <16 x i8>, ptr %4, align 16 |
| !LLVMIR: %9 = load <16 x i8>, ptr %5, align 16 |
| !LLVMIR: %10 = call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %9, <16 x i8> %8, <16 x i8> %7, <16 x i8> %6) |
| !LLVMIR: store <512 x i1> %10, ptr %1, align 64 |
| |
| subroutine test_mma_build_acc_i2() |
| use, intrinsic :: mma |
| implicit none |
| vector(integer(2)) vi10, vi11, vi12, vi13 |
| __vector_quad :: cq |
| call mma_build_acc(cq, vi10, vi11, vi12, vi13) |
| end subroutine test_mma_build_acc_i2 |
| |
| !CHECK-LABEL: @test_mma_build_acc_i2 |
| !LLVMIR: %1 = alloca <512 x i1>, i64 1, align 64 |
| !LLVMIR: %2 = alloca <8 x i16>, i64 1, align 16 |
| !LLVMIR: %3 = alloca <8 x i16>, i64 1, align 16 |
| !LLVMIR: %4 = alloca <8 x i16>, i64 1, align 16 |
| !LLVMIR: %5 = alloca <8 x i16>, i64 1, align 16 |
| !LLVMIR: %6 = load <8 x i16>, ptr %2, align 16 |
| !LLVMIR: %7 = load <8 x i16>, ptr %3, align 16 |
| !LLVMIR: %8 = load <8 x i16>, ptr %4, align 16 |
| !LLVMIR: %9 = load <8 x i16>, ptr %5, align 16 |
| !LLVMIR: %10 = bitcast <8 x i16> %9 to <16 x i8> |
| !LLVMIR: %11 = bitcast <8 x i16> %8 to <16 x i8> |
| !LLVMIR: %12 = bitcast <8 x i16> %7 to <16 x i8> |
| !LLVMIR: %13 = bitcast <8 x i16> %6 to <16 x i8> |
| !LLVMIR: %14 = call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %10, <16 x i8> %11, <16 x i8> %12, <16 x i8> %13) |
| !LLVMIR: store <512 x i1> %14, ptr %1, align 64 |
| |
| subroutine test_mma_build_acc_i4() |
| use, intrinsic :: mma |
| implicit none |
| vector(integer(4)) vi10, vi11, vi12, vi13 |
| __vector_quad :: cq |
| call mma_build_acc(cq, vi10, vi11, vi12, vi13) |
| end subroutine test_mma_build_acc_i4 |
| |
| !CHECK-LABEL: @test_mma_build_acc_i4 |
| !LLVMIR: %1 = alloca <512 x i1>, i64 1, align 64 |
| !LLVMIR: %2 = alloca <4 x i32>, i64 1, align 16 |
| !LLVMIR: %3 = alloca <4 x i32>, i64 1, align 16 |
| !LLVMIR: %4 = alloca <4 x i32>, i64 1, align 16 |
| !LLVMIR: %5 = alloca <4 x i32>, i64 1, align 16 |
| !LLVMIR: %6 = load <4 x i32>, ptr %2, align 16 |
| !LLVMIR: %7 = load <4 x i32>, ptr %3, align 16 |
| !LLVMIR: %8 = load <4 x i32>, ptr %4, align 16 |
| !LLVMIR: %9 = load <4 x i32>, ptr %5, align 16 |
| !LLVMIR: %10 = bitcast <4 x i32> %9 to <16 x i8> |
| !LLVMIR: %11 = bitcast <4 x i32> %8 to <16 x i8> |
| !LLVMIR: %12 = bitcast <4 x i32> %7 to <16 x i8> |
| !LLVMIR: %13 = bitcast <4 x i32> %6 to <16 x i8> |
| !LLVMIR: %14 = call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %10, <16 x i8> %11, <16 x i8> %12, <16 x i8> %13) |
| !LLVMIR: store <512 x i1> %14, ptr %1, align 64 |
| |
| subroutine test_mma_build_acc_i8() |
| use, intrinsic :: mma |
| implicit none |
| vector(integer(8)) vi10, vi11, vi12, vi13 |
| __vector_quad :: cq |
| call mma_build_acc(cq, vi10, vi11, vi12, vi13) |
| end subroutine test_mma_build_acc_i8 |
| |
| !CHECK-LABEL: @test_mma_build_acc_i8 |
| !LLVMIR: %1 = alloca <512 x i1>, i64 1, align 64 |
| !LLVMIR: %2 = alloca <2 x i64>, i64 1, align 16 |
| !LLVMIR: %3 = alloca <2 x i64>, i64 1, align 16 |
| !LLVMIR: %4 = alloca <2 x i64>, i64 1, align 16 |
| !LLVMIR: %5 = alloca <2 x i64>, i64 1, align 16 |
| !LLVMIR: %6 = load <2 x i64>, ptr %2, align 16 |
| !LLVMIR: %7 = load <2 x i64>, ptr %3, align 16 |
| !LLVMIR: %8 = load <2 x i64>, ptr %4, align 16 |
| !LLVMIR: %9 = load <2 x i64>, ptr %5, align 16 |
| !LLVMIR: %10 = bitcast <2 x i64> %9 to <16 x i8> |
| !LLVMIR: %11 = bitcast <2 x i64> %8 to <16 x i8> |
| !LLVMIR: %12 = bitcast <2 x i64> %7 to <16 x i8> |
| !LLVMIR: %13 = bitcast <2 x i64> %6 to <16 x i8> |
| !LLVMIR: %14 = call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %10, <16 x i8> %11, <16 x i8> %12, <16 x i8> %13) |
| !LLVMIR: store <512 x i1> %14, ptr %1, align 64 |
| |
| subroutine test_mma_build_acc_u1() |
| use, intrinsic :: mma |
| implicit none |
| vector(unsigned(1)) vi10, vi11, vi12, vi13 |
| __vector_quad :: cq |
| call mma_build_acc(cq, vi10, vi11, vi12, vi13) |
| end subroutine test_mma_build_acc_u1 |
| |
| !CHECK-LABEL: @test_mma_build_acc_u1 |
| !LLVMIR: %1 = alloca <512 x i1>, i64 1, align 64 |
| !LLVMIR: %2 = alloca <16 x i8>, i64 1, align 16 |
| !LLVMIR: %3 = alloca <16 x i8>, i64 1, align 16 |
| !LLVMIR: %4 = alloca <16 x i8>, i64 1, align 16 |
| !LLVMIR: %5 = alloca <16 x i8>, i64 1, align 16 |
| !LLVMIR: %6 = load <16 x i8>, ptr %2, align 16 |
| !LLVMIR: %7 = load <16 x i8>, ptr %3, align 16 |
| !LLVMIR: %8 = load <16 x i8>, ptr %4, align 16 |
| !LLVMIR: %9 = load <16 x i8>, ptr %5, align 16 |
| !LLVMIR: %10 = call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %9, <16 x i8> %8, <16 x i8> %7, <16 x i8> %6) |
| !LLVMIR: store <512 x i1> %10, ptr %1, align 64 |
| |
| subroutine test_mma_build_acc_u2() |
| use, intrinsic :: mma |
| implicit none |
| vector(unsigned(2)) vi10, vi11, vi12, vi13 |
| __vector_quad :: cq |
| call mma_build_acc(cq, vi10, vi11, vi12, vi13) |
| end subroutine test_mma_build_acc_u2 |
| |
| !CHECK-LABEL: @test_mma_build_acc_u2 |
| !LLVMIR: %1 = alloca <512 x i1>, i64 1, align 64 |
| !LLVMIR: %2 = alloca <8 x i16>, i64 1, align 16 |
| !LLVMIR: %3 = alloca <8 x i16>, i64 1, align 16 |
| !LLVMIR: %4 = alloca <8 x i16>, i64 1, align 16 |
| !LLVMIR: %5 = alloca <8 x i16>, i64 1, align 16 |
| !LLVMIR: %6 = load <8 x i16>, ptr %2, align 16 |
| !LLVMIR: %7 = load <8 x i16>, ptr %3, align 16 |
| !LLVMIR: %8 = load <8 x i16>, ptr %4, align 16 |
| !LLVMIR: %9 = load <8 x i16>, ptr %5, align 16 |
| !LLVMIR: %10 = bitcast <8 x i16> %9 to <16 x i8> |
| !LLVMIR: %11 = bitcast <8 x i16> %8 to <16 x i8> |
| !LLVMIR: %12 = bitcast <8 x i16> %7 to <16 x i8> |
| !LLVMIR: %13 = bitcast <8 x i16> %6 to <16 x i8> |
| !LLVMIR: %14 = call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %10, <16 x i8> %11, <16 x i8> %12, <16 x i8> %13) |
| !LLVMIR: store <512 x i1> %14, ptr %1, align 64 |
| |
| subroutine test_mma_build_acc_u4() |
| use, intrinsic :: mma |
| implicit none |
| vector(unsigned(4)) vi10, vi11, vi12, vi13 |
| __vector_quad :: cq |
| call mma_build_acc(cq, vi10, vi11, vi12, vi13) |
| end subroutine test_mma_build_acc_u4 |
| |
| !CHECK-LABEL: @test_mma_build_acc_u4 |
| !LLVMIR: %1 = alloca <512 x i1>, i64 1, align 64 |
| !LLVMIR: %2 = alloca <4 x i32>, i64 1, align 16 |
| !LLVMIR: %3 = alloca <4 x i32>, i64 1, align 16 |
| !LLVMIR: %4 = alloca <4 x i32>, i64 1, align 16 |
| !LLVMIR: %5 = alloca <4 x i32>, i64 1, align 16 |
| !LLVMIR: %6 = load <4 x i32>, ptr %2, align 16 |
| !LLVMIR: %7 = load <4 x i32>, ptr %3, align 16 |
| !LLVMIR: %8 = load <4 x i32>, ptr %4, align 16 |
| !LLVMIR: %9 = load <4 x i32>, ptr %5, align 16 |
| !LLVMIR: %10 = bitcast <4 x i32> %9 to <16 x i8> |
| !LLVMIR: %11 = bitcast <4 x i32> %8 to <16 x i8> |
| !LLVMIR: %12 = bitcast <4 x i32> %7 to <16 x i8> |
| !LLVMIR: %13 = bitcast <4 x i32> %6 to <16 x i8> |
| !LLVMIR: %14 = call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %10, <16 x i8> %11, <16 x i8> %12, <16 x i8> %13) |
| !LLVMIR: store <512 x i1> %14, ptr %1, align 64 |
| |
| subroutine test_mma_build_acc_u8() |
| use, intrinsic :: mma |
| implicit none |
| vector(unsigned(8)) vi10, vi11, vi12, vi13 |
| __vector_quad :: cq |
| call mma_build_acc(cq, vi10, vi11, vi12, vi13) |
| end subroutine test_mma_build_acc_u8 |
| |
| !CHECK-LABEL: @test_mma_build_acc_u8 |
| !LLVMIR: %1 = alloca <512 x i1>, i64 1, align 64 |
| !LLVMIR: %2 = alloca <2 x i64>, i64 1, align 16 |
| !LLVMIR: %3 = alloca <2 x i64>, i64 1, align 16 |
| !LLVMIR: %4 = alloca <2 x i64>, i64 1, align 16 |
| !LLVMIR: %5 = alloca <2 x i64>, i64 1, align 16 |
| !LLVMIR: %6 = load <2 x i64>, ptr %2, align 16 |
| !LLVMIR: %7 = load <2 x i64>, ptr %3, align 16 |
| !LLVMIR: %8 = load <2 x i64>, ptr %4, align 16 |
| !LLVMIR: %9 = load <2 x i64>, ptr %5, align 16 |
| !LLVMIR: %10 = bitcast <2 x i64> %9 to <16 x i8> |
| !LLVMIR: %11 = bitcast <2 x i64> %8 to <16 x i8> |
| !LLVMIR: %12 = bitcast <2 x i64> %7 to <16 x i8> |
| !LLVMIR: %13 = bitcast <2 x i64> %6 to <16 x i8> |
| !LLVMIR: %14 = call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %10, <16 x i8> %11, <16 x i8> %12, <16 x i8> %13) |
| !LLVMIR: store <512 x i1> %14, ptr %1, align 64 |
| |
| subroutine test_mma_build_acc_r4() |
| use, intrinsic :: mma |
| implicit none |
| vector(real(4)) vi10, vi11, vi12, vi13 |
| __vector_quad :: cq |
| call mma_build_acc(cq, vi10, vi11, vi12, vi13) |
| end subroutine test_mma_build_acc_r4 |
| |
| !CHECK-LABEL: @test_mma_build_acc_r4 |
| !LLVMIR: %1 = alloca <512 x i1>, i64 1, align 64 |
| !LLVMIR: %2 = alloca <4 x float>, i64 1, align 16 |
| !LLVMIR: %3 = alloca <4 x float>, i64 1, align 16 |
| !LLVMIR: %4 = alloca <4 x float>, i64 1, align 16 |
| !LLVMIR: %5 = alloca <4 x float>, i64 1, align 16 |
| !LLVMIR: %6 = load <4 x float>, ptr %2, align 16 |
| !LLVMIR: %7 = load <4 x float>, ptr %3, align 16 |
| !LLVMIR: %8 = load <4 x float>, ptr %4, align 16 |
| !LLVMIR: %9 = load <4 x float>, ptr %5, align 16 |
| !LLVMIR: %10 = bitcast <4 x float> %9 to <16 x i8> |
| !LLVMIR: %11 = bitcast <4 x float> %8 to <16 x i8> |
| !LLVMIR: %12 = bitcast <4 x float> %7 to <16 x i8> |
| !LLVMIR: %13 = bitcast <4 x float> %6 to <16 x i8> |
| !LLVMIR: %14 = call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %10, <16 x i8> %11, <16 x i8> %12, <16 x i8> %13) |
| !LLVMIR: store <512 x i1> %14, ptr %1, align 64 |
| |
| subroutine test_mma_build_acc_r8() |
| use, intrinsic :: mma |
| implicit none |
| vector(real(8)) vi10, vi11, vi12, vi13 |
| __vector_quad :: cq |
| call mma_build_acc(cq, vi10, vi11, vi12, vi13) |
| end subroutine test_mma_build_acc_r8 |
| |
| !CHECK-LABEL: @test_mma_build_acc_r8 |
| !LLVMIR: %1 = alloca <512 x i1>, i64 1, align 64 |
| !LLVMIR: %2 = alloca <2 x double>, i64 1, align 16 |
| !LLVMIR: %3 = alloca <2 x double>, i64 1, align 16 |
| !LLVMIR: %4 = alloca <2 x double>, i64 1, align 16 |
| !LLVMIR: %5 = alloca <2 x double>, i64 1, align 16 |
| !LLVMIR: %6 = load <2 x double>, ptr %2, align 16 |
| !LLVMIR: %7 = load <2 x double>, ptr %3, align 16 |
| !LLVMIR: %8 = load <2 x double>, ptr %4, align 16 |
| !LLVMIR: %9 = load <2 x double>, ptr %5, align 16 |
| !LLVMIR: %10 = bitcast <2 x double> %9 to <16 x i8> |
| !LLVMIR: %11 = bitcast <2 x double> %8 to <16 x i8> |
| !LLVMIR: %12 = bitcast <2 x double> %7 to <16 x i8> |
| !LLVMIR: %13 = bitcast <2 x double> %6 to <16 x i8> |
| !LLVMIR: %14 = call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %10, <16 x i8> %11, <16 x i8> %12, <16 x i8> %13) |
| !LLVMIR: store <512 x i1> %14, ptr %1, align 64 |
| |
| ! mma_disassemble_acc |
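| ! mma_disassemble_acc unpacks a __vector_quad: the <512 x i1> accumulator is passed to |
| ! llvm.ppc.mma.disassemble.acc and the returned aggregate of four <16 x i8> values is |
| ! stored to the first argument, which acts as an opaque destination buffer. |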
| |
| subroutine test_disassemble_acc() |
| use, intrinsic :: mma |
| implicit none |
| __vector_quad :: vq |
| real :: data |
| call mma_disassemble_acc(data, vq) |
| end subroutine |
| |
| !CHECK-LABEL: @test_disassemble_acc_ |
| !LLVMIR: %1 = alloca float, i64 1, align 4 |
| !LLVMIR: %2 = alloca <512 x i1>, i64 1, align 64 |
| !LLVMIR: %3 = load <512 x i1>, ptr %2, align 64 |
| !LLVMIR: %4 = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> %3) |
| !LLVMIR: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %4, ptr %1, align 16 |
| |
| ! mma_disassemble_pair |
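| ! mma_disassemble_pair unpacks a __vector_pair via llvm.ppc.vsx.disassemble.pair, which |
| ! returns an aggregate of two <16 x i8> values that is stored to the first argument. |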
| |
| subroutine test_disassemble_pair() |
| use, intrinsic :: mma |
| implicit none |
| __vector_pair :: vp |
| real :: data |
| call mma_disassemble_pair(data, vp) |
| end subroutine |
| |
| !CHECK-LABEL: @test_disassemble_pair_ |
| !LLVMIR: %1 = alloca float, i64 1, align 4 |
| !LLVMIR: %2 = alloca <256 x i1>, i64 1, align 32 |
| !LLVMIR: %3 = load <256 x i1>, ptr %2, align 32 |
| !LLVMIR: %4 = call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %3) |
| !LLVMIR: store { <16 x i8>, <16 x i8> } %4, ptr %1, align 16 |