//===-- PPCIntrinsicCall.cpp ----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Helper routines for constructing the FIR dialect of MLIR for PowerPC
// intrinsics. This module makes extensive use of MLIR interfaces and follows
// MLIR's coding style
// (https://mlir.llvm.org/getting_started/DeveloperGuide/).
//
//===----------------------------------------------------------------------===//
#include "flang/Optimizer/Builder/PPCIntrinsicCall.h"
#include "flang/Evaluate/common.h"
#include "flang/Optimizer/Builder/FIRBuilder.h"
#include "flang/Optimizer/Builder/MutableBox.h"
#include "mlir/Dialect/Vector/IR/VectorOps.h"
namespace fir {
using PI = PPCIntrinsicLibrary;
// PPC specific intrinsic handlers.
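// Note: entries must be kept sorted by name; findPPCIntrinsicHandler below
// looks handlers up with a binary search (llvm::lower_bound) over this table.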
static constexpr IntrinsicHandler ppcHandlers[]{
{"__ppc_mma_assemble_acc",
static_cast<IntrinsicLibrary::SubroutineGenerator>(
&PI::genMmaIntr<MMAOp::AssembleAcc, MMAHandlerOp::SubToFunc>),
{{{"acc", asAddr},
{"arg1", asValue},
{"arg2", asValue},
{"arg3", asValue},
{"arg4", asValue}}},
/*isElemental=*/true},
{"__ppc_mma_assemble_pair",
static_cast<IntrinsicLibrary::SubroutineGenerator>(
&PI::genMmaIntr<MMAOp::AssemblePair, MMAHandlerOp::SubToFunc>),
{{{"pair", asAddr}, {"arg1", asValue}, {"arg2", asValue}}},
/*isElemental=*/true},
{"__ppc_mma_build_acc",
static_cast<IntrinsicLibrary::SubroutineGenerator>(
&PI::genMmaIntr<MMAOp::AssembleAcc,
MMAHandlerOp::SubToFuncReverseArgOnLE>),
{{{"acc", asAddr},
{"arg1", asValue},
{"arg2", asValue},
{"arg3", asValue},
{"arg4", asValue}}},
/*isElemental=*/true},
{"__ppc_mma_disassemble_acc",
static_cast<IntrinsicLibrary::SubroutineGenerator>(
&PI::genMmaIntr<MMAOp::DisassembleAcc, MMAHandlerOp::SubToFunc>),
{{{"data", asAddr}, {"acc", asValue}}},
/*isElemental=*/true},
{"__ppc_mma_disassemble_pair",
static_cast<IntrinsicLibrary::SubroutineGenerator>(
&PI::genMmaIntr<MMAOp::DisassemblePair, MMAHandlerOp::SubToFunc>),
{{{"data", asAddr}, {"pair", asValue}}},
/*isElemental=*/true},
{"__ppc_mma_pmxvbf16ger2_",
static_cast<IntrinsicLibrary::SubroutineGenerator>(
&PI::genMmaIntr<MMAOp::Pmxvbf16ger2, MMAHandlerOp::SubToFunc>),
{{{"acc", asAddr},
{"a", asValue},
{"b", asValue},
{"xmask", asValue},
{"ymask", asValue},
{"pmask", asValue}}},
/*isElemental=*/true},
{"__ppc_mma_pmxvbf16ger2nn",
static_cast<IntrinsicLibrary::SubroutineGenerator>(
&PI::genMmaIntr<MMAOp::Pmxvbf16ger2nn,
MMAHandlerOp::FirstArgIsResult>),
{{{"acc", asAddr},
{"a", asValue},
{"b", asValue},
{"xmask", asValue},
{"ymask", asValue},
{"pmask", asValue}}},
/*isElemental=*/true},
{"__ppc_mma_pmxvbf16ger2np",
static_cast<IntrinsicLibrary::SubroutineGenerator>(
&PI::genMmaIntr<MMAOp::Pmxvbf16ger2np,
MMAHandlerOp::FirstArgIsResult>),
{{{"acc", asAddr},
{"a", asValue},
{"b", asValue},
{"xmask", asValue},
{"ymask", asValue},
{"pmask", asValue}}},
/*isElemental=*/true},
{"__ppc_mma_pmxvbf16ger2pn",
static_cast<IntrinsicLibrary::SubroutineGenerator>(
&PI::genMmaIntr<MMAOp::Pmxvbf16ger2pn,
MMAHandlerOp::FirstArgIsResult>),
{{{"acc", asAddr},
{"a", asValue},
{"b", asValue},
{"xmask", asValue},
{"ymask", asValue},
{"pmask", asValue}}},
/*isElemental=*/true},
{"__ppc_mma_pmxvbf16ger2pp",
static_cast<IntrinsicLibrary::SubroutineGenerator>(
&PI::genMmaIntr<MMAOp::Pmxvbf16ger2pp,
MMAHandlerOp::FirstArgIsResult>),
{{{"acc", asAddr},
{"a", asValue},
{"b", asValue},
{"xmask", asValue},
{"ymask", asValue},
{"pmask", asValue}}},
/*isElemental=*/true},
{"__ppc_mma_pmxvf16ger2_",
static_cast<IntrinsicLibrary::SubroutineGenerator>(
&PI::genMmaIntr<MMAOp::Pmxvf16ger2, MMAHandlerOp::SubToFunc>),
{{{"acc", asAddr},
{"a", asValue},
{"b", asValue},
{"xmask", asValue},
{"ymask", asValue},
{"pmask", asValue}}},
/*isElemental=*/true},
{"__ppc_mma_pmxvf16ger2nn",
static_cast<IntrinsicLibrary::SubroutineGenerator>(
&PI::genMmaIntr<MMAOp::Pmxvf16ger2nn, MMAHandlerOp::FirstArgIsResult>),
{{{"acc", asAddr},
{"a", asValue},
{"b", asValue},
{"xmask", asValue},
{"ymask", asValue},
{"pmask", asValue}}},
/*isElemental=*/true},
{"__ppc_mma_pmxvf16ger2np",
static_cast<IntrinsicLibrary::SubroutineGenerator>(
&PI::genMmaIntr<MMAOp::Pmxvf16ger2np, MMAHandlerOp::FirstArgIsResult>),
{{{"acc", asAddr},
{"a", asValue},
{"b", asValue},
{"xmask", asValue},
{"ymask", asValue},
{"pmask", asValue}}},
/*isElemental=*/true},
{"__ppc_mma_pmxvf16ger2pn",
static_cast<IntrinsicLibrary::SubroutineGenerator>(
&PI::genMmaIntr<MMAOp::Pmxvf16ger2pn, MMAHandlerOp::FirstArgIsResult>),
{{{"acc", asAddr},
{"a", asValue},
{"b", asValue},
{"xmask", asValue},
{"ymask", asValue},
{"pmask", asValue}}},
/*isElemental=*/true},
{"__ppc_mma_pmxvf16ger2pp",
static_cast<IntrinsicLibrary::SubroutineGenerator>(
&PI::genMmaIntr<MMAOp::Pmxvf16ger2pp, MMAHandlerOp::FirstArgIsResult>),
{{{"acc", asAddr},
{"a", asValue},
{"b", asValue},
{"xmask", asValue},
{"ymask", asValue},
{"pmask", asValue}}},
/*isElemental=*/true},
{"__ppc_mma_pmxvf32ger",
static_cast<IntrinsicLibrary::SubroutineGenerator>(
&PI::genMmaIntr<MMAOp::Pmxvf32ger, MMAHandlerOp::SubToFunc>),
{{{"acc", asAddr},
{"a", asValue},
{"b", asValue},
{"xmask", asValue},
{"ymask", asValue}}},
/*isElemental=*/true},
{"__ppc_mma_pmxvf32gernn",
static_cast<IntrinsicLibrary::SubroutineGenerator>(
&PI::genMmaIntr<MMAOp::Pmxvf32gernn, MMAHandlerOp::FirstArgIsResult>),
{{{"acc", asAddr},
{"a", asValue},
{"b", asValue},
{"xmask", asValue},
{"ymask", asValue}}},
/*isElemental=*/true},
{"__ppc_mma_pmxvf32gernp",
static_cast<IntrinsicLibrary::SubroutineGenerator>(
&PI::genMmaIntr<MMAOp::Pmxvf32gernp, MMAHandlerOp::FirstArgIsResult>),
{{{"acc", asAddr},
{"a", asValue},
{"b", asValue},
{"xmask", asValue},
{"ymask", asValue}}},
/*isElemental=*/true},
{"__ppc_mma_pmxvf32gerpn",
static_cast<IntrinsicLibrary::SubroutineGenerator>(
&PI::genMmaIntr<MMAOp::Pmxvf32gerpn, MMAHandlerOp::FirstArgIsResult>),
{{{"acc", asAddr},
{"a", asValue},
{"b", asValue},
{"xmask", asValue},
{"ymask", asValue}}},
/*isElemental=*/true},
{"__ppc_mma_pmxvf32gerpp",
static_cast<IntrinsicLibrary::SubroutineGenerator>(
&PI::genMmaIntr<MMAOp::Pmxvf32gerpp, MMAHandlerOp::FirstArgIsResult>),
{{{"acc", asAddr},
{"a", asValue},
{"b", asValue},
{"xmask", asValue},
{"ymask", asValue}}},
/*isElemental=*/true},
{"__ppc_mma_pmxvf64ger",
static_cast<IntrinsicLibrary::SubroutineGenerator>(
&PI::genMmaIntr<MMAOp::Pmxvf64ger, MMAHandlerOp::SubToFunc>),
{{{"acc", asAddr},
{"a", asValue},
{"b", asValue},
{"xmask", asValue},
{"ymask", asValue}}},
/*isElemental=*/true},
{"__ppc_mma_pmxvf64gernn",
static_cast<IntrinsicLibrary::SubroutineGenerator>(
&PI::genMmaIntr<MMAOp::Pmxvf64gernn, MMAHandlerOp::FirstArgIsResult>),
{{{"acc", asAddr},
{"a", asValue},
{"b", asValue},
{"xmask", asValue},
{"ymask", asValue}}},
/*isElemental=*/true},
{"__ppc_mma_pmxvf64gernp",
static_cast<IntrinsicLibrary::SubroutineGenerator>(
&PI::genMmaIntr<MMAOp::Pmxvf64gernp, MMAHandlerOp::FirstArgIsResult>),
{{{"acc", asAddr},
{"a", asValue},
{"b", asValue},
{"xmask", asValue},
{"ymask", asValue}}},
/*isElemental=*/true},
{"__ppc_mma_pmxvf64gerpn",
static_cast<IntrinsicLibrary::SubroutineGenerator>(
&PI::genMmaIntr<MMAOp::Pmxvf64gerpn, MMAHandlerOp::FirstArgIsResult>),
{{{"acc", asAddr},
{"a", asValue},
{"b", asValue},
{"xmask", asValue},
{"ymask", asValue}}},
/*isElemental=*/true},
{"__ppc_mma_pmxvf64gerpp",
static_cast<IntrinsicLibrary::SubroutineGenerator>(
&PI::genMmaIntr<MMAOp::Pmxvf64gerpp, MMAHandlerOp::FirstArgIsResult>),
{{{"acc", asAddr},
{"a", asValue},
{"b", asValue},
{"xmask", asValue},
{"ymask", asValue}}},
/*isElemental=*/true},
{"__ppc_mma_pmxvi16ger2_",
static_cast<IntrinsicLibrary::SubroutineGenerator>(
&PI::genMmaIntr<MMAOp::Pmxvi16ger2, MMAHandlerOp::SubToFunc>),
{{{"acc", asAddr},
{"a", asValue},
{"b", asValue},
{"xmask", asValue},
{"ymask", asValue},
{"pmask", asValue}}},
/*isElemental=*/true},
{"__ppc_mma_pmxvi16ger2pp",
static_cast<IntrinsicLibrary::SubroutineGenerator>(
&PI::genMmaIntr<MMAOp::Pmxvi16ger2pp, MMAHandlerOp::FirstArgIsResult>),
{{{"acc", asAddr},
{"a", asValue},
{"b", asValue},
{"xmask", asValue},
{"ymask", asValue},
{"pmask", asValue}}},
/*isElemental=*/true},
{"__ppc_mma_pmxvi16ger2s",
static_cast<IntrinsicLibrary::SubroutineGenerator>(
&PI::genMmaIntr<MMAOp::Pmxvi16ger2s, MMAHandlerOp::SubToFunc>),
{{{"acc", asAddr},
{"a", asValue},
{"b", asValue},
{"xmask", asValue},
{"ymask", asValue},
{"pmask", asValue}}},
/*isElemental=*/true},
{"__ppc_mma_pmxvi16ger2spp",
static_cast<IntrinsicLibrary::SubroutineGenerator>(
&PI::genMmaIntr<MMAOp::Pmxvi16ger2spp,
MMAHandlerOp::FirstArgIsResult>),
{{{"acc", asAddr},
{"a", asValue},
{"b", asValue},
{"xmask", asValue},
{"ymask", asValue},
{"pmask", asValue}}},
/*isElemental=*/true},
{"__ppc_mma_pmxvi4ger8_",
static_cast<IntrinsicLibrary::SubroutineGenerator>(
&PI::genMmaIntr<MMAOp::Pmxvi4ger8, MMAHandlerOp::SubToFunc>),
{{{"acc", asAddr},
{"a", asValue},
{"b", asValue},
{"xmask", asValue},
{"ymask", asValue},
{"pmask", asValue}}},
/*isElemental=*/true},
{"__ppc_mma_pmxvi4ger8pp",
static_cast<IntrinsicLibrary::SubroutineGenerator>(
&PI::genMmaIntr<MMAOp::Pmxvi4ger8pp, MMAHandlerOp::FirstArgIsResult>),
{{{"acc", asAddr},
{"a", asValue},
{"b", asValue},
{"xmask", asValue},
{"ymask", asValue},
{"pmask", asValue}}},
/*isElemental=*/true},
{"__ppc_mma_pmxvi8ger4_",
static_cast<IntrinsicLibrary::SubroutineGenerator>(
&PI::genMmaIntr<MMAOp::Pmxvi8ger4, MMAHandlerOp::SubToFunc>),
{{{"acc", asAddr},
{"a", asValue},
{"b", asValue},
{"xmask", asValue},
{"ymask", asValue},
{"pmask", asValue}}},
/*isElemental=*/true},
{"__ppc_mma_pmxvi8ger4pp",
static_cast<IntrinsicLibrary::SubroutineGenerator>(
&PI::genMmaIntr<MMAOp::Pmxvi8ger4pp, MMAHandlerOp::FirstArgIsResult>),
{{{"acc", asAddr},
{"a", asValue},
{"b", asValue},
{"xmask", asValue},
{"ymask", asValue},
{"pmask", asValue}}},
/*isElemental=*/true},
{"__ppc_mma_pmxvi8ger4spp",
static_cast<IntrinsicLibrary::SubroutineGenerator>(
&PI::genMmaIntr<MMAOp::Pmxvi8ger4spp, MMAHandlerOp::FirstArgIsResult>),
{{{"acc", asAddr},
{"a", asValue},
{"b", asValue},
{"xmask", asValue},
{"ymask", asValue},
{"pmask", asValue}}},
/*isElemental=*/true},
{"__ppc_mma_xvbf16ger2_",
static_cast<IntrinsicLibrary::SubroutineGenerator>(
&PI::genMmaIntr<MMAOp::Xvbf16ger2, MMAHandlerOp::SubToFunc>),
{{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
/*isElemental=*/true},
{"__ppc_mma_xvbf16ger2nn",
static_cast<IntrinsicLibrary::SubroutineGenerator>(
&PI::genMmaIntr<MMAOp::Xvbf16ger2nn, MMAHandlerOp::FirstArgIsResult>),
{{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
/*isElemental=*/true},
{"__ppc_mma_xvbf16ger2np",
static_cast<IntrinsicLibrary::SubroutineGenerator>(
&PI::genMmaIntr<MMAOp::Xvbf16ger2np, MMAHandlerOp::FirstArgIsResult>),
{{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
/*isElemental=*/true},
{"__ppc_mma_xvbf16ger2pn",
static_cast<IntrinsicLibrary::SubroutineGenerator>(
&PI::genMmaIntr<MMAOp::Xvbf16ger2pn, MMAHandlerOp::FirstArgIsResult>),
{{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
/*isElemental=*/true},
{"__ppc_mma_xvbf16ger2pp",
static_cast<IntrinsicLibrary::SubroutineGenerator>(
&PI::genMmaIntr<MMAOp::Xvbf16ger2pp, MMAHandlerOp::FirstArgIsResult>),
{{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
/*isElemental=*/true},
{"__ppc_mma_xvf16ger2_",
static_cast<IntrinsicLibrary::SubroutineGenerator>(
&PI::genMmaIntr<MMAOp::Xvf16ger2, MMAHandlerOp::SubToFunc>),
{{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
/*isElemental=*/true},
{"__ppc_mma_xvf16ger2nn",
static_cast<IntrinsicLibrary::SubroutineGenerator>(
&PI::genMmaIntr<MMAOp::Xvf16ger2nn, MMAHandlerOp::FirstArgIsResult>),
{{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
/*isElemental=*/true},
{"__ppc_mma_xvf16ger2np",
static_cast<IntrinsicLibrary::SubroutineGenerator>(
&PI::genMmaIntr<MMAOp::Xvf16ger2np, MMAHandlerOp::FirstArgIsResult>),
{{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
/*isElemental=*/true},
{"__ppc_mma_xvf16ger2pn",
static_cast<IntrinsicLibrary::SubroutineGenerator>(
&PI::genMmaIntr<MMAOp::Xvf16ger2pn, MMAHandlerOp::FirstArgIsResult>),
{{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
/*isElemental=*/true},
{"__ppc_mma_xvf16ger2pp",
static_cast<IntrinsicLibrary::SubroutineGenerator>(
&PI::genMmaIntr<MMAOp::Xvf16ger2pp, MMAHandlerOp::FirstArgIsResult>),
{{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
/*isElemental=*/true},
{"__ppc_mma_xvf32ger",
static_cast<IntrinsicLibrary::SubroutineGenerator>(
&PI::genMmaIntr<MMAOp::Xvf32ger, MMAHandlerOp::SubToFunc>),
{{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
/*isElemental=*/true},
{"__ppc_mma_xvf32gernn",
static_cast<IntrinsicLibrary::SubroutineGenerator>(
&PI::genMmaIntr<MMAOp::Xvf32gernn, MMAHandlerOp::FirstArgIsResult>),
{{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
/*isElemental=*/true},
{"__ppc_mma_xvf32gernp",
static_cast<IntrinsicLibrary::SubroutineGenerator>(
&PI::genMmaIntr<MMAOp::Xvf32gernp, MMAHandlerOp::FirstArgIsResult>),
{{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
/*isElemental=*/true},
{"__ppc_mma_xvf32gerpn",
static_cast<IntrinsicLibrary::SubroutineGenerator>(
&PI::genMmaIntr<MMAOp::Xvf32gerpn, MMAHandlerOp::FirstArgIsResult>),
{{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
/*isElemental=*/true},
{"__ppc_mma_xvf32gerpp",
static_cast<IntrinsicLibrary::SubroutineGenerator>(
&PI::genMmaIntr<MMAOp::Xvf32gerpp, MMAHandlerOp::FirstArgIsResult>),
{{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
/*isElemental=*/true},
{"__ppc_mma_xvf64ger",
static_cast<IntrinsicLibrary::SubroutineGenerator>(
&PI::genMmaIntr<MMAOp::Xvf64ger, MMAHandlerOp::SubToFunc>),
{{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
/*isElemental=*/true},
{"__ppc_mma_xvf64gernn",
static_cast<IntrinsicLibrary::SubroutineGenerator>(
&PI::genMmaIntr<MMAOp::Xvf64gernn, MMAHandlerOp::FirstArgIsResult>),
{{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
/*isElemental=*/true},
{"__ppc_mma_xvf64gernp",
static_cast<IntrinsicLibrary::SubroutineGenerator>(
&PI::genMmaIntr<MMAOp::Xvf64gernp, MMAHandlerOp::FirstArgIsResult>),
{{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
/*isElemental=*/true},
{"__ppc_mma_xvf64gerpn",
static_cast<IntrinsicLibrary::SubroutineGenerator>(
&PI::genMmaIntr<MMAOp::Xvf64gerpn, MMAHandlerOp::FirstArgIsResult>),
{{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
/*isElemental=*/true},
{"__ppc_mma_xvf64gerpp",
static_cast<IntrinsicLibrary::SubroutineGenerator>(
&PI::genMmaIntr<MMAOp::Xvf64gerpp, MMAHandlerOp::FirstArgIsResult>),
{{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
/*isElemental=*/true},
{"__ppc_mma_xvi16ger2_",
static_cast<IntrinsicLibrary::SubroutineGenerator>(
&PI::genMmaIntr<MMAOp::Xvi16ger2, MMAHandlerOp::SubToFunc>),
{{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
/*isElemental=*/true},
{"__ppc_mma_xvi16ger2pp",
static_cast<IntrinsicLibrary::SubroutineGenerator>(
&PI::genMmaIntr<MMAOp::Xvi16ger2pp, MMAHandlerOp::FirstArgIsResult>),
{{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
/*isElemental=*/true},
{"__ppc_mma_xvi16ger2s",
static_cast<IntrinsicLibrary::SubroutineGenerator>(
&PI::genMmaIntr<MMAOp::Xvi16ger2s, MMAHandlerOp::SubToFunc>),
{{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
/*isElemental=*/true},
{"__ppc_mma_xvi16ger2spp",
static_cast<IntrinsicLibrary::SubroutineGenerator>(
&PI::genMmaIntr<MMAOp::Xvi16ger2spp, MMAHandlerOp::FirstArgIsResult>),
{{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
/*isElemental=*/true},
{"__ppc_mma_xvi4ger8_",
static_cast<IntrinsicLibrary::SubroutineGenerator>(
&PI::genMmaIntr<MMAOp::Xvi4ger8, MMAHandlerOp::SubToFunc>),
{{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
/*isElemental=*/true},
{"__ppc_mma_xvi4ger8pp",
static_cast<IntrinsicLibrary::SubroutineGenerator>(
&PI::genMmaIntr<MMAOp::Xvi4ger8pp, MMAHandlerOp::FirstArgIsResult>),
{{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
/*isElemental=*/true},
{"__ppc_mma_xvi8ger4_",
static_cast<IntrinsicLibrary::SubroutineGenerator>(
&PI::genMmaIntr<MMAOp::Xvi8ger4, MMAHandlerOp::SubToFunc>),
{{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
/*isElemental=*/true},
{"__ppc_mma_xvi8ger4pp",
static_cast<IntrinsicLibrary::SubroutineGenerator>(
&PI::genMmaIntr<MMAOp::Xvi8ger4pp, MMAHandlerOp::FirstArgIsResult>),
{{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
/*isElemental=*/true},
{"__ppc_mma_xvi8ger4spp",
static_cast<IntrinsicLibrary::SubroutineGenerator>(
&PI::genMmaIntr<MMAOp::Xvi8ger4spp, MMAHandlerOp::FirstArgIsResult>),
{{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
/*isElemental=*/true},
{"__ppc_mma_xxmfacc",
static_cast<IntrinsicLibrary::SubroutineGenerator>(
&PI::genMmaIntr<MMAOp::Xxmfacc, MMAHandlerOp::FirstArgIsResult>),
{{{"acc", asAddr}}},
/*isElemental=*/true},
{"__ppc_mma_xxmtacc",
static_cast<IntrinsicLibrary::SubroutineGenerator>(
&PI::genMmaIntr<MMAOp::Xxmtacc, MMAHandlerOp::FirstArgIsResult>),
{{{"acc", asAddr}}},
/*isElemental=*/true},
{"__ppc_mma_xxsetaccz",
static_cast<IntrinsicLibrary::SubroutineGenerator>(
&PI::genMmaIntr<MMAOp::Xxsetaccz, MMAHandlerOp::SubToFunc>),
{{{"acc", asAddr}}},
/*isElemental=*/true},
{"__ppc_mtfsf",
static_cast<IntrinsicLibrary::SubroutineGenerator>(&PI::genMtfsf<false>),
{{{"mask", asValue}, {"r", asValue}}},
/*isElemental=*/false},
{"__ppc_mtfsfi",
static_cast<IntrinsicLibrary::SubroutineGenerator>(&PI::genMtfsf<true>),
{{{"bf", asValue}, {"i", asValue}}},
/*isElemental=*/false},
{"__ppc_vec_abs",
static_cast<IntrinsicLibrary::ExtendedGenerator>(&PI::genVecAbs),
{{{"arg1", asValue}}},
/*isElemental=*/true},
{"__ppc_vec_add",
static_cast<IntrinsicLibrary::ExtendedGenerator>(
&PI::genVecAddAndMulSubXor<VecOp::Add>),
{{{"arg1", asValue}, {"arg2", asValue}}},
/*isElemental=*/true},
{"__ppc_vec_and",
static_cast<IntrinsicLibrary::ExtendedGenerator>(
&PI::genVecAddAndMulSubXor<VecOp::And>),
{{{"arg1", asValue}, {"arg2", asValue}}},
/*isElemental=*/true},
{"__ppc_vec_any_ge",
static_cast<IntrinsicLibrary::ExtendedGenerator>(
&PI::genVecAnyCompare<VecOp::Anyge>),
{{{"arg1", asValue}, {"arg2", asValue}}},
/*isElemental=*/true},
{"__ppc_vec_cmpge",
static_cast<IntrinsicLibrary::ExtendedGenerator>(
&PI::genVecCmp<VecOp::Cmpge>),
{{{"arg1", asValue}, {"arg2", asValue}}},
/*isElemental=*/true},
{"__ppc_vec_cmpgt",
static_cast<IntrinsicLibrary::ExtendedGenerator>(
&PI::genVecCmp<VecOp::Cmpgt>),
{{{"arg1", asValue}, {"arg2", asValue}}},
/*isElemental=*/true},
{"__ppc_vec_cmple",
static_cast<IntrinsicLibrary::ExtendedGenerator>(
&PI::genVecCmp<VecOp::Cmple>),
{{{"arg1", asValue}, {"arg2", asValue}}},
/*isElemental=*/true},
{"__ppc_vec_cmplt",
static_cast<IntrinsicLibrary::ExtendedGenerator>(
&PI::genVecCmp<VecOp::Cmplt>),
{{{"arg1", asValue}, {"arg2", asValue}}},
/*isElemental=*/true},
{"__ppc_vec_convert",
static_cast<IntrinsicLibrary::ExtendedGenerator>(
&PI::genVecConvert<VecOp::Convert>),
{{{"v", asValue}, {"mold", asValue}}},
/*isElemental=*/false},
{"__ppc_vec_ctf",
static_cast<IntrinsicLibrary::ExtendedGenerator>(
&PI::genVecConvert<VecOp::Ctf>),
{{{"arg1", asValue}, {"arg2", asValue}}},
/*isElemental=*/true},
{"__ppc_vec_cvf",
static_cast<IntrinsicLibrary::ExtendedGenerator>(
&PI::genVecConvert<VecOp::Cvf>),
{{{"arg1", asValue}}},
/*isElemental=*/true},
{"__ppc_vec_extract",
static_cast<IntrinsicLibrary::ExtendedGenerator>(&PI::genVecExtract),
{{{"arg1", asValue}, {"arg2", asValue}}},
/*isElemental=*/true},
{"__ppc_vec_insert",
static_cast<IntrinsicLibrary::ExtendedGenerator>(&PI::genVecInsert),
{{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asValue}}},
/*isElemental=*/true},
{"__ppc_vec_ld",
static_cast<IntrinsicLibrary::ExtendedGenerator>(
&PI::genVecLdCallGrp<VecOp::Ld>),
{{{"arg1", asValue}, {"arg2", asAddr}}},
/*isElemental=*/false},
{"__ppc_vec_lde",
static_cast<IntrinsicLibrary::ExtendedGenerator>(
&PI::genVecLdCallGrp<VecOp::Lde>),
{{{"arg1", asValue}, {"arg2", asAddr}}},
/*isElemental=*/false},
{"__ppc_vec_ldl",
static_cast<IntrinsicLibrary::ExtendedGenerator>(
&PI::genVecLdCallGrp<VecOp::Ldl>),
{{{"arg1", asValue}, {"arg2", asAddr}}},
/*isElemental=*/false},
{"__ppc_vec_lvsl",
static_cast<IntrinsicLibrary::ExtendedGenerator>(
&PI::genVecLvsGrp<VecOp::Lvsl>),
{{{"arg1", asValue}, {"arg2", asAddr}}},
/*isElemental=*/false},
{"__ppc_vec_lvsr",
static_cast<IntrinsicLibrary::ExtendedGenerator>(
&PI::genVecLvsGrp<VecOp::Lvsr>),
{{{"arg1", asValue}, {"arg2", asAddr}}},
/*isElemental=*/false},
{"__ppc_vec_lxv",
static_cast<IntrinsicLibrary::ExtendedGenerator>(
&PI::genVecLdNoCallGrp<VecOp::Lxv>),
{{{"arg1", asValue}, {"arg2", asAddr}}},
/*isElemental=*/false},
{"__ppc_vec_lxvp",
static_cast<IntrinsicLibrary::ExtendedGenerator>(
&PI::genVecLdCallGrp<VecOp::Lxvp>),
{{{"arg1", asValue}, {"arg2", asAddr}}},
/*isElemental=*/false},
{"__ppc_vec_mergeh",
static_cast<IntrinsicLibrary::ExtendedGenerator>(
&PI::genVecMerge<VecOp::Mergeh>),
{{{"arg1", asValue}, {"arg2", asValue}}},
/*isElemental=*/true},
{"__ppc_vec_mergel",
static_cast<IntrinsicLibrary::ExtendedGenerator>(
&PI::genVecMerge<VecOp::Mergel>),
{{{"arg1", asValue}, {"arg2", asValue}}},
/*isElemental=*/true},
{"__ppc_vec_msub",
static_cast<IntrinsicLibrary::ExtendedGenerator>(
&PI::genVecNmaddMsub<VecOp::Msub>),
{{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asValue}}},
/*isElemental=*/true},
{"__ppc_vec_mul",
static_cast<IntrinsicLibrary::ExtendedGenerator>(
&PI::genVecAddAndMulSubXor<VecOp::Mul>),
{{{"arg1", asValue}, {"arg2", asValue}}},
/*isElemental=*/true},
{"__ppc_vec_nmadd",
static_cast<IntrinsicLibrary::ExtendedGenerator>(
&PI::genVecNmaddMsub<VecOp::Nmadd>),
{{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asValue}}},
/*isElemental=*/true},
{"__ppc_vec_perm",
static_cast<IntrinsicLibrary::ExtendedGenerator>(
&PI::genVecPerm<VecOp::Perm>),
{{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asValue}}},
/*isElemental=*/true},
{"__ppc_vec_permi",
static_cast<IntrinsicLibrary::ExtendedGenerator>(
&PI::genVecPerm<VecOp::Permi>),
{{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asValue}}},
/*isElemental=*/true},
{"__ppc_vec_sel",
static_cast<IntrinsicLibrary::ExtendedGenerator>(&PI::genVecSel),
{{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asValue}}},
/*isElemental=*/true},
{"__ppc_vec_sl",
static_cast<IntrinsicLibrary::ExtendedGenerator>(
&PI::genVecShift<VecOp::Sl>),
{{{"arg1", asValue}, {"arg2", asValue}}},
/*isElemental=*/true},
{"__ppc_vec_sld",
static_cast<IntrinsicLibrary::ExtendedGenerator>(
&PI::genVecShift<VecOp::Sld>),
{{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asValue}}},
/*isElemental=*/true},
{"__ppc_vec_sldw",
static_cast<IntrinsicLibrary::ExtendedGenerator>(
&PI::genVecShift<VecOp::Sldw>),
{{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asValue}}},
/*isElemental=*/true},
{"__ppc_vec_sll",
static_cast<IntrinsicLibrary::ExtendedGenerator>(
&PI::genVecShift<VecOp::Sll>),
{{{"arg1", asValue}, {"arg2", asValue}}},
/*isElemental=*/true},
{"__ppc_vec_slo",
static_cast<IntrinsicLibrary::ExtendedGenerator>(
&PI::genVecShift<VecOp::Slo>),
{{{"arg1", asValue}, {"arg2", asValue}}},
/*isElemental=*/true},
{"__ppc_vec_splat",
static_cast<IntrinsicLibrary::ExtendedGenerator>(
&PI::genVecSplat<VecOp::Splat>),
{{{"arg1", asValue}, {"arg2", asValue}}},
/*isElemental=*/true},
{"__ppc_vec_splat_s32_",
static_cast<IntrinsicLibrary::ExtendedGenerator>(
&PI::genVecSplat<VecOp::Splat_s32>),
{{{"arg1", asValue}}},
/*isElemental=*/true},
{"__ppc_vec_splats",
static_cast<IntrinsicLibrary::ExtendedGenerator>(
&PI::genVecSplat<VecOp::Splats>),
{{{"arg1", asValue}}},
/*isElemental=*/true},
{"__ppc_vec_sr",
static_cast<IntrinsicLibrary::ExtendedGenerator>(
&PI::genVecShift<VecOp::Sr>),
{{{"arg1", asValue}, {"arg2", asValue}}},
/*isElemental=*/true},
{"__ppc_vec_srl",
static_cast<IntrinsicLibrary::ExtendedGenerator>(
&PI::genVecShift<VecOp::Srl>),
{{{"arg1", asValue}, {"arg2", asValue}}},
/*isElemental=*/true},
{"__ppc_vec_sro",
static_cast<IntrinsicLibrary::ExtendedGenerator>(
&PI::genVecShift<VecOp::Sro>),
{{{"arg1", asValue}, {"arg2", asValue}}},
/*isElemental=*/true},
{"__ppc_vec_st",
static_cast<IntrinsicLibrary::SubroutineGenerator>(
&PI::genVecStore<VecOp::St>),
{{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asAddr}}},
/*isElemental=*/false},
{"__ppc_vec_ste",
static_cast<IntrinsicLibrary::SubroutineGenerator>(
&PI::genVecStore<VecOp::Ste>),
{{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asAddr}}},
/*isElemental=*/false},
{"__ppc_vec_stxv",
static_cast<IntrinsicLibrary::SubroutineGenerator>(
&PI::genVecXStore<VecOp::Stxv>),
{{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asAddr}}},
/*isElemental=*/false},
{"__ppc_vec_stxvp",
static_cast<IntrinsicLibrary::SubroutineGenerator>(
&PI::genVecStore<VecOp::Stxvp>),
{{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asAddr}}},
/*isElemental=*/false},
{"__ppc_vec_sub",
static_cast<IntrinsicLibrary::ExtendedGenerator>(
&PI::genVecAddAndMulSubXor<VecOp::Sub>),
{{{"arg1", asValue}, {"arg2", asValue}}},
/*isElemental=*/true},
{"__ppc_vec_xl",
static_cast<IntrinsicLibrary::ExtendedGenerator>(&PI::genVecXlGrp),
{{{"arg1", asValue}, {"arg2", asAddr}}},
/*isElemental=*/false},
{"__ppc_vec_xl_be",
static_cast<IntrinsicLibrary::ExtendedGenerator>(
&PI::genVecLdNoCallGrp<VecOp::Xlbe>),
{{{"arg1", asValue}, {"arg2", asAddr}}},
/*isElemental=*/false},
{"__ppc_vec_xld2_",
static_cast<IntrinsicLibrary::ExtendedGenerator>(
&PI::genVecLdCallGrp<VecOp::Xld2>),
{{{"arg1", asValue}, {"arg2", asAddr}}},
/*isElemental=*/false},
{"__ppc_vec_xlds",
static_cast<IntrinsicLibrary::ExtendedGenerator>(&PI::genVecXlds),
{{{"arg1", asValue}, {"arg2", asAddr}}},
/*isElemental=*/false},
{"__ppc_vec_xlw4_",
static_cast<IntrinsicLibrary::ExtendedGenerator>(
&PI::genVecLdCallGrp<VecOp::Xlw4>),
{{{"arg1", asValue}, {"arg2", asAddr}}},
/*isElemental=*/false},
{"__ppc_vec_xor",
static_cast<IntrinsicLibrary::ExtendedGenerator>(
&PI::genVecAddAndMulSubXor<VecOp::Xor>),
{{{"arg1", asValue}, {"arg2", asValue}}},
/*isElemental=*/true},
{"__ppc_vec_xst",
static_cast<IntrinsicLibrary::SubroutineGenerator>(
&PI::genVecXStore<VecOp::Xst>),
{{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asAddr}}},
/*isElemental=*/false},
{"__ppc_vec_xst_be",
static_cast<IntrinsicLibrary::SubroutineGenerator>(
&PI::genVecXStore<VecOp::Xst_be>),
{{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asAddr}}},
/*isElemental=*/false},
{"__ppc_vec_xstd2_",
static_cast<IntrinsicLibrary::SubroutineGenerator>(
&PI::genVecXStore<VecOp::Xstd2>),
{{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asAddr}}},
/*isElemental=*/false},
{"__ppc_vec_xstw4_",
static_cast<IntrinsicLibrary::SubroutineGenerator>(
&PI::genVecXStore<VecOp::Xstw4>),
{{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asAddr}}},
/*isElemental=*/false},
};
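// PPC-specific math operations mapped to their underlying LLVM intrinsic
// names. This table must also stay sorted by name; the static_assert on
// ppcMathOps below verifies the ordering.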
static constexpr MathOperation ppcMathOperations[] = {
// fcfi is just another name for fcfid; there is no llvm.ppc.fcfi.
{"__ppc_fcfi", "llvm.ppc.fcfid", genFuncType<Ty::Real<8>, Ty::Real<8>>,
genLibCall},
{"__ppc_fcfid", "llvm.ppc.fcfid", genFuncType<Ty::Real<8>, Ty::Real<8>>,
genLibCall},
{"__ppc_fcfud", "llvm.ppc.fcfud", genFuncType<Ty::Real<8>, Ty::Real<8>>,
genLibCall},
{"__ppc_fctid", "llvm.ppc.fctid", genFuncType<Ty::Real<8>, Ty::Real<8>>,
genLibCall},
{"__ppc_fctidz", "llvm.ppc.fctidz", genFuncType<Ty::Real<8>, Ty::Real<8>>,
genLibCall},
{"__ppc_fctiw", "llvm.ppc.fctiw", genFuncType<Ty::Real<8>, Ty::Real<8>>,
genLibCall},
{"__ppc_fctiwz", "llvm.ppc.fctiwz", genFuncType<Ty::Real<8>, Ty::Real<8>>,
genLibCall},
{"__ppc_fctudz", "llvm.ppc.fctudz", genFuncType<Ty::Real<8>, Ty::Real<8>>,
genLibCall},
{"__ppc_fctuwz", "llvm.ppc.fctuwz", genFuncType<Ty::Real<8>, Ty::Real<8>>,
genLibCall},
{"__ppc_fmadd", "llvm.fma.f32",
genFuncType<Ty::Real<4>, Ty::Real<4>, Ty::Real<4>, Ty::Real<4>>,
genMathOp<mlir::math::FmaOp>},
{"__ppc_fmadd", "llvm.fma.f64",
genFuncType<Ty::Real<8>, Ty::Real<8>, Ty::Real<8>, Ty::Real<8>>,
genMathOp<mlir::math::FmaOp>},
{"__ppc_fmsub", "llvm.ppc.fmsubs",
genFuncType<Ty::Real<4>, Ty::Real<4>, Ty::Real<4>, Ty::Real<4>>,
genLibCall},
{"__ppc_fmsub", "llvm.ppc.fmsub",
genFuncType<Ty::Real<8>, Ty::Real<8>, Ty::Real<8>, Ty::Real<8>>,
genLibCall},
{"__ppc_fnabs", "llvm.ppc.fnabss", genFuncType<Ty::Real<4>, Ty::Real<4>>,
genLibCall},
{"__ppc_fnabs", "llvm.ppc.fnabs", genFuncType<Ty::Real<8>, Ty::Real<8>>,
genLibCall},
{"__ppc_fnmadd", "llvm.ppc.fnmadds",
genFuncType<Ty::Real<4>, Ty::Real<4>, Ty::Real<4>, Ty::Real<4>>,
genLibCall},
{"__ppc_fnmadd", "llvm.ppc.fnmadd",
genFuncType<Ty::Real<8>, Ty::Real<8>, Ty::Real<8>, Ty::Real<8>>,
genLibCall},
{"__ppc_fnmsub", "llvm.ppc.fnmsub.f32",
genFuncType<Ty::Real<4>, Ty::Real<4>, Ty::Real<4>, Ty::Real<4>>,
genLibCall},
{"__ppc_fnmsub", "llvm.ppc.fnmsub.f64",
genFuncType<Ty::Real<8>, Ty::Real<8>, Ty::Real<8>, Ty::Real<8>>,
genLibCall},
{"__ppc_fre", "llvm.ppc.fre", genFuncType<Ty::Real<8>, Ty::Real<8>>,
genLibCall},
{"__ppc_fres", "llvm.ppc.fres", genFuncType<Ty::Real<4>, Ty::Real<4>>,
genLibCall},
{"__ppc_frsqrte", "llvm.ppc.frsqrte", genFuncType<Ty::Real<8>, Ty::Real<8>>,
genLibCall},
{"__ppc_frsqrtes", "llvm.ppc.frsqrtes",
genFuncType<Ty::Real<4>, Ty::Real<4>>, genLibCall},
{"__ppc_vec_cvbf16spn", "llvm.ppc.vsx.xvcvbf16spn",
genFuncType<Ty::UnsignedVector<1>, Ty::UnsignedVector<1>>, genLibCall},
{"__ppc_vec_cvspbf16_", "llvm.ppc.vsx.xvcvspbf16",
genFuncType<Ty::UnsignedVector<1>, Ty::UnsignedVector<1>>, genLibCall},
{"__ppc_vec_madd", "llvm.fma.v4f32",
genFuncType<Ty::RealVector<4>, Ty::RealVector<4>, Ty::RealVector<4>,
Ty::RealVector<4>>,
genLibCall},
{"__ppc_vec_madd", "llvm.fma.v2f64",
genFuncType<Ty::RealVector<8>, Ty::RealVector<8>, Ty::RealVector<8>,
Ty::RealVector<8>>,
genLibCall},
{"__ppc_vec_max", "llvm.ppc.altivec.vmaxsb",
genFuncType<Ty::IntegerVector<1>, Ty::IntegerVector<1>,
Ty::IntegerVector<1>>,
genLibCall},
{"__ppc_vec_max", "llvm.ppc.altivec.vmaxsh",
genFuncType<Ty::IntegerVector<2>, Ty::IntegerVector<2>,
Ty::IntegerVector<2>>,
genLibCall},
{"__ppc_vec_max", "llvm.ppc.altivec.vmaxsw",
genFuncType<Ty::IntegerVector<4>, Ty::IntegerVector<4>,
Ty::IntegerVector<4>>,
genLibCall},
{"__ppc_vec_max", "llvm.ppc.altivec.vmaxsd",
genFuncType<Ty::IntegerVector<8>, Ty::IntegerVector<8>,
Ty::IntegerVector<8>>,
genLibCall},
{"__ppc_vec_max", "llvm.ppc.altivec.vmaxub",
genFuncType<Ty::UnsignedVector<1>, Ty::UnsignedVector<1>,
Ty::UnsignedVector<1>>,
genLibCall},
{"__ppc_vec_max", "llvm.ppc.altivec.vmaxuh",
genFuncType<Ty::UnsignedVector<2>, Ty::UnsignedVector<2>,
Ty::UnsignedVector<2>>,
genLibCall},
{"__ppc_vec_max", "llvm.ppc.altivec.vmaxuw",
genFuncType<Ty::UnsignedVector<4>, Ty::UnsignedVector<4>,
Ty::UnsignedVector<4>>,
genLibCall},
{"__ppc_vec_max", "llvm.ppc.altivec.vmaxud",
genFuncType<Ty::UnsignedVector<8>, Ty::UnsignedVector<8>,
Ty::UnsignedVector<8>>,
genLibCall},
{"__ppc_vec_max", "llvm.ppc.vsx.xvmaxsp",
genFuncType<Ty::RealVector<4>, Ty::RealVector<4>, Ty::RealVector<4>>,
genLibCall},
{"__ppc_vec_max", "llvm.ppc.vsx.xvmaxdp",
genFuncType<Ty::RealVector<8>, Ty::RealVector<8>, Ty::RealVector<8>>,
genLibCall},
{"__ppc_vec_min", "llvm.ppc.altivec.vminsb",
genFuncType<Ty::IntegerVector<1>, Ty::IntegerVector<1>,
Ty::IntegerVector<1>>,
genLibCall},
{"__ppc_vec_min", "llvm.ppc.altivec.vminsh",
genFuncType<Ty::IntegerVector<2>, Ty::IntegerVector<2>,
Ty::IntegerVector<2>>,
genLibCall},
{"__ppc_vec_min", "llvm.ppc.altivec.vminsw",
genFuncType<Ty::IntegerVector<4>, Ty::IntegerVector<4>,
Ty::IntegerVector<4>>,
genLibCall},
{"__ppc_vec_min", "llvm.ppc.altivec.vminsd",
genFuncType<Ty::IntegerVector<8>, Ty::IntegerVector<8>,
Ty::IntegerVector<8>>,
genLibCall},
{"__ppc_vec_min", "llvm.ppc.altivec.vminub",
genFuncType<Ty::UnsignedVector<1>, Ty::UnsignedVector<1>,
Ty::UnsignedVector<1>>,
genLibCall},
{"__ppc_vec_min", "llvm.ppc.altivec.vminuh",
genFuncType<Ty::UnsignedVector<2>, Ty::UnsignedVector<2>,
Ty::UnsignedVector<2>>,
genLibCall},
{"__ppc_vec_min", "llvm.ppc.altivec.vminuw",
genFuncType<Ty::UnsignedVector<4>, Ty::UnsignedVector<4>,
Ty::UnsignedVector<4>>,
genLibCall},
{"__ppc_vec_min", "llvm.ppc.altivec.vminud",
genFuncType<Ty::UnsignedVector<8>, Ty::UnsignedVector<8>,
Ty::UnsignedVector<8>>,
genLibCall},
{"__ppc_vec_min", "llvm.ppc.vsx.xvminsp",
genFuncType<Ty::RealVector<4>, Ty::RealVector<4>, Ty::RealVector<4>>,
genLibCall},
{"__ppc_vec_min", "llvm.ppc.vsx.xvmindp",
genFuncType<Ty::RealVector<8>, Ty::RealVector<8>, Ty::RealVector<8>>,
genLibCall},
{"__ppc_vec_nmsub", "llvm.ppc.fnmsub.v4f32",
genFuncType<Ty::RealVector<4>, Ty::RealVector<4>, Ty::RealVector<4>,
Ty::RealVector<4>>,
genLibCall},
{"__ppc_vec_nmsub", "llvm.ppc.fnmsub.v2f64",
genFuncType<Ty::RealVector<8>, Ty::RealVector<8>, Ty::RealVector<8>,
Ty::RealVector<8>>,
genLibCall},
};
const IntrinsicHandler *findPPCIntrinsicHandler(llvm::StringRef name) {
auto compare = [](const IntrinsicHandler &ppcHandler, llvm::StringRef name) {
return name.compare(ppcHandler.name) > 0;
};
auto result = llvm::lower_bound(ppcHandlers, name, compare);
return result != std::end(ppcHandlers) && result->name == name ? result
: nullptr;
}
using RtMap = Fortran::common::StaticMultimapView<MathOperation>;
static constexpr RtMap ppcMathOps(ppcMathOperations);
static_assert(ppcMathOps.Verify() && "map must be sorted");
std::pair<const MathOperation *, const MathOperation *>
checkPPCMathOperationsRange(llvm::StringRef name) {
return ppcMathOps.equal_range(name);
}
// Helper functions for vector element ordering.
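// On little-endian targets the NoPPCNativeVecElemOrder lowering option selects
// big-endian (BE) vector element numbering; otherwise the native (LE) element
// order is used. These predicates query that combination of target and option.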
bool PPCIntrinsicLibrary::isBEVecElemOrderOnLE() {
const auto triple{fir::getTargetTriple(builder.getModule())};
return (triple.isLittleEndian() &&
converter->getLoweringOptions().getNoPPCNativeVecElemOrder());
}
bool PPCIntrinsicLibrary::isNativeVecElemOrderOnLE() {
const auto triple{fir::getTargetTriple(builder.getModule())};
return (triple.isLittleEndian() &&
!converter->getLoweringOptions().getNoPPCNativeVecElemOrder());
}
bool PPCIntrinsicLibrary::changeVecElemOrder() {
const auto triple{fir::getTargetTriple(builder.getModule())};
return (triple.isLittleEndian() !=
converter->getLoweringOptions().getNoPPCNativeVecElemOrder());
}
static mlir::FunctionType genMmaVpFuncType(mlir::MLIRContext *context,
int quadCnt, int pairCnt, int vecCnt,
int intCnt = 0,
int vecElemBitSize = 8,
int intBitSize = 32) {
// Constructs a function type with the following signature:
// Result type: __vector_pair
// Arguments:
// quadCnt: number of arguments that have __vector_quad type, followed by
// pairCnt: number of arguments that have __vector_pair type, followed by
// vecCnt: number of arguments that have vector(integer) type, followed by
// intCnt: number of arguments that have integer type
// vecElemBitSize: specifies the size of vector elements in bits
// intBitSize: specifies the size of integer arguments in bits
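// For example, genMmaVpFuncType(context, /*quadCnt=*/0, /*pairCnt=*/1,
// /*vecCnt=*/1) describes a function taking (!fir.vector<256:i1>,
// vector<16xi8>) and returning a __vector_pair (!fir.vector<256:i1>).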
auto vType{mlir::VectorType::get(
128 / vecElemBitSize, mlir::IntegerType::get(context, vecElemBitSize))};
auto vpType{fir::VectorType::get(256, mlir::IntegerType::get(context, 1))};
auto vqType{fir::VectorType::get(512, mlir::IntegerType::get(context, 1))};
auto iType{mlir::IntegerType::get(context, intBitSize)};
llvm::SmallVector<mlir::Type> argTypes;
for (int i = 0; i < quadCnt; ++i) {
argTypes.push_back(vqType);
}
for (int i = 0; i < pairCnt; ++i) {
argTypes.push_back(vpType);
}
for (int i = 0; i < vecCnt; ++i) {
argTypes.push_back(vType);
}
for (int i = 0; i < intCnt; ++i) {
argTypes.push_back(iType);
}
return mlir::FunctionType::get(context, argTypes, {vpType});
}
static mlir::FunctionType genMmaVqFuncType(mlir::MLIRContext *context,
int quadCnt, int pairCnt, int vecCnt,
int intCnt = 0,
int vecElemBitSize = 8,
int intBitSize = 32) {
// Constructs a function type with the following signature:
// Result type: __vector_quad
// Arguments:
// quadCnt: number of arguments that have __vector_quad type, followed by
// pairCnt: number of arguments that have __vector_pair type, followed by
// vecCnt: number of arguments that have vector(integer) type, followed by
// intCnt: number of arguments that have integer type
// vecElemBitSize: specifies the size of vector elements in bits
// intBitSize: specifies the size of integer arguments in bits
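// Identical layout to genMmaVpFuncType above, except the result type is
// __vector_quad (!fir.vector<512:i1>).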
auto vType{mlir::VectorType::get(
128 / vecElemBitSize, mlir::IntegerType::get(context, vecElemBitSize))};
auto vpType{fir::VectorType::get(256, mlir::IntegerType::get(context, 1))};
auto vqType{fir::VectorType::get(512, mlir::IntegerType::get(context, 1))};
auto iType{mlir::IntegerType::get(context, intBitSize)};
llvm::SmallVector<mlir::Type> argTypes;
for (int i = 0; i < quadCnt; ++i) {
argTypes.push_back(vqType);
}
for (int i = 0; i < pairCnt; ++i) {
argTypes.push_back(vpType);
}
for (int i = 0; i < vecCnt; ++i) {
argTypes.push_back(vType);
}
for (int i = 0; i < intCnt; ++i) {
argTypes.push_back(iType);
}
return mlir::FunctionType::get(context, argTypes, {vqType});
}
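// Build the signature used by the MMA disassemble intrinsics: the input is a
// __vector_quad (512-bit) or __vector_pair (256-bit), and the result is an
// LLVM struct of four or two vector<16xi8> members, respectively.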
mlir::FunctionType genMmaDisassembleFuncType(mlir::MLIRContext *context,
MMAOp mmaOp) {
auto vType{mlir::VectorType::get(16, mlir::IntegerType::get(context, 8))};
llvm::SmallVector<mlir::Type> members;
if (mmaOp == MMAOp::DisassembleAcc) {
auto vqType{fir::VectorType::get(512, mlir::IntegerType::get(context, 1))};
members.push_back(vType);
members.push_back(vType);
members.push_back(vType);
members.push_back(vType);
auto resType{mlir::LLVM::LLVMStructType::getLiteral(context, members)};
return mlir::FunctionType::get(context, {vqType}, {resType});
} else if (mmaOp == MMAOp::DisassemblePair) {
auto vpType{fir::VectorType::get(256, mlir::IntegerType::get(context, 1))};
members.push_back(vType);
members.push_back(vType);
auto resType{mlir::LLVM::LLVMStructType::getLiteral(context, members)};
return mlir::FunctionType::get(context, {vpType}, {resType});
} else {
llvm_unreachable(
"Unsupported intrinsic code for function signature generator");
}
}
//===----------------------------------------------------------------------===//
// PowerPC specific intrinsic handlers.
//===----------------------------------------------------------------------===//
// MTFSF, MTFSFI
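// When isImm is true this lowers to llvm.ppc.mtfsfi (immediate form);
// otherwise it lowers to llvm.ppc.mtfsf.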
template <bool isImm>
void PPCIntrinsicLibrary::genMtfsf(llvm::ArrayRef<fir::ExtendedValue> args) {
assert(args.size() == 2);
llvm::SmallVector<mlir::Value> scalarArgs;
for (const fir::ExtendedValue &arg : args)
if (arg.getUnboxed())
scalarArgs.emplace_back(fir::getBase(arg));
else
mlir::emitError(loc, "nonscalar intrinsic argument");
mlir::FunctionType libFuncType;
mlir::func::FuncOp funcOp;
if (isImm) {
libFuncType = genFuncType<Ty::Void, Ty::Integer<4>, Ty::Integer<4>>(
builder.getContext(), builder);
funcOp = builder.createFunction(loc, "llvm.ppc.mtfsfi", libFuncType);
} else {
libFuncType = genFuncType<Ty::Void, Ty::Integer<4>, Ty::Real<8>>(
builder.getContext(), builder);
funcOp = builder.createFunction(loc, "llvm.ppc.mtfsf", libFuncType);
}
builder.create<fir::CallOp>(loc, funcOp, scalarArgs);
}
// VEC_ABS
fir::ExtendedValue
PPCIntrinsicLibrary::genVecAbs(mlir::Type resultType,
llvm::ArrayRef<fir::ExtendedValue> args) {
assert(args.size() == 1);
auto context{builder.getContext()};
auto argBases{getBasesForArgs(args)};
auto vTypeInfo{getVecTypeFromFir(argBases[0])};
mlir::func::FuncOp funcOp{nullptr};
mlir::FunctionType ftype;
llvm::StringRef fname{};
if (vTypeInfo.isFloat()) {
if (vTypeInfo.isFloat32()) {
fname = "llvm.fabs.v4f32";
ftype =
genFuncType<Ty::RealVector<4>, Ty::RealVector<4>>(context, builder);
} else if (vTypeInfo.isFloat64()) {
fname = "llvm.fabs.v2f64";
ftype =
genFuncType<Ty::RealVector<8>, Ty::RealVector<8>>(context, builder);
}
funcOp = builder.createFunction(loc, fname, ftype);
auto callOp{builder.create<fir::CallOp>(loc, funcOp, argBases[0])};
return callOp.getResult(0);
} else if (auto eleTy = mlir::dyn_cast<mlir::IntegerType>(vTypeInfo.eleTy)) {
// vec_abs(arg1) = max(0 - arg1, arg1)
auto newVecTy{mlir::VectorType::get(vTypeInfo.len, eleTy)};
auto varg1{builder.createConvert(loc, newVecTy, argBases[0])};
// Construct a zero vector: (0, 0, ...)
auto zeroVal{builder.createIntegerConstant(loc, eleTy, 0)};
auto vZero{
builder.create<mlir::vector::BroadcastOp>(loc, newVecTy, zeroVal)};
auto zeroSubVarg1{builder.create<mlir::arith::SubIOp>(loc, vZero, varg1)};
mlir::func::FuncOp funcOp{nullptr};
switch (eleTy.getWidth()) {
case 8:
fname = "llvm.ppc.altivec.vmaxsb";
ftype = genFuncType<Ty::IntegerVector<1>, Ty::IntegerVector<1>,
Ty::IntegerVector<1>>(context, builder);
break;
case 16:
fname = "llvm.ppc.altivec.vmaxsh";
ftype = genFuncType<Ty::IntegerVector<2>, Ty::IntegerVector<2>,
Ty::IntegerVector<2>>(context, builder);
break;
case 32:
fname = "llvm.ppc.altivec.vmaxsw";
ftype = genFuncType<Ty::IntegerVector<4>, Ty::IntegerVector<4>,
Ty::IntegerVector<4>>(context, builder);
break;
case 64:
fname = "llvm.ppc.altivec.vmaxsd";
ftype = genFuncType<Ty::IntegerVector<8>, Ty::IntegerVector<8>,
Ty::IntegerVector<8>>(context, builder);
break;
default:
llvm_unreachable("invalid integer size");
}
funcOp = builder.createFunction(loc, fname, ftype);
mlir::Value args[] = {zeroSubVarg1, varg1};
auto callOp{builder.create<fir::CallOp>(loc, funcOp, args)};
return builder.createConvert(loc, argBases[0].getType(),
callOp.getResult(0));
}
llvm_unreachable("unknown vector type");
}
// VEC_ADD, VEC_AND, VEC_SUB, VEC_MUL, VEC_XOR
template <VecOp vop>
fir::ExtendedValue PPCIntrinsicLibrary::genVecAddAndMulSubXor(
mlir::Type resultType, llvm::ArrayRef<fir::ExtendedValue> args) {
assert(args.size() == 2);
auto argBases{getBasesForArgs(args)};
auto argsTy{getTypesForArgs(argBases)};
assert(mlir::isa<fir::VectorType>(argsTy[0]) &&
mlir::isa<fir::VectorType>(argsTy[1]));
auto vecTyInfo{getVecTypeFromFir(argBases[0])};
const auto isInteger{mlir::isa<mlir::IntegerType>(vecTyInfo.eleTy)};
const auto isFloat{mlir::isa<mlir::FloatType>(vecTyInfo.eleTy)};
assert((isInteger || isFloat) && "unknown vector type");
auto vargs{convertVecArgs(builder, loc, vecTyInfo, argBases)};
mlir::Value r{nullptr};
switch (vop) {
case VecOp::Add:
if (isInteger)
r = builder.create<mlir::arith::AddIOp>(loc, vargs[0], vargs[1]);
else if (isFloat)
r = builder.create<mlir::arith::AddFOp>(loc, vargs[0], vargs[1]);
break;
case VecOp::Mul:
if (isInteger)
r = builder.create<mlir::arith::MulIOp>(loc, vargs[0], vargs[1]);
else if (isFloat)
r = builder.create<mlir::arith::MulFOp>(loc, vargs[0], vargs[1]);
break;
case VecOp::Sub:
if (isInteger)
r = builder.create<mlir::arith::SubIOp>(loc, vargs[0], vargs[1]);
else if (isFloat)
r = builder.create<mlir::arith::SubFOp>(loc, vargs[0], vargs[1]);
break;
case VecOp::And:
case VecOp::Xor: {
mlir::Value arg1{nullptr};
mlir::Value arg2{nullptr};
if (isInteger) {
arg1 = vargs[0];
arg2 = vargs[1];
} else if (isFloat) {
// bitcast the arguments to integer
auto wd{mlir::dyn_cast<mlir::FloatType>(vecTyInfo.eleTy).getWidth()};
auto ftype{builder.getIntegerType(wd)};
auto bcVecTy{mlir::VectorType::get(vecTyInfo.len, ftype)};
arg1 = builder.create<mlir::vector::BitCastOp>(loc, bcVecTy, vargs[0]);
arg2 = builder.create<mlir::vector::BitCastOp>(loc, bcVecTy, vargs[1]);
}
if (vop == VecOp::And)
r = builder.create<mlir::arith::AndIOp>(loc, arg1, arg2);
else if (vop == VecOp::Xor)
r = builder.create<mlir::arith::XOrIOp>(loc, arg1, arg2);
if (isFloat)
r = builder.create<mlir::vector::BitCastOp>(loc, vargs[0].getType(), r);
break;
}
}
return builder.createConvert(loc, argsTy[0], r);
}
// VEC_ANY_GE
template <VecOp vop>
fir::ExtendedValue
PPCIntrinsicLibrary::genVecAnyCompare(mlir::Type resultType,
llvm::ArrayRef<fir::ExtendedValue> args) {
assert(args.size() == 2);
assert(vop == VecOp::Anyge && "unknown vector compare operation");
auto argBases{getBasesForArgs(args)};
VecTypeInfo vTypeInfo{getVecTypeFromFir(argBases[0])};
[[maybe_unused]] const auto isSupportedTy{
mlir::isa<mlir::Float32Type, mlir::Float64Type, mlir::IntegerType>(
vTypeInfo.eleTy)};
assert(isSupportedTy && "unsupported vector type");
// Constants for mapping CR6 bits to predicate result
enum { CR6_EQ_REV = 1, CR6_LT_REV = 3 };
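// In the AltiVec/VSX predicate-form intrinsics, CR6_LT_REV means "not all
// elements true" and CR6_EQ_REV means "not all elements false"; both express
// vec_any_ge(a, b) in terms of the GT/GE comparisons used below.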
auto context{builder.getContext()};
static std::map<std::pair<ParamTypeId, unsigned>,
std::pair<llvm::StringRef, mlir::FunctionType>>
uiBuiltin{
{std::make_pair(ParamTypeId::IntegerVector, 8),
std::make_pair(
"llvm.ppc.altivec.vcmpgtsb.p",
genFuncType<Ty::Integer<4>, Ty::Integer<4>, Ty::IntegerVector<1>,
Ty::IntegerVector<1>>(context, builder))},
{std::make_pair(ParamTypeId::IntegerVector, 16),
std::make_pair(
"llvm.ppc.altivec.vcmpgtsh.p",
genFuncType<Ty::Integer<4>, Ty::Integer<4>, Ty::IntegerVector<2>,
Ty::IntegerVector<2>>(context, builder))},
{std::make_pair(ParamTypeId::IntegerVector, 32),
std::make_pair(
"llvm.ppc.altivec.vcmpgtsw.p",
genFuncType<Ty::Integer<4>, Ty::Integer<4>, Ty::IntegerVector<4>,
Ty::IntegerVector<4>>(context, builder))},
{std::make_pair(ParamTypeId::IntegerVector, 64),
std::make_pair(
"llvm.ppc.altivec.vcmpgtsd.p",
genFuncType<Ty::Integer<4>, Ty::Integer<4>, Ty::IntegerVector<8>,
Ty::IntegerVector<8>>(context, builder))},
{std::make_pair(ParamTypeId::UnsignedVector, 8),
std::make_pair(
"llvm.ppc.altivec.vcmpgtub.p",
genFuncType<Ty::Integer<4>, Ty::Integer<4>,
Ty::UnsignedVector<1>, Ty::UnsignedVector<1>>(
context, builder))},
{std::make_pair(ParamTypeId::UnsignedVector, 16),
std::make_pair(
"llvm.ppc.altivec.vcmpgtuh.p",
genFuncType<Ty::Integer<4>, Ty::Integer<4>,
Ty::UnsignedVector<2>, Ty::UnsignedVector<2>>(
context, builder))},
{std::make_pair(ParamTypeId::UnsignedVector, 32),
std::make_pair(
"llvm.ppc.altivec.vcmpgtuw.p",
genFuncType<Ty::Integer<4>, Ty::Integer<4>,
Ty::UnsignedVector<4>, Ty::UnsignedVector<4>>(
context, builder))},
{std::make_pair(ParamTypeId::UnsignedVector, 64),
std::make_pair(
"llvm.ppc.altivec.vcmpgtud.p",
genFuncType<Ty::Integer<4>, Ty::Integer<4>,
Ty::UnsignedVector<8>, Ty::UnsignedVector<8>>(
context, builder))},
};
mlir::FunctionType ftype{nullptr};
llvm::StringRef fname;
const auto i32Ty{mlir::IntegerType::get(context, 32)};
llvm::SmallVector<mlir::Value> cmpArgs;
mlir::Value op{nullptr};
const auto width{vTypeInfo.eleTy.getIntOrFloatBitWidth()};
if (auto elementTy = mlir::dyn_cast<mlir::IntegerType>(vTypeInfo.eleTy)) {
std::pair<llvm::StringRef, mlir::FunctionType> bi;
bi = (elementTy.isUnsignedInteger())
? uiBuiltin[std::pair(ParamTypeId::UnsignedVector, width)]
: uiBuiltin[std::pair(ParamTypeId::IntegerVector, width)];
fname = std::get<0>(bi);
ftype = std::get<1>(bi);
op = builder.createIntegerConstant(loc, i32Ty, CR6_LT_REV);
cmpArgs.emplace_back(op);
// reverse the argument order
cmpArgs.emplace_back(argBases[1]);
cmpArgs.emplace_back(argBases[0]);
} else if (vTypeInfo.isFloat()) {
if (vTypeInfo.isFloat32()) {
fname = "llvm.ppc.vsx.xvcmpgesp.p";
ftype = genFuncType<Ty::Integer<4>, Ty::Integer<4>, Ty::RealVector<4>,
Ty::RealVector<4>>(context, builder);
} else {
fname = "llvm.ppc.vsx.xvcmpgedp.p";
ftype = genFuncType<Ty::Integer<4>, Ty::Integer<4>, Ty::RealVector<8>,
Ty::RealVector<8>>(context, builder);
}
op = builder.createIntegerConstant(loc, i32Ty, CR6_EQ_REV);
cmpArgs.emplace_back(op);
cmpArgs.emplace_back(argBases[0]);
cmpArgs.emplace_back(argBases[1]);
}
assert((!fname.empty() && ftype) && "invalid type");
mlir::func::FuncOp funcOp{builder.createFunction(loc, fname, ftype)};
auto callOp{builder.create<fir::CallOp>(loc, funcOp, cmpArgs)};
return callOp.getResult(0);
}
static std::pair<llvm::StringRef, mlir::FunctionType>
getVecCmpFuncTypeAndName(VecTypeInfo &vTypeInfo, VecOp vop,
fir::FirOpBuilder &builder) {
auto context{builder.getContext()};
static std::map<std::pair<ParamTypeId, unsigned>,
std::pair<llvm::StringRef, mlir::FunctionType>>
iuBuiltinName{
{std::make_pair(ParamTypeId::IntegerVector, 8),
std::make_pair(
"llvm.ppc.altivec.vcmpgtsb",
genFuncType<Ty::UnsignedVector<1>, Ty::IntegerVector<1>,
Ty::IntegerVector<1>>(context, builder))},
{std::make_pair(ParamTypeId::IntegerVector, 16),
std::make_pair(
"llvm.ppc.altivec.vcmpgtsh",
genFuncType<Ty::UnsignedVector<2>, Ty::IntegerVector<2>,
Ty::IntegerVector<2>>(context, builder))},
{std::make_pair(ParamTypeId::IntegerVector, 32),
std::make_pair(
"llvm.ppc.altivec.vcmpgtsw",
genFuncType<Ty::UnsignedVector<4>, Ty::IntegerVector<4>,
Ty::IntegerVector<4>>(context, builder))},
{std::make_pair(ParamTypeId::IntegerVector, 64),
std::make_pair(
"llvm.ppc.altivec.vcmpgtsd",
genFuncType<Ty::UnsignedVector<8>, Ty::IntegerVector<8>,
Ty::IntegerVector<8>>(context, builder))},
{std::make_pair(ParamTypeId::UnsignedVector, 8),
std::make_pair(
"llvm.ppc.altivec.vcmpgtub",
genFuncType<Ty::UnsignedVector<1>, Ty::UnsignedVector<1>,
Ty::UnsignedVector<1>>(context, builder))},
{std::make_pair(ParamTypeId::UnsignedVector, 16),
std::make_pair(
"llvm.ppc.altivec.vcmpgtuh",
genFuncType<Ty::UnsignedVector<2>, Ty::UnsignedVector<2>,
Ty::UnsignedVector<2>>(context, builder))},
{std::make_pair(ParamTypeId::UnsignedVector, 32),
std::make_pair(
"llvm.ppc.altivec.vcmpgtuw",
genFuncType<Ty::UnsignedVector<4>, Ty::UnsignedVector<4>,
Ty::UnsignedVector<4>>(context, builder))},
{std::make_pair(ParamTypeId::UnsignedVector, 64),
std::make_pair(
"llvm.ppc.altivec.vcmpgtud",
genFuncType<Ty::UnsignedVector<8>, Ty::UnsignedVector<8>,
Ty::UnsignedVector<8>>(context, builder))}};
// VSX only defines GE and GT builtins. Cmple and Cmplt use GE and GT with
// arguments reversed.
enum class Cmp { gtOrLt, geOrLe };
static std::map<std::pair<Cmp, int>,
std::pair<llvm::StringRef, mlir::FunctionType>>
rGBI{{std::make_pair(Cmp::geOrLe, 32),
std::make_pair("llvm.ppc.vsx.xvcmpgesp",
genFuncType<Ty::UnsignedVector<4>, Ty::RealVector<4>,
Ty::RealVector<4>>(context, builder))},
{std::make_pair(Cmp::geOrLe, 64),
std::make_pair("llvm.ppc.vsx.xvcmpgedp",
genFuncType<Ty::UnsignedVector<8>, Ty::RealVector<8>,
Ty::RealVector<8>>(context, builder))},
{std::make_pair(Cmp::gtOrLt, 32),
std::make_pair("llvm.ppc.vsx.xvcmpgtsp",
genFuncType<Ty::UnsignedVector<4>, Ty::RealVector<4>,
Ty::RealVector<4>>(context, builder))},
{std::make_pair(Cmp::gtOrLt, 64),
std::make_pair("llvm.ppc.vsx.xvcmpgtdp",
genFuncType<Ty::UnsignedVector<8>, Ty::RealVector<8>,
Ty::RealVector<8>>(context, builder))}};
const auto width{vTypeInfo.eleTy.getIntOrFloatBitWidth()};
std::pair<llvm::StringRef, mlir::FunctionType> specFunc;
if (auto elementTy = mlir::dyn_cast<mlir::IntegerType>(vTypeInfo.eleTy))
specFunc =
(elementTy.isUnsignedInteger())
? iuBuiltinName[std::make_pair(ParamTypeId::UnsignedVector, width)]
: iuBuiltinName[std::make_pair(ParamTypeId::IntegerVector, width)];
else if (vTypeInfo.isFloat())
specFunc = (vop == VecOp::Cmpge || vop == VecOp::Cmple)
? rGBI[std::make_pair(Cmp::geOrLe, width)]
: rGBI[std::make_pair(Cmp::gtOrLt, width)];
assert(!std::get<0>(specFunc).empty() && "unknown builtin name");
assert(std::get<1>(specFunc) && "unknown function type");
return specFunc;
}
// VEC_CMPGE, VEC_CMPGT, VEC_CMPLE, VEC_CMPLT
template <VecOp vop>
fir::ExtendedValue
PPCIntrinsicLibrary::genVecCmp(mlir::Type resultType,
llvm::ArrayRef<fir::ExtendedValue> args) {
assert(args.size() == 2);
auto context{builder.getContext()};
auto argBases{getBasesForArgs(args)};
VecTypeInfo vecTyInfo{getVecTypeFromFir(argBases[0])};
auto varg{convertVecArgs(builder, loc, vecTyInfo, argBases)};
std::pair<llvm::StringRef, mlir::FunctionType> funcTyNam{
getVecCmpFuncTypeAndName(vecTyInfo, vop, builder)};
mlir::func::FuncOp funcOp = builder.createFunction(
loc, std::get<0>(funcTyNam), std::get<1>(funcTyNam));
mlir::Value res{nullptr};
if (auto eTy = mlir::dyn_cast<mlir::IntegerType>(vecTyInfo.eleTy)) {
constexpr int firstArg{0};
constexpr int secondArg{1};
std::map<VecOp, std::array<int, 2>> argOrder{
{VecOp::Cmpge, {secondArg, firstArg}},
{VecOp::Cmple, {firstArg, secondArg}},
{VecOp::Cmpgt, {firstArg, secondArg}},
{VecOp::Cmplt, {secondArg, firstArg}}};
// Construct the function return type, unsigned vector, for conversion.
auto itype = mlir::IntegerType::get(context, eTy.getWidth(),
mlir::IntegerType::Unsigned);
auto returnType = fir::VectorType::get(vecTyInfo.len, itype);
switch (vop) {
case VecOp::Cmpgt:
case VecOp::Cmplt: {
// arg1 > arg2 --> vcmpgt(arg1, arg2)
// arg1 < arg2 --> vcmpgt(arg2, arg1)
mlir::Value vargs[]{argBases[argOrder[vop][0]],
argBases[argOrder[vop][1]]};
auto callOp{builder.create<fir::CallOp>(loc, funcOp, vargs)};
res = callOp.getResult(0);
break;
}
case VecOp::Cmpge:
case VecOp::Cmple: {
// arg1 >= arg2 --> vcmpge(arg2, arg1) xor vector(-1)
// arg1 <= arg2 --> vcmpge(arg1, arg2) xor vector(-1)
mlir::Value vargs[]{argBases[argOrder[vop][0]],
argBases[argOrder[vop][1]]};
// Construct a constant vector(-1)
auto negOneVal{builder.createIntegerConstant(
loc, getConvertedElementType(context, eTy), -1)};
auto vNegOne{builder.create<mlir::vector::BroadcastOp>(
loc, vecTyInfo.toMlirVectorType(context), negOneVal)};
auto callOp{builder.create<fir::CallOp>(loc, funcOp, vargs)};
mlir::Value callRes{callOp.getResult(0)};
auto vargs2{
convertVecArgs(builder, loc, vecTyInfo, mlir::ValueRange{callRes})};
auto xorRes{builder.create<mlir::arith::XOrIOp>(loc, vargs2[0], vNegOne)};
res = builder.createConvert(loc, returnType, xorRes);
break;
}
default:
llvm_unreachable("Invalid vector operation for generator");
}
} else if (vecTyInfo.isFloat()) {
mlir::Value vargs[2];
switch (vop) {
case VecOp::Cmpge:
case VecOp::Cmpgt:
vargs[0] = argBases[0];
vargs[1] = argBases[1];
break;
case VecOp::Cmple:
case VecOp::Cmplt:
// Swap the arguments as xvcmpg[et] is used
vargs[0] = argBases[1];
vargs[1] = argBases[0];
break;
default:
llvm_unreachable("Invalid vector operation for generator");
}
auto callOp{builder.create<fir::CallOp>(loc, funcOp, vargs)};
res = callOp.getResult(0);
} else
llvm_unreachable("invalid vector type");
return res;
}
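// Swap the two 32-bit words within each 64-bit doubleword of a 128-bit vector
// (byte shuffle 4..7,0..3,12..15,8..11); used to compensate for vector element
// ordering when converting between single- and double-precision vectors.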
static inline mlir::Value swapVectorWordPairs(fir::FirOpBuilder &builder,
mlir::Location loc,
mlir::Value arg) {
auto ty = arg.getType();
auto context{builder.getContext()};
auto vtype{mlir::VectorType::get(16, mlir::IntegerType::get(context, 8))};
if (ty != vtype)
arg = builder.create<mlir::LLVM::BitcastOp>(loc, vtype, arg).getResult();
llvm::SmallVector<int64_t, 16> mask{4, 5, 6, 7, 0, 1, 2, 3,
12, 13, 14, 15, 8, 9, 10, 11};
arg = builder.create<mlir::vector::ShuffleOp>(loc, arg, arg, mask);
if (ty != vtype)
arg = builder.create<mlir::LLVM::BitcastOp>(loc, ty, arg);
return arg;
}
// VEC_CONVERT, VEC_CTF, VEC_CVF
template <VecOp vop>
fir::ExtendedValue
PPCIntrinsicLibrary::genVecConvert(mlir::Type resultType,
llvm::ArrayRef<fir::ExtendedValue> args) {
auto context{builder.getContext()};
auto argBases{getBasesForArgs(args)};
auto vecTyInfo{getVecTypeFromFir(argBases[0])};
auto mlirTy{vecTyInfo.toMlirVectorType(context)};
auto vArg1{builder.createConvert(loc, mlirTy, argBases[0])};
const auto i32Ty{mlir::IntegerType::get(context, 32)};
switch (vop) {
case VecOp::Ctf: {
assert(args.size() == 2);
auto convArg{builder.createConvert(loc, i32Ty, argBases[1])};
auto eTy{mlir::dyn_cast<mlir::IntegerType>(vecTyInfo.eleTy)};
assert(eTy && "Unsupported vector type");
const auto isUnsigned{eTy.isUnsignedInteger()};
const auto width{eTy.getWidth()};
if (width == 32) {
auto ftype{(isUnsigned)
? genFuncType<Ty::RealVector<4>, Ty::UnsignedVector<4>,
Ty::Integer<4>>(context, builder)
: genFuncType<Ty::RealVector<4>, Ty::IntegerVector<4>,
Ty::Integer<4>>(context, builder)};
const llvm::StringRef fname{(isUnsigned) ? "llvm.ppc.altivec.vcfux"
: "llvm.ppc.altivec.vcfsx"};
auto funcOp{builder.createFunction(loc, fname, ftype)};
mlir::Value newArgs[] = {argBases[0], convArg};
auto callOp{builder.create<fir::CallOp>(loc, funcOp, newArgs)};
return callOp.getResult(0);
} else if (width == 64) {
auto fTy{mlir::FloatType::getF64(context)};
auto ty{mlir::VectorType::get(2, fTy)};
// vec_ctf(arg1, arg2) = fmul(1.0 / (1 << arg2), llvm.sitofp(arg1))
auto convOp{(isUnsigned)
? builder.create<mlir::LLVM::UIToFPOp>(loc, ty, vArg1)
: builder.create<mlir::LLVM::SIToFPOp>(loc, ty, vArg1)};
// Construct the constant vector <1.0/(1<<arg2), 1.0/(1<<arg2)>
auto constInt{mlir::dyn_cast_or_null<mlir::IntegerAttr>(
mlir::dyn_cast<mlir::arith::ConstantOp>(argBases[1].getDefiningOp())
.getValue())};
assert(constInt && "expected integer constant argument");
double f{1.0 / (1 << constInt.getInt())};
llvm::SmallVector<double> vals{f, f};
auto constOp{builder.create<mlir::arith::ConstantOp>(
loc, ty, builder.getF64VectorAttr(vals))};
auto mulOp{builder.create<mlir::LLVM::FMulOp>(
loc, ty, convOp->getResult(0), constOp)};
return builder.createConvert(loc, fir::VectorType::get(2, fTy), mulOp);
}
llvm_unreachable("invalid element integer kind");
}
case VecOp::Convert: {
assert(args.size() == 2);
// resultType has mold type (if scalar) or element type (if array)
auto resTyInfo{getVecTypeFromFirType(resultType)};
auto moldTy{resTyInfo.toMlirVectorType(context)};
auto firTy{resTyInfo.toFirVectorType()};
// vec_convert(v, mold) = bitcast v to "type of mold"
auto conv{builder.create<mlir::LLVM::BitcastOp>(loc, moldTy, vArg1)};
return builder.createConvert(loc, firTy, conv);
}
case VecOp::Cvf: {
assert(args.size() == 1);
mlir::Value newArgs[]{vArg1};
if (vecTyInfo.isFloat32()) {
if (changeVecElemOrder())
newArgs[0] = swapVectorWordPairs(builder, loc, newArgs[0]);
const llvm::StringRef fname{"llvm.ppc.vsx.xvcvspdp"};
auto ftype{
genFuncType<Ty::RealVector<8>, Ty::RealVector<4>>(context, builder)};
auto funcOp{builder.createFunction(loc, fname, ftype)};
auto callOp{builder.create<fir::CallOp>(loc, funcOp, newArgs)};
return callOp.getResult(0);
} else if (vecTyInfo.isFloat64()) {
const llvm::StringRef fname{"llvm.ppc.vsx.xvcvdpsp"};
auto ftype{
genFuncType<Ty::RealVector<4>, Ty::RealVector<8>>(context, builder)};
auto funcOp{builder.createFunction(loc, fname, ftype)};
newArgs[0] =
builder.create<fir::CallOp>(loc, funcOp, newArgs).getResult(0);
auto fvf32Ty{newArgs[0].getType()};
auto f32type{mlir::FloatType::getF32(context)};
auto mvf32Ty{mlir::VectorType::get(4, f32type)};
newArgs[0] = builder.createConvert(loc, mvf32Ty, newArgs[0]);
if (changeVecElemOrder())
newArgs[0] = swapVectorWordPairs(builder, loc, newArgs[0]);
return builder.createConvert(loc, fvf32Ty, newArgs[0]);
}
llvm_unreachable("invalid element integer kind");
}
default:
llvm_unreachable("Invalid vector operation for generator");
}
}
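// Map an element index between native and reversed (big-endian) vector
// element order: idx -> (len - 1) - idx.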
static mlir::Value convertVectorElementOrder(fir::FirOpBuilder &builder,
mlir::Location loc,
VecTypeInfo vecInfo,
mlir::Value idx) {
mlir::Value numSub1{
builder.createIntegerConstant(loc, idx.getType(), vecInfo.len - 1)};
return builder.create<mlir::LLVM::SubOp>(loc, idx.getType(), numSub1, idx);
}
// VEC_EXTRACT
fir::ExtendedValue
PPCIntrinsicLibrary::genVecExtract(mlir::Type resultType,
llvm::ArrayRef<fir::ExtendedValue> args) {
assert(args.size() == 2);
auto argBases{getBasesForArgs(args)};
auto argTypes{getTypesForArgs(argBases)};
auto vecTyInfo{getVecTypeFromFir(argBases[0])};
auto mlirTy{vecTyInfo.toMlirVectorType(builder.getContext())};
auto varg0{builder.createConvert(loc, mlirTy, argBases[0])};
// arg2 modulo the number of elements in arg1 to determine the element
// position
auto numEle{builder.createIntegerConstant(loc, argTypes[1], vecTyInfo.len)};
mlir::Value uremOp{
builder.create<mlir::LLVM::URemOp>(loc, argBases[1], numEle)};
if (!isNativeVecElemOrderOnLE())
uremOp = convertVectorElementOrder(builder, loc, vecTyInfo, uremOp);
return builder.create<mlir::vector::ExtractElementOp>(loc, varg0, uremOp);
}
// VEC_INSERT
fir::ExtendedValue
PPCIntrinsicLibrary::genVecInsert(mlir::Type resultType,
llvm::ArrayRef<fir::ExtendedValue> args) {
assert(args.size() == 3);
auto argBases{getBasesForArgs(args)};
auto argTypes{getTypesForArgs(argBases)};
auto vecTyInfo{getVecTypeFromFir(argBases[1])};
auto mlirTy{vecTyInfo.toMlirVectorType(builder.getContext())};
auto varg1{builder.createConvert(loc, mlirTy, argBases[1])};
auto numEle{builder.createIntegerConstant(loc, argTypes[2], vecTyInfo.len)};
mlir::Value uremOp{
builder.create<mlir::LLVM::URemOp>(loc, argBases[2], numEle)};
if (!isNativeVecElemOrderOnLE())
uremOp = convertVectorElementOrder(builder, loc, vecTyInfo, uremOp);
auto res{builder.create<mlir::vector::InsertElementOp>(loc, argBases[0],
varg1, uremOp)};
return builder.create<fir::ConvertOp>(loc, vecTyInfo.toFirVectorType(), res);
}
// VEC_MERGEH, VEC_MERGEL
template <VecOp vop>
fir::ExtendedValue
PPCIntrinsicLibrary::genVecMerge(mlir::Type resultType,
llvm::ArrayRef<fir::ExtendedValue> args) {
assert(args.size() == 2);
auto argBases{getBasesForArgs(args)};
auto vecTyInfo{getVecTypeFromFir(argBases[0])};
llvm::SmallVector<int64_t, 16> mMask; // native vector element order mask
llvm::SmallVector<int64_t, 16> rMask; // non-native vector element order mask
switch (vop) {
case VecOp::Mergeh: {
switch (vecTyInfo.len) {
case 2: {
enum { V1 = 0, V2 = 2 };
mMask = {V1 + 0, V2 + 0};
rMask = {V2 + 1, V1 + 1};
break;
}
case 4: {
enum { V1 = 0, V2 = 4 };
mMask = {V1 + 0, V2 + 0, V1 + 1, V2 + 1};
rMask = {V2 + 2, V1 + 2, V2 + 3, V1 + 3};
break;
}
case 8: {
enum { V1 = 0, V2 = 8 };
mMask = {V1 + 0, V2 + 0, V1 + 1, V2 + 1, V1 + 2, V2 + 2, V1 + 3, V2 + 3};
rMask = {V2 + 4, V1 + 4, V2 + 5, V1 + 5, V2 + 6, V1 + 6, V2 + 7, V1 + 7};
break;
}
case 16:
mMask = {0x00, 0x10, 0x01, 0x11, 0x02, 0x12, 0x03, 0x13,
0x04, 0x14, 0x05, 0x15, 0x06, 0x16, 0x07, 0x17};
rMask = {0x18, 0x08, 0x19, 0x09, 0x1A, 0x0A, 0x1B, 0x0B,
0x1C, 0x0C, 0x1D, 0x0D, 0x1E, 0x0E, 0x1F, 0x0F};
break;
default:
llvm_unreachable("unexpected vector length");
}
break;
}
case VecOp::Mergel: {
switch (vecTyInfo.len) {
case 2: {
enum { V1 = 0, V2 = 2 };
mMask = {V1 + 1, V2 + 1};
rMask = {V2 + 0, V1 + 0};
break;
}
case 4: {
enum { V1 = 0, V2 = 4 };
mMask = {V1 + 2, V2 + 2, V1 + 3, V2 + 3};
rMask = {V2 + 0, V1 + 0, V2 + 1, V1 + 1};
break;
}
case 8: {
enum { V1 = 0, V2 = 8 };
mMask = {V1 + 4, V2 + 4, V1 + 5, V2 + 5, V1 + 6, V2 + 6, V1 + 7, V2 + 7};
rMask = {V2 + 0, V1 + 0, V2 + 1, V1 + 1, V2 + 2, V1 + 2, V2 + 3, V1 + 3};
break;
}
case 16:
mMask = {0x08, 0x18, 0x09, 0x19, 0x0A, 0x1A, 0x0B, 0x1B,
0x0C, 0x1C, 0x0D, 0x1D, 0x0E, 0x1E, 0x0F, 0x1F};
rMask = {0x10, 0x00, 0x11, 0x01, 0x12, 0x02, 0x13, 0x03,
0x14, 0x04, 0x15, 0x05, 0x16, 0x06, 0x17, 0x07};
break;
default:
llvm_unreachable("unexpected vector length");
}
break;
}
default:
llvm_unreachable("invalid vector operation for generator");
}
auto vargs{convertVecArgs(builder, loc, vecTyInfo, argBases)};
llvm::SmallVector<int64_t, 16> &mergeMask =
(isBEVecElemOrderOnLE()) ? rMask : mMask;
auto callOp{builder.create<mlir::vector::ShuffleOp>(loc, vargs[0], vargs[1],
mergeMask)};
return builder.createConvert(loc, resultType, callOp);
}
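// Compute a byte address by viewing baseAddr as a reference to an i8 array
// and indexing it with offset via fir.coordinate_of.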
static mlir::Value addOffsetToAddress(fir::FirOpBuilder &builder,
mlir::Location loc, mlir::Value baseAddr,
mlir::Value offset) {
auto typeExtent{fir::SequenceType::getUnknownExtent()};
// Construct an !fir.ref<!fir.array<?xi8>> type
auto arrRefTy{builder.getRefType(fir::SequenceType::get(
{typeExtent}, mlir::IntegerType::get(builder.getContext(), 8)))};
// Convert arg to !fir.ref<!fir.array<?xi8>>
auto resAddr{builder.create<fir::ConvertOp>(loc, arrRefTy, baseAddr)};
return builder.create<fir::CoordinateOp>(loc, arrRefTy, resAddr, offset);
}
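// Reverse the order of the len elements of vector v using a vector.shuffle
// with an undefined second operand.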
static mlir::Value reverseVectorElements(fir::FirOpBuilder &builder,
mlir::Location loc, mlir::Value v,
int64_t len) {
assert(mlir::isa<mlir::VectorType>(v.getType()));
assert(len > 0);
llvm::SmallVector<int64_t, 16> mask;
for (int64_t i = 0; i < len; ++i) {
mask.push_back(len - 1 - i);
}
auto undefVec{builder.create<fir::UndefOp>(loc, v.getType())};
return builder.create<mlir::vector::ShuffleOp>(loc, v, undefVec, mask);
}
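// Build an i64 "alignment" named attribute for memory operations.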
static mlir::NamedAttribute getAlignmentAttr(fir::FirOpBuilder &builder,
const int val) {
auto i64ty{mlir::IntegerType::get(builder.getContext(), 64)};
auto alignAttr{mlir::IntegerAttr::get(i64ty, val)};
return builder.getNamedAttr("alignment", alignAttr);
}
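// VEC_XL
// Dispatch on the result element width: byte and halfword loads are lowered
// without an intrinsic call; word and doubleword loads call a VSX intrinsic.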
fir::ExtendedValue
PPCIntrinsicLibrary::genVecXlGrp(mlir::Type resultType,
llvm::ArrayRef<fir::ExtendedValue> args) {
VecTypeInfo vecTyInfo{getVecTypeFromFirType(resultType)};
switch (vecTyInfo.eleTy.getIntOrFloatBitWidth()) {
case 8:
// vec_xlb1
return genVecLdNoCallGrp<VecOp::Xl>(resultType, args);
case 16:
// vec_xlh8
return genVecLdNoCallGrp<VecOp::Xl>(resultType, args);
case 32:
// vec_xlw4
return genVecLdCallGrp<VecOp::Xlw4>(resultType, args);
case 64:
// vec_xld2
return genVecLdCallGrp<VecOp::Xld2>(resultType, args);
default:
llvm_unreachable("invalid kind");
}
llvm_unreachable("invalid vector operation for generator");
}
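// Vector loads lowered without an intrinsic call (VecOp::Xl and VecOp::Xlbe):
// a plain alignment-1 load, with element reversal when the element order or
// the target endianness requires it.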
template <VecOp vop>
fir::ExtendedValue PPCIntrinsicLibrary::genVecLdNoCallGrp(
mlir::Type resultType, llvm::ArrayRef<fir::ExtendedValue> args) {
assert(args.size() == 2);
auto arg0{getBase(args[0])};
auto arg1{getBase(args[1])};
auto vecTyInfo{getVecTypeFromFirType(resultType)};
auto mlirTy{vecTyInfo.toMlirVectorType(builder.getContext())};
auto firTy{vecTyInfo.toFirVectorType()};
// Add the %val of arg0 to %addr of arg1
auto addr{addOffsetToAddress(builder, loc, arg1, arg0)};
const auto triple{fir::getTargetTriple(builder.getModule())};
// The address is not guaranteed to be naturally aligned, so load with
// alignment 1.
auto result{builder.create<fir::LoadOp>(loc, mlirTy, addr,
getAlignmentAttr(builder, 1))};
if ((vop == VecOp::Xl && isBEVecElemOrderOnLE()) ||
(vop == VecOp::Xlbe && triple.isLittleEndian()))
return builder.createConvert(
loc, firTy, reverseVectorElements(builder, loc, result, vecTyInfo.len));
return builder.createConvert(loc, firTy, result);
}
// VEC_LD, VEC_LDE, VEC_LDL, VEC_LXVP, VEC_XLD2, VEC_XLW4
template <VecOp vop>
fir::ExtendedValue
PPCIntrinsicLibrary::genVecLdCallGrp(mlir::Type resultType,
llvm::ArrayRef<fir::ExtendedValue> args) {
assert(args.size() == 2);
auto context{builder.getContext()};
auto arg0{getBase(args[0])};
auto arg1{getBase(args[1])};
// Prepare the return type in FIR.
auto vecResTyInfo{getVecTypeFromFirType(resultType)};
auto mlirTy{vecResTyInfo.toMlirVectorType(context)};
auto firTy{vecResTyInfo.toFirVectorType()};
// llvm.ppc.altivec.lvx* return <4 x i32>.
// llvm.ppc.altivec.lvewx does as well when the result element type is not an
// integer type.
const auto i32Ty{mlir::IntegerType::get(builder.getContext(), 32)};
const auto mVecI32Ty{mlir::VectorType::get(4, i32Ty)};
// For vec_ld, need to convert arg0 from i64 to i32
if (vop == VecOp::Ld && arg0.getType().getIntOrFloatBitWidth() == 64)
arg0 = builder.createConvert(loc, i32Ty, arg0);
// Add the %val of arg0 to %addr of arg1
auto addr{addOffsetToAddress(builder, loc, arg1, arg0)};
llvm::SmallVector<mlir::Value, 4> parsedArgs{addr};
mlir::Type intrinResTy{nullptr};
llvm::StringRef fname{};
switch (vop) {
case VecOp::Ld:
fname = "llvm.ppc.altivec.lvx";
intrinResTy = mVecI32Ty;
break;
case VecOp::Lde:
switch (vecResTyInfo.eleTy.getIntOrFloatBitWidth()) {
case 8:
fname = "llvm.ppc.altivec.lvebx";
intrinResTy = mlirTy;
break;
case 16:
fname = "llvm.ppc.altivec.lvehx";
intrinResTy = mlirTy;
break;
case 32:
fname = "llvm.ppc.altivec.lvewx";
if (mlir::isa<mlir::IntegerType>(vecResTyInfo.eleTy))
intrinResTy = mlirTy;
else
intrinResTy = mVecI32Ty;
break;
default:
llvm_unreachable("invalid vector for vec_lde");
}
break;
case VecOp::Ldl:
fname = "llvm.ppc.altivec.lvxl";
intrinResTy = mVecI32Ty;
break;
case VecOp::Lxvp:
fname = "llvm.ppc.vsx.lxvp";
intrinResTy = fir::VectorType::get(256, mlir::IntegerType::get(context, 1));
break;
case VecOp::Xld2: {
fname = isBEVecElemOrderOnLE() ? "llvm.ppc.vsx.lxvd2x.be"
: "llvm.ppc.vsx.lxvd2x";
// llvm.ppc.vsx.lxvd2x* returns <2 x double>
intrinResTy = mlir::VectorType::get(2, mlir::FloatType::getF64(context));
} break;
case VecOp::Xlw4:
fname = isBEVecElemOrderOnLE() ? "llvm.ppc.vsx.lxvw4x.be"
: "llvm.ppc.vsx.lxvw4x";
// llvm.ppc.vsx.lxvw4x* returns <4 x i32>
intrinResTy = mVecI32Ty;
break;
default:
llvm_unreachable("invalid vector operation for generator");
}
auto funcType{
mlir::FunctionType::get(context, {addr.getType()}, {intrinResTy})};
auto funcOp{builder.createFunction(loc, fname, funcType)};
auto result{
builder.create<fir::CallOp>(loc, funcOp, parsedArgs).getResult(0)};
if (vop == VecOp::Lxvp)
return result;
if (intrinResTy != mlirTy)
result = builder.create<mlir::vector::BitCastOp>(loc, mlirTy, result);
if (vop != VecOp::Xld2 && vop != VecOp::Xlw4 && isBEVecElemOrderOnLE())
return builder.createConvert(
loc, firTy,
reverseVectorElements(builder, loc, result, vecResTyInfo.len));
return builder.createConvert(loc, firTy, result);
}
// VEC_LVSL, VEC_LVSR
template <VecOp vop>
fir::ExtendedValue
PPCIntrinsicLibrary::genVecLvsGrp(mlir::Type resultType,
llvm::ArrayRef<fir::ExtendedValue> args) {
assert(args.size() == 2);
auto context{builder.getContext()};
auto arg0{getBase(args[0])};
auto arg1{getBase(args[1])};
auto vecTyInfo{getVecTypeFromFirType(resultType)};
auto mlirTy{vecTyInfo.toMlirVectorType(context)};
auto firTy{vecTyInfo.toFirVectorType()};
// Convert arg0 to i64 type if needed
auto i64ty{mlir::IntegerType::get(context, 64)};
if (arg0.getType() != i64ty)
arg0 = builder.create<fir::ConvertOp>(loc, i64ty, arg0);
// The offset is taken modulo 16: shift left 56 bits and then arithmetically
// shift right 56 bits to clear the upper 56 bits while preserving the sign.
auto shiftVal{builder.createIntegerConstant(loc, i64ty, 56)};
auto offset{builder.create<mlir::arith::ShLIOp>(loc, arg0, shiftVal)};
auto offset2{builder.create<mlir::arith::ShRSIOp>(loc, offset, shiftVal)};
// Add the offsetArg to %addr of arg1
auto addr{addOffsetToAddress(builder, loc, arg1, offset2)};
llvm::SmallVector<mlir::Value, 4> parsedArgs{addr};
llvm::StringRef fname{};
switch (vop) {
case VecOp::Lvsl:
fname = "llvm.ppc.altivec.lvsl";
break;
case VecOp::Lvsr:
fname = "llvm.ppc.altivec.lvsr";
break;
default:
llvm_unreachable("invalid vector operation for generator");
}
auto funcType{mlir::FunctionType::get(context, {addr.getType()}, {mlirTy})};
auto funcOp{builder.createFunction(loc, fname, funcType)};
auto result{
builder.create<fir::CallOp>(loc, funcOp, parsedArgs).getResult(0)};
if (isNativeVecElemOrderOnLE())
return builder.createConvert(
loc, firTy, reverseVectorElements(builder, loc, result, vecTyInfo.len));
return builder.createConvert(loc, firTy, result);
}
// VEC_NMADD, VEC_MSUB
template <VecOp vop>
fir::ExtendedValue
PPCIntrinsicLibrary::genVecNmaddMsub(mlir::Type resultType,
llvm::ArrayRef<fir::ExtendedValue> args) {
assert(args.size() == 3);
auto context{builder.getContext()};
auto argBases{getBasesForArgs(args)};
auto vTypeInfo{getVecTypeFromFir(argBases[0])};
auto newArgs{convertVecArgs(builder, loc, vTypeInfo, argBases)};
const auto width{vTypeInfo.eleTy.getIntOrFloatBitWidth()};
static std::map<int, std::pair<llvm::StringRef, mlir::FunctionType>> fmaMap{
{32,
std::make_pair(
"llvm.fma.v4f32",
genFuncType<Ty::RealVector<4>, Ty::RealVector<4>, Ty::RealVector<4>>(
context, builder))},
{64,
std::make_pair(
"llvm.fma.v2f64",
genFuncType<Ty::RealVector<8>, Ty::RealVector<8>, Ty::RealVector<8>>(
context, builder))}};
auto funcOp{builder.createFunction(loc, std::get<0>(fmaMap[width]),
std::get<1>(fmaMap[width]))};
if (vop == VecOp::Nmadd) {
// vec_nmadd(arg1, arg2, arg3) = -fma(arg1, arg2, arg3)
auto callOp{builder.create<fir::CallOp>(loc, funcOp, newArgs)};
// We need to convert fir.vector to MLIR vector to use fneg and then back
// to fir.vector to store.
auto vCall{builder.createConvert(loc, vTypeInfo.toMlirVectorType(context),
callOp.getResult(0))};
auto neg{builder.create<mlir::arith::NegFOp>(loc, vCall)};
return builder.createConvert(loc, vTypeInfo.toFirVectorType(), neg);
} else if (vop == VecOp::Msub) {
// vec_msub(arg1, arg2, arg3) = fma(arg1, arg2, -arg3)
newArgs[2] = builder.create<mlir::arith::NegFOp>(loc, newArgs[2]);
auto callOp{builder.create<fir::CallOp>(loc, funcOp, newArgs)};
return callOp.getResult(0);
}
llvm_unreachable("Invalid vector operation for generator");
}
// VEC_PERM, VEC_PERMI
template <VecOp vop>
fir::ExtendedValue
PPCIntrinsicLibrary::genVecPerm(mlir::Type resultType,
llvm::ArrayRef<fir::ExtendedValue> args) {
assert(args.size() == 3);
auto context{builder.getContext()};
auto argBases{getBasesForArgs(args)};
auto argTypes{getTypesForArgs(argBases)};
auto vecTyInfo{getVecTypeFromFir(argBases[0])};
auto mlirTy{vecTyInfo.toMlirVectorType(context)};
auto vi32Ty{mlir::VectorType::get(4, mlir::IntegerType::get(context, 32))};
auto vf64Ty{mlir::VectorType::get(2, mlir::FloatType::getF64(context))};
auto mArg0{builder.createConvert(loc, mlirTy, argBases[0])};
auto mArg1{builder.createConvert(loc, mlirTy, argBases[1])};
switch (vop) {
case VecOp::Perm: {
VecTypeInfo maskVecTyInfo{getVecTypeFromFir(argBases[2])};
auto mlirMaskTy{maskVecTyInfo.toMlirVectorType(context)};
auto mMask{builder.createConvert(loc, mlirMaskTy, argBases[2])};
if (mlirTy != vi32Ty) {
mArg0 =
builder.create<mlir::LLVM::BitcastOp>(loc, vi32Ty, mArg0).getResult();
mArg1 =
builder.create<mlir::LLVM::BitcastOp>(loc, vi32Ty, mArg1).getResult();
}
auto funcOp{builder.createFunction(
loc, "llvm.ppc.altivec.vperm",
genFuncType<Ty::IntegerVector<4>, Ty::IntegerVector<4>,
Ty::IntegerVector<4>, Ty::IntegerVector<1>>(context,
builder))};
llvm::SmallVector<mlir::Value> newArgs;
if (isNativeVecElemOrderOnLE()) {
auto i8Ty{mlir::IntegerType::get(context, 8)};
auto v8Ty{mlir::VectorType::get(16, i8Ty)};
auto negOne{builder.createMinusOneInteger(loc, i8Ty)};
auto vNegOne{
builder.create<mlir::vector::BroadcastOp>(loc, v8Ty, negOne)};
mMask = builder.create<mlir::arith::XOrIOp>(loc, mMask, vNegOne);
newArgs = {mArg1, mArg0, mMask};
} else {
newArgs = {mArg0, mArg1, mMask};
}
auto res{builder.create<fir::CallOp>(loc, funcOp, newArgs).getResult(0)};
if (res.getType() != argTypes[0]) {
// fir.call llvm.ppc.altivec.vperm returns !fir.vector<4:i32>;
// convert the result back to the original type.
res = builder.createConvert(loc, vi32Ty, res);
if (mlirTy != vi32Ty)
res =
builder.create<mlir::LLVM::BitcastOp>(loc, mlirTy, res).getResult();
}
return builder.createConvert(loc, resultType, res);
}
case VecOp::Permi: {
// arg3 is a constant
auto constIntOp{mlir::dyn_cast_or_null<mlir::IntegerAttr>(
mlir::dyn_cast<mlir::arith::ConstantOp>(argBases[2].getDefiningOp())
.getValue())};
assert(constIntOp && "expected integer constant argument");
auto constInt{constIntOp.getInt()};
// arg1, arg2, and the result type share the same VecTypeInfo
if (vecTyInfo.isFloat()) {
mArg0 =
builder.create<mlir::LLVM::BitcastOp>(loc, vf64Ty, mArg0).getResult();
mArg1 =
builder.create<mlir::LLVM::BitcastOp>(loc, vf64Ty, mArg1).getResult();
}
llvm::SmallVector<int64_t, 2> nMask; // native vector element order mask
llvm::SmallVector<int64_t, 2> rMask; // non-native vector element order mask
enum { V1 = 0, V2 = 2 };
switch (constInt) {
case 0:
nMask = {V1 + 0, V2 + 0};
rMask = {V2 + 1, V1 + 1};
break;
case 1:
nMask = {V1 + 0, V2 + 1};
rMask = {V2 + 0, V1 + 1};
break;
case 2:
nMask = {V1 + 1, V2 + 0};
rMask = {V2 + 1, V1 + 0};
break;
case 3:
nMask = {V1 + 1, V2 + 1};
rMask = {V2 + 0, V1 + 0};
break;
default:
llvm_unreachable("unexpected arg3 value for vec_permi");
}
llvm::SmallVector<int64_t, 2> mask =
(isBEVecElemOrderOnLE()) ? rMask : nMask;
auto res{builder.create<mlir::vector::ShuffleOp>(loc, mArg0, mArg1, mask)};
if (res.getType() != mlirTy) {
auto cast{builder.create<mlir::LLVM::BitcastOp>(loc, mlirTy, res)};
return builder.createConvert(loc, resultType, cast);
}
return builder.createConvert(loc, resultType, res);
}
default:
llvm_unreachable("invalid vector operation for generator");
}
}
// VEC_SEL
fir::ExtendedValue
PPCIntrinsicLibrary::genVecSel(mlir::Type resultType,
llvm::ArrayRef<fir::ExtendedValue> args) {
assert(args.size() == 3);
auto argBases{getBasesForArgs(args)};
llvm::SmallVector<VecTypeInfo, 4> vecTyInfos;
for (size_t i = 0; i < argBases.size(); i++) {
vecTyInfos.push_back(getVecTypeFromFir(argBases[i]));
}
auto vargs{convertVecArgs(builder, loc, vecTyInfos, argBases)};
auto i8Ty{mlir::IntegerType::get(builder.getContext(), 8)};
auto negOne{builder.createMinusOneInteger(loc, i8Ty)};
// construct a constant <16 x i8> vector with value -1 for bitcast
auto bcVecTy{mlir::VectorType::get(16, i8Ty)};
auto vNegOne{builder.create<mlir::vector::BroadcastOp>(loc, bcVecTy, negOne)};
// bitcast arguments to bcVecTy
auto arg1{builder.create<mlir::vector::BitCastOp>(loc, bcVecTy, vargs[0])};
auto arg2{builder.create<mlir::vector::BitCastOp>(loc, bcVecTy, vargs[1])};
auto arg3{builder.create<mlir::vector::BitCastOp>(loc, bcVecTy, vargs[2])};
// vec_sel(arg1, arg2, arg3) =
// (arg2 and arg3) or (arg1 and (arg3 xor vector(-1,...)))
auto comp{builder.create<mlir::arith::XOrIOp>(loc, arg3, vNegOne)};
auto a1AndComp{builder.create<mlir::arith::AndIOp>(loc, arg1, comp)};
auto a2AndA3{builder.create<mlir::arith::AndIOp>(loc, arg2, arg3)};
auto res{builder.create<mlir::arith::OrIOp>(loc, a1AndComp, a2AndA3)};
auto bcRes{
builder.create<mlir::vector::BitCastOp>(loc, vargs[0].getType(), res)};
return builder.createConvert(loc, vecTyInfos[0].toFirVectorType(), bcRes);
}
// VEC_SL, VEC_SLD, VEC_SLDW, VEC_SLL, VEC_SLO, VEC_SR, VEC_SRL, VEC_SRO
template <VecOp vop>
fir::ExtendedValue
PPCIntrinsicLibrary::genVecShift(mlir::Type resultType,
llvm::ArrayRef<fir::ExtendedValue> args) {
auto context{builder.getContext()};
auto argBases{getBasesForArgs(args)};
auto argTypes{getTypesForArgs(argBases)};
llvm::SmallVector<VecTypeInfo, 2> vecTyInfoArgs;
vecTyInfoArgs.push_back(getVecTypeFromFir(argBases[0]));
vecTyInfoArgs.push_back(getVecTypeFromFir(argBases[1]));
// Convert the first two arguments to MLIR vectors
llvm::SmallVector<mlir::Type, 2> mlirTyArgs;
mlirTyArgs.push_back(vecTyInfoArgs[0].toMlirVectorType(context));
mlirTyArgs.push_back(vecTyInfoArgs[1].toMlirVectorType(context));
llvm::SmallVector<mlir::Value, 2> mlirVecArgs;
mlirVecArgs.push_back(builder.createConvert(loc, mlirTyArgs[0], argBases[0]));
mlirVecArgs.push_back(builder.createConvert(loc, mlirTyArgs[1], argBases[1]));
mlir::Value shftRes{nullptr};
if (vop == VecOp::Sl || vop == VecOp::Sr) {
assert(args.size() == 2);
// Construct the mask
auto width{
mlir::dyn_cast<mlir::IntegerType>(vecTyInfoArgs[1].eleTy).getWidth()};
auto vecVal{builder.createIntegerConstant(
loc, getConvertedElementType(context, vecTyInfoArgs[0].eleTy), width)};
auto mask{
builder.create<mlir::vector::BroadcastOp>(loc, mlirTyArgs[1], vecVal)};
auto shft{builder.create<mlir::arith::RemUIOp>(loc, mlirVecArgs[1], mask)};
mlir::Value res{nullptr};
if (vop == VecOp::Sr)
res = builder.create<mlir::arith::ShRUIOp>(loc, mlirVecArgs[0], shft);
else if (vop == VecOp::Sl)
res = builder.create<mlir::arith::ShLIOp>(loc, mlirVecArgs[0], shft);
shftRes = builder.createConvert(loc, argTypes[0], res);
} else if (vop == VecOp::Sll || vop == VecOp::Slo || vop == VecOp::Srl ||
vop == VecOp::Sro) {
assert(args.size() == 2);
// Bitcast to vector<4xi32>
auto bcVecTy{mlir::VectorType::get(4, builder.getIntegerType(32))};
if (mlirTyArgs[0] != bcVecTy)
mlirVecArgs[0] =
builder.create<mlir::vector::BitCastOp>(loc, bcVecTy, mlirVecArgs[0]);
if (mlirTyArgs[1] != bcVecTy)
mlirVecArgs[1] =
builder.create<mlir::vector::BitCastOp>(loc, bcVecTy, mlirVecArgs[1]);
llvm::StringRef funcName;
switch (vop) {
case VecOp::Srl:
funcName = "llvm.ppc.altivec.vsr";
break;
case VecOp::Sro:
funcName = "llvm.ppc.altivec.vsro";
break;
case VecOp::Sll:
funcName = "llvm.ppc.altivec.vsl";
break;
case VecOp::Slo:
funcName = "llvm.ppc.altivec.vslo";
break;
default:
llvm_unreachable("unknown vector shift operation");
}
auto funcTy{genFuncType<Ty::IntegerVector<4>, Ty::IntegerVector<4>,
Ty::IntegerVector<4>>(context, builder)};
mlir::func::FuncOp funcOp{builder.createFunction(loc, funcName, funcTy)};
auto callOp{builder.create<fir::CallOp>(loc, funcOp, mlirVecArgs)};
// If the result vector type differs from the original type, convert to an
// MLIR vector, bitcast, and then convert back to a FIR vector.
if (callOp.getResult(0).getType() != argTypes[0]) {
auto res = builder.createConvert(loc, bcVecTy, callOp.getResult(0));
res = builder.create<mlir::vector::BitCastOp>(loc, mlirTyArgs[0], res);
shftRes = builder.createConvert(loc, argTypes[0], res);
} else {
shftRes = callOp.getResult(0);
}
} else if (vop == VecOp::Sld || vop == VecOp::Sldw) {
assert(args.size() == 3);
auto constIntOp = mlir::dyn_cast_or_null<mlir::IntegerAttr>(
mlir::dyn_cast<mlir::arith::ConstantOp>(argBases[2].getDefiningOp())
.getValue());
assert(constIntOp && "expected integer constant argument");
// Bitcast to vector<16xi8>
auto vi8Ty{mlir::VectorType::get(16, builder.getIntegerType(8))};
if (mlirTyArgs[0] != vi8Ty) {
mlirVecArgs[0] =
builder.create<mlir::LLVM::BitcastOp>(loc, vi8Ty, mlirVecArgs[0])
.getResult();
mlirVecArgs[1] =
builder.create<mlir::LLVM::BitcastOp>(loc, vi8Ty, mlirVecArgs[1])
.getResult();
}
// Construct the mask for shuffling
auto shiftVal{constIntOp.getInt()};
if (vop == VecOp::Sldw)
shiftVal = shiftVal << 2;
shiftVal &= 0xF;
llvm::SmallVector<int64_t, 16> mask;
// Shuffle with mask based on the endianness
const auto triple{fir::getTargetTriple(builder.getModule())};
if (triple.isLittleEndian()) {
for (int i = 16; i < 32; ++i)
mask.push_back(i - shiftVal);
shftRes = builder.create<mlir::vector::ShuffleOp>(loc, mlirVecArgs[1],
mlirVecArgs[0], mask);
} else {
for (int i = 0; i < 16; ++i)
mask.push_back(i + shiftVal);
shftRes = builder.create<mlir::vector::ShuffleOp>(loc, mlirVecArgs[0],
mlirVecArgs[1], mask);
}
// Bitcast to the original type
if (shftRes.getType() != mlirTyArgs[0])
shftRes =
builder.create<mlir::LLVM::BitcastOp>(loc, mlirTyArgs[0], shftRes);
return builder.createConvert(loc, resultType, shftRes);
} else
llvm_unreachable("Invalid vector operation for generator");
return shftRes;
}
// VEC_SPLAT, VEC_SPLATS, VEC_SPLAT_S32
template <VecOp vop>
fir::ExtendedValue
PPCIntrinsicLibrary::genVecSplat(mlir::Type resultType,
llvm::ArrayRef<fir::ExtendedValue> args) {
auto context{builder.getContext()};
auto argBases{getBasesForArgs(args)};
mlir::vector::SplatOp splatOp{nullptr};
mlir::Type retTy{nullptr};
switch (vop) {
case VecOp::Splat: {
assert(args.size() == 2);
auto vecTyInfo{getVecTypeFromFir(argBases[0])};
auto extractOp{genVecExtract(resultType, args)};
splatOp = builder.create<mlir::vector::SplatOp>(
loc, *(extractOp.getUnboxed()), vecTyInfo.toMlirVectorType(context));
retTy = vecTyInfo.toFirVectorType();
break;
}
case VecOp::Splats: {
assert(args.size() == 1);
auto vecTyInfo{getVecTypeFromEle(argBases[0])};
splatOp = builder.create<mlir::vector::SplatOp>(
loc, argBases[0], vecTyInfo.toMlirVectorType(context));
retTy = vecTyInfo.toFirVectorType();
break;
}
case VecOp::Splat_s32: {
assert(args.size() == 1);
auto eleTy{builder.getIntegerType(32)};
auto intOp{builder.createConvert(loc, eleTy, argBases[0])};
// the intrinsic always returns vector(integer(4))
splatOp = builder.create<mlir::vector::SplatOp>(
loc, intOp, mlir::VectorType::get(4, eleTy));
retTy = fir::VectorType::get(4, eleTy);
break;
}
default:
llvm_unreachable("invalid vector operation for generator");
}
return builder.createConvert(loc, retTy, splatOp);
}
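// VEC_XLDS
// Load a 64-bit value and splat it into both doublewords of the result vector.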
fir::ExtendedValue
PPCIntrinsicLibrary::genVecXlds(mlir::Type resultType,
llvm::ArrayRef<fir::ExtendedValue> args) {
assert(args.size() == 2);
auto arg0{getBase(args[0])};
auto arg1{getBase(args[1])};
// Prepare the return type in FIR.
auto vecTyInfo{getVecTypeFromFirType(resultType)};
auto mlirTy{vecTyInfo.toMlirVectorType(builder.getContext())};
auto firTy{vecTyInfo.toFirVectorType()};
// Add the %val of arg0 to %addr of arg1
auto addr{addOffsetToAddress(builder, loc, arg1, arg0)};
auto i64Ty{mlir::IntegerType::get(builder.getContext(), 64)};
auto i64VecTy{mlir::VectorType::get(2, i64Ty)};
auto i64RefTy{builder.getRefType(i64Ty)};
auto addrConv{builder.create<fir::ConvertOp>(loc, i64RefTy, addr)};
auto addrVal{builder.create<fir::LoadOp>(loc, addrConv)};
auto splatRes{builder.create<mlir::vector::SplatOp>(loc, addrVal, i64VecTy)};
mlir::Value result{nullptr};
if (mlirTy != splatRes.getType()) {
result = builder.create<mlir::vector::BitCastOp>(loc, mlirTy, splatRes);
} else
result = splatRes;
return builder.createConvert(loc, firTy, result);
}
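// Return the name of the PowerPC LLVM intrinsic corresponding to an MMAOp.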
const char *getMmaIrIntrName(MMAOp mmaOp) {
switch (mmaOp) {
case MMAOp::AssembleAcc:
return "llvm.ppc.mma.assemble.acc";
case MMAOp::AssemblePair:
return "llvm.ppc.vsx.assemble.pair";
case MMAOp::DisassembleAcc:
return "llvm.ppc.mma.disassemble.acc";
case MMAOp::DisassemblePair:
return "llvm.ppc.vsx.disassemble.pair";
case MMAOp::Xxmfacc:
return "llvm.ppc.mma.xxmfacc";
case MMAOp::Xxmtacc:
return "llvm.ppc.mma.xxmtacc";
case MMAOp::Xxsetaccz:
return "llvm.ppc.mma.xxsetaccz";
case MMAOp::Pmxvbf16ger2:
return "llvm.ppc.mma.pmxvbf16ger2";
case MMAOp::Pmxvbf16ger2nn:
return "llvm.ppc.mma.pmxvbf16ger2nn";
case MMAOp::Pmxvbf16ger2np:
return "llvm.ppc.mma.pmxvbf16ger2np";
case MMAOp::Pmxvbf16ger2pn:
return "llvm.ppc.mma.pmxvbf16ger2pn";
case MMAOp::Pmxvbf16ger2pp:
return "llvm.ppc.mma.pmxvbf16ger2pp";
case MMAOp::Pmxvf16ger2:
return "llvm.ppc.mma.pmxvf16ger2";
case MMAOp::Pmxvf16ger2nn:
return "llvm.ppc.mma.pmxvf16ger2nn";
case MMAOp::Pmxvf16ger2np:
return "llvm.ppc.mma.pmxvf16ger2np";
case MMAOp::Pmxvf16ger2pn:
return "llvm.ppc.mma.pmxvf16ger2pn";
case MMAOp::Pmxvf16ger2pp:
return "llvm.ppc.mma.pmxvf16ger2pp";
case MMAOp::Pmxvf32ger:
return "llvm.ppc.mma.pmxvf32ger";
case MMAOp::Pmxvf32gernn:
return "llvm.ppc.mma.pmxvf32gernn";
case MMAOp::Pmxvf32gernp:
return "llvm.ppc.mma.pmxvf32gernp";
case MMAOp::Pmxvf32gerpn:
return "llvm.ppc.mma.pmxvf32gerpn";
case MMAOp::Pmxvf32gerpp:
return "llvm.ppc.mma.pmxvf32gerpp";
case MMAOp::Pmxvf64ger:
return "llvm.ppc.mma.pmxvf64ger";
case MMAOp::Pmxvf64gernn:
return "llvm.ppc.mma.pmxvf64gernn";
case MMAOp::Pmxvf64gernp:
return "llvm.ppc.mma.pmxvf64gernp";
case MMAOp::Pmxvf64gerpn:
return "llvm.ppc.mma.pmxvf64gerpn";
case MMAOp::Pmxvf64gerpp:
return "llvm.ppc.mma.pmxvf64gerpp";
case MMAOp::Pmxvi16ger2:
return "llvm.ppc.mma.pmxvi16ger2";
case MMAOp::Pmxvi16ger2pp:
return "llvm.ppc.mma.pmxvi16ger2pp";
case MMAOp::Pmxvi16ger2s:
return "llvm.ppc.mma.pmxvi16ger2s";
case MMAOp::Pmxvi16ger2spp:
return "llvm.ppc.mma.pmxvi16ger2spp";
case MMAOp::Pmxvi4ger8:
return "llvm.ppc.mma.pmxvi4ger8";
case MMAOp::Pmxvi4ger8pp:
return "llvm.ppc.mma.pmxvi4ger8pp";
case MMAOp::Pmxvi8ger4:
return "llvm.ppc.mma.pmxvi8ger4";
case MMAOp::Pmxvi8ger4pp:
return "llvm.ppc.mma.pmxvi8ger4pp";
case MMAOp::Pmxvi8ger4spp:
return "llvm.ppc.mma.pmxvi8ger4spp";
case MMAOp::Xvbf16ger2:
return "llvm.ppc.mma.xvbf16ger2";
case MMAOp::Xvbf16ger2nn:
return "llvm.ppc.mma.xvbf16ger2nn";
case MMAOp::Xvbf16ger2np:
return "llvm.ppc.mma.xvbf16ger2np";
case MMAOp::Xvbf16ger2pn:
return "llvm.ppc.mma.xvbf16ger2pn";
case MMAOp::Xvbf16ger2pp:
return "llvm.ppc.mma.xvbf16ger2pp";
case MMAOp::Xvf16ger2:
return "llvm.ppc.mma.xvf16ger2";
case MMAOp::Xvf16ger2nn:
return "llvm.ppc.mma.xvf16ger2nn";
case MMAOp::Xvf16ger2np:
return "llvm.ppc.mma.xvf16ger2np";
case MMAOp::Xvf16ger2pn:
return "llvm.ppc.mma.xvf16ger2pn";
case MMAOp::Xvf16ger2pp:
return "llvm.ppc.mma.xvf16ger2pp";
case MMAOp::Xvf32ger:
return "llvm.ppc.mma.xvf32ger";
case MMAOp::Xvf32gernn:
return "llvm.ppc.mma.xvf32gernn";
case MMAOp::Xvf32gernp:
return "llvm.ppc.mma.xvf32gernp";
case MMAOp::Xvf32gerpn:
return "llvm.ppc.mma.xvf32gerpn";
case MMAOp::Xvf32gerpp:
return "llvm.ppc.mma.xvf32gerpp";
case MMAOp::Xvf64ger:
return "llvm.ppc.mma.xvf64ger";
case MMAOp::Xvf64gernn:
return "llvm.ppc.mma.xvf64gernn";
case MMAOp::Xvf64gernp:
return "llvm.ppc.mma.xvf64gernp";
case MMAOp::Xvf64gerpn:
return "llvm.ppc.mma.xvf64gerpn";
case MMAOp::Xvf64gerpp:
return "llvm.ppc.mma.xvf64gerpp";
case MMAOp::Xvi16ger2:
return "llvm.ppc.mma.xvi16ger2";
case MMAOp::Xvi16ger2pp:
return "llvm.ppc.mma.xvi16ger2pp";
case MMAOp::Xvi16ger2s:
return "llvm.ppc.mma.xvi16ger2s";
case MMAOp::Xvi16ger2spp:
return "llvm.ppc.mma.xvi16ger2spp";
case MMAOp::Xvi4ger8:
return "llvm.ppc.mma.xvi4ger8";
case MMAOp::Xvi4ger8pp:
return "llvm.ppc.mma.xvi4ger8pp";
case MMAOp::Xvi8ger4:
return "llvm.ppc.mma.xvi8ger4";
case MMAOp::Xvi8ger4pp:
return "llvm.ppc.mma.xvi8ger4pp";
case MMAOp::Xvi8ger4spp:
return "llvm.ppc.mma.xvi8ger4spp";
}
llvm_unreachable("getMmaIrIntrName");
}
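// Return the MLIR function type of the LLVM intrinsic corresponding to an
// MMAOp; the /*Quad*/, /*Pair*/, /*Vector*/, and /*Integer*/ comments give
// the operand counts passed to the type builders.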
mlir::FunctionType getMmaIrFuncType(mlir::MLIRContext *context, MMAOp mmaOp) {
switch (mmaOp) {
case MMAOp::AssembleAcc:
return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 4);
case MMAOp::AssemblePair:
return genMmaVpFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2);
case MMAOp::DisassembleAcc:
return genMmaDisassembleFuncType(context, mmaOp);
case MMAOp::DisassemblePair:
return genMmaDisassembleFuncType(context, mmaOp);
case MMAOp::Xxmfacc:
return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 0);
case MMAOp::Xxmtacc:
return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 0);
case MMAOp::Xxsetaccz:
return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 0);
case MMAOp::Pmxvbf16ger2:
return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2,
/*Integer*/ 3);
case MMAOp::Pmxvbf16ger2nn:
return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2,
/*Integer*/ 3);
case MMAOp::Pmxvbf16ger2np:
return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2,
/*Integer*/ 3);
case MMAOp::Pmxvbf16ger2pn:
return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2,
/*Integer*/ 3);
case MMAOp::Pmxvbf16ger2pp:
return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2,
/*Integer*/ 3);
case MMAOp::Pmxvf16ger2:
return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2,
/*Integer*/ 3);
case MMAOp::Pmxvf16ger2nn:
return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2,
/*Integer*/ 3);
case MMAOp::Pmxvf16ger2np:
return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2,
/*Integer*/ 3);
case MMAOp::Pmxvf16ger2pn:
return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2,
/*Integer*/ 3);
case MMAOp::Pmxvf16ger2pp:
return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2,
/*Integer*/ 3);
case MMAOp::Pmxvf32ger:
return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2,
/*Integer*/ 2);
case MMAOp::Pmxvf32gernn:
return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2,
/*Integer*/ 2);
case MMAOp::Pmxvf32gernp:
return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2,
/*Integer*/ 2);
case MMAOp::Pmxvf32gerpn:
return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2,
/*Integer*/ 2);
case MMAOp::Pmxvf32gerpp:
return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2,
/*Integer*/ 2);
case MMAOp::Pmxvf64ger:
return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 1, /*Vector*/ 1,
/*Integer*/ 2);
case MMAOp::Pmxvf64gernn:
return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 1, /*Vector*/ 1,
/*Integer*/ 2);
case MMAOp::Pmxvf64gernp:
return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 1, /*Vector*/ 1,
/*Integer*/ 2);
case MMAOp::Pmxvf64gerpn:
return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 1, /*Vector*/ 1,
/*Integer*/ 2);
case MMAOp::Pmxvf64gerpp:
return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 1, /*Vector*/ 1,
/*Integer*/ 2);
case MMAOp::Pmxvi16ger2:
return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2,
/*Integer*/ 3);
case MMAOp::Pmxvi16ger2pp:
return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2,
/*Integer*/ 3);
case MMAOp::Pmxvi16ger2s:
return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2,
/*Integer*/ 3);
case MMAOp::Pmxvi16ger2spp:
return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2,
/*Integer*/ 3);
case MMAOp::Pmxvi4ger8:
return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2,
/*Integer*/ 3);
case MMAOp::Pmxvi4ger8pp:
return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2,
/*Integer*/ 3);
case MMAOp::Pmxvi8ger4:
return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2,
/*Integer*/ 3);
case MMAOp::Pmxvi8ger4pp:
return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2,
/*Integer*/ 3);
case MMAOp::Pmxvi8ger4spp:
return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2,
/*Integer*/ 3);
case MMAOp::Xvbf16ger2:
return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2);
case MMAOp::Xvbf16ger2nn:
return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
case MMAOp::Xvbf16ger2np:
return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
case MMAOp::Xvbf16ger2pn:
return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
case MMAOp::Xvbf16ger2pp:
return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
case MMAOp::Xvf16ger2:
return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2);
case MMAOp::Xvf16ger2nn:
return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
case MMAOp::Xvf16ger2np:
return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
case MMAOp::Xvf16ger2pn:
return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
case MMAOp::Xvf16ger2pp:
return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
case MMAOp::Xvf32ger:
return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2);
case MMAOp::Xvf32gernn:
return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
case MMAOp::Xvf32gernp:
return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
case MMAOp::Xvf32gerpn:
return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
case MMAOp::Xvf32gerpp:
return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
case MMAOp::Xvf64ger:
return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 1, /*Vector*/ 1);
case MMAOp::Xvf64gernn:
return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 1, /*Vector*/ 1);
case MMAOp::Xvf64gernp:
return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 1, /*Vector*/ 1);
case MMAOp::Xvf64gerpn:
return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 1, /*Vector*/ 1);
case MMAOp::Xvf64gerpp:
return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 1, /*Vector*/ 1);
case MMAOp::Xvi16ger2:
return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2);
case MMAOp::Xvi16ger2pp:
return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
case MMAOp::Xvi16ger2s:
return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2);
case MMAOp::Xvi16ger2spp:
return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
case MMAOp::Xvi4ger8:
return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2);
case MMAOp::Xvi4ger8pp:
return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
case MMAOp::Xvi8ger4:
return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2);
case MMAOp::Xvi8ger4pp:
return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
case MMAOp::Xvi8ger4spp:
return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
}
llvm_unreachable("getMmaIrFuncType");
}
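// Lower a PowerPC MMA intrinsic call. HandlerOp controls how the Fortran
// subroutine arguments map onto the LLVM intrinsic: the SubToFunc-style
// handlers make the first argument receive the intrinsic result, with
// SubToFuncReverseArgOnLE additionally reversing the operand order on
// little-endian targets; FirstArgIsResult loads the first argument as an
// operand and stores the result back to it.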
template <MMAOp IntrId, MMAHandlerOp HandlerOp>
void PPCIntrinsicLibrary::genMmaIntr(llvm::ArrayRef<fir::ExtendedValue> args) {
auto context{builder.getContext()};
mlir::FunctionType intrFuncType{getMmaIrFuncType(context, IntrId)};
mlir::func::FuncOp funcOp{
builder.createFunction(loc, getMmaIrIntrName(IntrId), intrFuncType)};
llvm::SmallVector<mlir::Value> intrArgs;
// Depending on the handler op, change the subroutine call to a function call.
// The first argument represents the result; the remaining arguments are
// shifted one position to form the actual argument list.
size_t argStart{0};
size_t argStep{1};
size_t e{args.size()};
if (HandlerOp == MMAHandlerOp::SubToFunc) {
// The first argument becomes function result. Start from the second
// argument.
argStart = 1;
} else if (HandlerOp == MMAHandlerOp::SubToFuncReverseArgOnLE) {
// Reverse the argument order on a little-endian target only.
// The reversal does not depend on the setting of the non-native-order option.
const auto triple{fir::getTargetTriple(builder.getModule())};
if (triple.isLittleEndian()) {
// Load the arguments in reverse order.
argStart = args.size() - 1;
// The first argument becomes function result. Stop at the second
// argument.
e = 0;
argStep = -1;
} else {
// Load the arguments in natural order.
// The first argument becomes function result. Start from the second
// argument.
argStart = 1;
}
}
for (size_t i = argStart, j = 0; i != e; i += argStep, ++j) {
auto v{fir::getBase(args[i])};
if (i == 0 && HandlerOp == MMAHandlerOp::FirstArgIsResult) {
// First argument is passed in as an address. We need to load
// the content to match the LLVM interface.
v = builder.create<fir::LoadOp>(loc, v);
}
auto vType{v.getType()};
mlir::Type targetType{intrFuncType.getInput(j)};
if (vType != targetType) {
if (mlir::isa<mlir::VectorType>(targetType)) {
// Perform vector type conversion for arguments passed by value.
auto eleTy{mlir::dyn_cast<fir::VectorType>(vType).getEleTy()};
auto len{mlir::dyn_cast<fir::VectorType>(vType).getLen()};
mlir::VectorType mlirType = mlir::VectorType::get(len, eleTy);
auto v0{builder.createConvert(loc, mlirType, v)};
auto v1{builder.create<mlir::vector::BitCastOp>(loc, targetType, v0)};
intrArgs.push_back(v1);
} else if (mlir::isa<mlir::IntegerType>(targetType) &&
mlir::isa<mlir::IntegerType>(vType)) {
auto v0{builder.createConvert(loc, targetType, v)};
intrArgs.push_back(v0);
} else {
llvm::errs() << "\nUnexpected type conversion requested: "
<< " from " << vType << " to " << targetType << "\n";
llvm_unreachable("Unsupported type conversion for argument to PowerPC "
"MMA intrinsic");
}
} else {
intrArgs.push_back(v);
}
}
auto callSt{builder.create<fir::CallOp>(loc, funcOp, intrArgs)};
if (HandlerOp == MMAHandlerOp::SubToFunc ||
HandlerOp == MMAHandlerOp::SubToFuncReverseArgOnLE ||
HandlerOp == MMAHandlerOp::FirstArgIsResult) {
// Convert pointer type if needed.
mlir::Value callResult{callSt.getResult(0)};
mlir::Value destPtr{fir::getBase(args[0])};
mlir::Type callResultPtrType{builder.getRefType(callResult.getType())};
if (destPtr.getType() != callResultPtrType) {
destPtr = builder.create<fir::ConvertOp>(loc, callResultPtrType, destPtr);
}
// Copy the result.
builder.create<fir::StoreOp>(loc, callResult, destPtr);
}
}
// VEC_ST, VEC_STE, VEC_STXVP
template <VecOp vop>
void PPCIntrinsicLibrary::genVecStore(llvm::ArrayRef<fir::ExtendedValue> args) {
assert(args.size() == 3);
auto context{builder.getContext()};
auto argBases{getBasesForArgs(args)};
auto arg1TyInfo{getVecTypeFromFir(argBases[0])};
auto addr{addOffsetToAddress(builder, loc, argBases[2], argBases[1])};
llvm::StringRef fname{};
mlir::VectorType stTy{nullptr};
auto i32ty{mlir::IntegerType::get(context, 32)};
switch (vop) {
case VecOp::St:
stTy = mlir::VectorType::get(4, i32ty);
fname = "llvm.ppc.altivec.stvx";
break;
case VecOp::Ste: {
const auto width{arg1TyInfo.eleTy.getIntOrFloatBitWidth()};
const auto len{arg1TyInfo.len};
if (arg1TyInfo.isFloat32()) {
stTy = mlir::VectorType::get(len, i32ty);
fname = "llvm.ppc.altivec.stvewx";
} else if (mlir::isa<mlir::IntegerType>(arg1TyInfo.eleTy)) {
stTy = mlir::VectorType::get(len, mlir::IntegerType::get(context, width));
switch (width) {
case 8:
fname = "llvm.ppc.altivec.stvebx";
break;
case 16:
fname = "llvm.ppc.altivec.stvehx";
break;
case 32:
fname = "llvm.ppc.altivec.stvewx";
break;
default:
assert(false && "invalid element size");
}
} else
assert(false && "unknown type");
break;
}
case VecOp::Stxvp:
// __vector_pair type
stTy = mlir::VectorType::get(256, mlir::IntegerType::get(context, 1));
fname = "llvm.ppc.vsx.stxvp";
break;
default:
llvm_unreachable("invalid vector operation for generator");
}
auto funcType{
mlir::FunctionType::get(context, {stTy, addr.getType()}, std::nullopt)};
mlir::func::FuncOp funcOp = builder.createFunction(loc, fname, funcType);
llvm::SmallVector<mlir::Value, 4> biArgs;
if (vop == VecOp::Stxvp) {
biArgs.push_back(argBases[0]);
biArgs.push_back(addr);
builder.create<fir::CallOp>(loc, funcOp, biArgs);
return;
}
auto vecTyInfo{getVecTypeFromFirType(argBases[0].getType())};
auto cnv{builder.createConvert(loc, vecTyInfo.toMlirVectorType(context),
argBases[0])};
mlir::Value newArg1{nullptr};
if (stTy != arg1TyInfo.toMlirVectorType(context))
newArg1 = builder.create<mlir::vector::BitCastOp>(loc, stTy, cnv);
else
newArg1 = cnv;
if (isBEVecElemOrderOnLE())
newArg1 = builder.createConvert(
loc, stTy, reverseVectorElements(builder, loc, newArg1, 4));
biArgs.push_back(newArg1);
biArgs.push_back(addr);
builder.create<fir::CallOp>(loc, funcOp, biArgs);
}
// VEC_XST, VEC_XST_BE, VEC_STXV, VEC_XSTD2, VEC_XSTW4
template <VecOp vop>
void PPCIntrinsicLibrary::genVecXStore(
llvm::ArrayRef<fir::ExtendedValue> args) {
assert(args.size() == 3);
auto context{builder.getContext()};
auto argBases{getBasesForArgs(args)};
VecTypeInfo arg1TyInfo{getVecTypeFromFir(argBases[0])};
auto addr{addOffsetToAddress(builder, loc, argBases[2], argBases[1])};
mlir::Value trg{nullptr};
mlir::Value src{nullptr};
switch (vop) {
case VecOp::Xst:
case VecOp::Xst_be: {
src = argBases[0];
trg = builder.createConvert(loc, builder.getRefType(argBases[0].getType()),
addr);
if (vop == VecOp::Xst_be || isBEVecElemOrderOnLE()) {
auto cnv{builder.createConvert(loc, arg1TyInfo.toMlirVectorType(context),
argBases[0])};
auto shf{reverseVectorElements(builder, loc, cnv, arg1TyInfo.len)};
src = builder.createConvert(loc, arg1TyInfo.toFirVectorType(), shf);
}
break;
}
case VecOp::Xstd2:
case VecOp::Xstw4: {
// A 16-byte vector arg1 is treated as two 8-byte elements or
// four 4-byte elements.
mlir::IntegerType elemTy;
uint64_t numElem = (vop == VecOp::Xstd2) ? 2 : 4;
elemTy = builder.getIntegerType(128 / numElem);
mlir::VectorType mlirVecTy{mlir::VectorType::get(numElem, elemTy)};
fir::VectorType firVecTy{fir::VectorType::get(numElem, elemTy)};
auto cnv{builder.createConvert(loc, arg1TyInfo.toMlirVectorType(context),
argBases[0])};
mlir::Type srcTy{nullptr};
if (numElem != arg1TyInfo.len) {
cnv = builder.create<mlir::vector::BitCastOp>(loc, mlirVecTy, cnv);
srcTy = firVecTy;
} else {
srcTy = arg1TyInfo.toFirVectorType();
}
trg = builder.createConvert(loc, builder.getRefType(srcTy), addr);
if (isBEVecElemOrderOnLE()) {
cnv = reverseVectorElements(builder, loc, cnv, numElem);
}
src = builder.createConvert(loc, srcTy, cnv);
break;
}
case VecOp::Stxv:
src = argBases[0];
trg = builder.createConvert(loc, builder.getRefType(argBases[0].getType()),
addr);
break;
default:
assert(false && "Invalid vector operation for generator");
}
builder.create<fir::StoreOp>(loc, mlir::TypeRange{},
mlir::ValueRange{src, trg},
getAlignmentAttr(builder, 1));
}
} // namespace fir