mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp - llvm-project - Git at Google

 //===- StandardToLLVM.cpp - Standard to LLVM dialect conversion -----------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
 // This file implements a pass to convert MLIR standard and builtin dialects
 // into the LLVM IR dialect.
 //
 //===----------------------------------------------------------------------===//

 #include "../PassDetail.h"
 #include "mlir/Analysis/DataLayoutAnalysis.h"
 #include "mlir/Conversion/ArithmeticToLLVM/ArithmeticToLLVM.h"
 #include "mlir/Conversion/LLVMCommon/ConversionTarget.h"
 #include "mlir/Conversion/LLVMCommon/Pattern.h"
 #include "mlir/Conversion/LLVMCommon/VectorPattern.h"
 #include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h"
 #include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h"
 #include "mlir/Dialect/LLVMIR/FunctionCallUtils.h"
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
 #include "mlir/Dialect/StandardOps/IR/Ops.h"
 #include "mlir/Dialect/Utils/StaticValueUtils.h"
 #include "mlir/IR/Attributes.h"
 #include "mlir/IR/BlockAndValueMapping.h"
 #include "mlir/IR/Builders.h"
 #include "mlir/IR/BuiltinOps.h"
 #include "mlir/IR/PatternMatch.h"
 #include "mlir/IR/TypeUtilities.h"
 #include "mlir/Support/LogicalResult.h"
 #include "mlir/Support/MathExtras.h"
 #include "mlir/Transforms/DialectConversion.h"
 #include "mlir/Transforms/Passes.h"
 #include "mlir/Transforms/Utils.h"
 #include "llvm/ADT/TypeSwitch.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Type.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/FormatVariadic.h"
 #include <functional>

 using namespace mlir;

 #define PASS_NAME "convert-std-to-llvm"

 /// Appends to `result` every attribute from `attrs` that `LLVMFuncOp::build`
 /// does not construct by itself. The symbol name, the function type and the
 /// "std.varargs" attribute are always dropped; the argument attribute
 /// dictionary is dropped as well when `filterArgAttrs` is set.
 static void filterFuncAttributes(ArrayRef<NamedAttribute> attrs,
                                  bool filterArgAttrs,
                                  SmallVectorImpl<NamedAttribute> &result) {
   // Decides whether a single attribute must be left out of `result`.
   auto isDropped = [filterArgAttrs](const NamedAttribute &candidate) -> bool {
     if (candidate.getName() == SymbolTable::getSymbolAttrName())
       return true;
     if (candidate.getName() == function_like_impl::getTypeAttrName())
       return true;
     if (candidate.getName() == "std.varargs")
       return true;
     return filterArgAttrs &&
            candidate.getName() == function_like_impl::getArgDictAttrName();
   };

   for (const auto &candidate : attrs)
     if (!isDropped(candidate))
       result.push_back(candidate);
 }

 /// Builds the `_mlir_ciface_`-prefixed companion of a function that has a
 /// body. The companion takes pointers to memref descriptor structs instead of
 /// unpacked descriptor components, which lets C code call it by passing a
 /// pointer to a matching C struct. When the converted wrapper type carries the
 /// result as an extra leading pointer argument, the result of the forwarded
 /// call is stored through that pointer.
 /// Inside the companion, every memref descriptor is loaded and unpacked into
 /// its individual components, which are then forwarded to `newFuncOp`.
 static void wrapForExternalCallers(OpBuilder &rewriter, Location loc,
                                    LLVMTypeConverter &typeConverter,
                                    FuncOp funcOp, LLVM::LLVMFuncOp newFuncOp) {
   FunctionType funcType = funcOp.getType();

   SmallVector<NamedAttribute, 4> wrapperAttrs;
   filterFuncAttributes(funcOp->getAttrs(), /*filterArgAttrs=*/false,
                        wrapperAttrs);

   Type wrapperFuncType;
   bool resultIsNowArg;
   std::tie(wrapperFuncType, resultIsNowArg) =
       typeConverter.convertFunctionTypeCWrapper(funcType);

   std::string wrapperName =
       llvm::formatv("_mlir_ciface_{0}", funcOp.getName()).str();
   auto wrapperFuncOp = rewriter.create<LLVM::LLVMFuncOp>(
       loc, wrapperName, wrapperFuncType, LLVM::Linkage::External,
       /*dsoLocal*/ false, wrapperAttrs);

   // Everything below is emitted into the entry block of the wrapper; the
   // guard restores the caller's insertion point on exit.
   OpBuilder::InsertionGuard guard(rewriter);
   rewriter.setInsertionPointToStart(wrapperFuncOp.addEntryBlock());

   // When the result travels through memory, wrapper argument #0 is the result
   // pointer and the original inputs start at #1.
   const unsigned firstInputArg = resultIsNowArg ? 1 : 0;
   ArrayRef<Type> inputTypes = funcType.getInputs();
   SmallVector<Value, 8> forwardedArgs;
   for (unsigned idx = 0, numInputs = inputTypes.size(); idx != numInputs;
        ++idx) {
     Type inputType = inputTypes[idx];
     Value wrapperArg = wrapperFuncOp.getArgument(idx + firstInputArg);
     if (auto rankedType = inputType.dyn_cast<MemRefType>()) {
       // Ranked memref: load the descriptor and split it into its components.
       Value descriptor = rewriter.create<LLVM::LoadOp>(loc, wrapperArg);
       MemRefDescriptor::unpack(rewriter, loc, descriptor, rankedType,
                                forwardedArgs);
     } else if (inputType.isa<UnrankedMemRefType>()) {
       // Unranked memref: same idea with the unranked descriptor layout.
       Value descriptor = rewriter.create<LLVM::LoadOp>(loc, wrapperArg);
       UnrankedMemRefDescriptor::unpack(rewriter, loc, descriptor,
                                        forwardedArgs);
     } else {
       // Any other argument is forwarded untouched.
       forwardedArgs.push_back(wrapperArg);
     }
   }

   auto call = rewriter.create<LLVM::CallOp>(loc, newFuncOp, forwardedArgs);

   if (!resultIsNowArg) {
     rewriter.create<LLVM::ReturnOp>(loc, call.getResults());
     return;
   }

   // Write the result through the leading pointer argument and return nothing.
   rewriter.create<LLVM::StoreOp>(loc, call.getResult(0),
                                  wrapperFuncOp.getArgument(0));
   rewriter.create<LLVM::ReturnOp>(loc, ValueRange{});
 }

 /// Emits the C-interface plumbing for an external (body-less) function.
 /// Declares an auxiliary `_mlir_ciface_`-prefixed function whose type is
 /// produced by `convertFunctionTypeCWrapper`, and gives `newFuncOp` a body
 /// that:
 ///   - packs the unpacked values of every ranked or unranked memref argument
 ///     into a descriptor, stores the descriptor into a stack slot and passes
 ///     the pointer to that slot;
 ///   - forwards every other argument unchanged;
 ///   - when the wrapper type takes the result as its first parameter
 ///     (`resultIsNowArg`), allocates stack space for it, passes that pointer
 ///     first and returns the value loaded from it after the call.
 /// The auxiliary function is thereby compatible with functions defined in C
 /// that use pointers to C structs corresponding to memref descriptors.
 static void wrapExternalFunction(OpBuilder &builder, Location loc,
                                  LLVMTypeConverter &typeConverter,
                                  FuncOp funcOp, LLVM::LLVMFuncOp newFuncOp) {
   // Restores the caller's insertion point when this function returns.
   OpBuilder::InsertionGuard guard(builder);

   Type wrapperType;
   bool resultIsNowArg;
   std::tie(wrapperType, resultIsNowArg) =
       typeConverter.convertFunctionTypeCWrapper(funcOp.getType());
   // This conversion can only fail if it could not convert one of the argument
   // types. But since it has been applied to a non-wrapper function before, it
   // should have failed earlier and not reach this point at all.
   assert(wrapperType && "unexpected type conversion failure");

   SmallVector<NamedAttribute, 4> attributes;
   filterFuncAttributes(funcOp->getAttrs(), /*filterArgAttrs=*/false,
                        attributes);

   // Create the auxiliary function.
   auto wrapperFunc = builder.create<LLVM::LLVMFuncOp>(
       loc, llvm::formatv("_mlir_ciface_{0}", funcOp.getName()).str(),
       wrapperType, LLVM::Linkage::External, /*dsoLocal*/ false, attributes);

   // From here on, operations are emitted into a fresh entry block of
   // `newFuncOp`.
   builder.setInsertionPointToStart(newFuncOp.addEntryBlock());

   // Get a ValueRange containing arguments.
   FunctionType type = funcOp.getType();
   SmallVector<Value, 8> args;
   args.reserve(type.getNumInputs());
   // Not-yet-consumed arguments of `newFuncOp`; shrunk from the front as the
   // original inputs are processed below.
   ValueRange wrapperArgsRange(newFuncOp.getArguments());

   if (resultIsNowArg) {
     // Allocate the struct on the stack and pass the pointer.
     Type resultType =
         wrapperType.cast<LLVM::LLVMFunctionType>().getParamType(0);
     Value one = builder.create<LLVM::ConstantOp>(
         loc, typeConverter.convertType(builder.getIndexType()),
         builder.getIntegerAttr(builder.getIndexType(), 1));
     Value result = builder.create<LLVM::AllocaOp>(loc, resultType, one);
     args.push_back(result);
   }

   // Iterate over the inputs of the original function and pack values into
   // memref descriptors if the original type is a memref.
   for (auto &en : llvm::enumerate(type.getInputs())) {
     Value arg;
     // Number of `newFuncOp` arguments that correspond to this original input;
     // stays 1 for anything that is not a memref.
     int numToDrop = 1;
     auto memRefType = en.value().dyn_cast<MemRefType>();
     auto unrankedMemRefType = en.value().dyn_cast<UnrankedMemRefType>();
     if (memRefType || unrankedMemRefType) {
       numToDrop = memRefType
                       ? MemRefDescriptor::getNumUnpackedValues(memRefType)
                       : UnrankedMemRefDescriptor::getNumUnpackedValues();
       // Reassemble the descriptor from its unpacked components.
       Value packed =
           memRefType
               ? MemRefDescriptor::pack(builder, loc, typeConverter, memRefType,
                                        wrapperArgsRange.take_front(numToDrop))
               : UnrankedMemRefDescriptor::pack(
                     builder, loc, typeConverter, unrankedMemRefType,
                     wrapperArgsRange.take_front(numToDrop));

       // Spill the descriptor to a one-element stack slot and pass the pointer
       // to that slot to the auxiliary function.
       auto ptrTy = LLVM::LLVMPointerType::get(packed.getType());
       Value one = builder.create<LLVM::ConstantOp>(
           loc, typeConverter.convertType(builder.getIndexType()),
           builder.getIntegerAttr(builder.getIndexType(), 1));
       Value allocated =
           builder.create<LLVM::AllocaOp>(loc, ptrTy, one, /*alignment=*/0);
       builder.create<LLVM::StoreOp>(loc, packed, allocated);
       arg = allocated;
     } else {
       arg = wrapperArgsRange[0];
     }

     args.push_back(arg);
     wrapperArgsRange = wrapperArgsRange.drop_front(numToDrop);
   }
   assert(wrapperArgsRange.empty() && "did not map some of the arguments");

   auto call = builder.create<LLVM::CallOp>(loc, wrapperFunc, args);

   if (resultIsNowArg) {
     // `args.front()` is the stack slot allocated above for the result; read
     // back what the callee stored there and return it.
     Value result = builder.create<LLVM::LoadOp>(loc, args.front());
     builder.create<LLVM::ReturnOp>(loc, ValueRange{result});
   } else {
     builder.create<LLVM::ReturnOp>(loc, call.getResults());
   }
 }

 namespace {

 /// Shared base of the FuncOp conversion patterns. Provides the common logic
 /// that turns a `FuncOp` into an `LLVM::LLVMFuncOp`.
 struct FuncOpConversionBase : public ConvertOpToLLVMPattern<FuncOp> {
 protected:
   using ConvertOpToLLVMPattern<FuncOp>::ConvertOpToLLVMPattern;

   /// Creates an `LLVM::LLVMFuncOp` equivalent to `funcOp`, using the
   /// LLVMTypeConverter attached to this pattern, and moves the body of
   /// `funcOp` into it. Returns a null op if the signature cannot be converted,
   /// if "llvm.linkage" is present but is not an `LLVM::LinkageAttr` (an error
   /// is emitted in that case), or if the region types cannot be converted.
   LLVM::LLVMFuncOp
   convertFuncOpToLLVMFuncOp(FuncOp funcOp,
                             ConversionPatternRewriter &rewriter) const {
     // Translate the signature; `signature` records how each original argument
     // maps onto the converted ones.
     auto varargsAttr = funcOp->getAttrOfType<BoolAttr>("std.varargs");
     const bool isVariadic = varargsAttr && varargsAttr.getValue();
     TypeConverter::SignatureConversion signature(funcOp.getNumArguments());
     auto llvmType = getTypeConverter()->convertFunctionSignature(
         funcOp.getType(), isVariadic, signature);
     if (!llvmType)
       return nullptr;

     // Collect the attributes of the new function. The attributes of an
     // original argument are replicated on every converted argument it expands
     // into.
     SmallVector<NamedAttribute, 4> newAttrs;
     filterFuncAttributes(funcOp->getAttrs(), /*filterArgAttrs=*/true,
                          newAttrs);
     if (ArrayAttr argAttrDicts = funcOp.getAllArgAttrs()) {
       unsigned numParams =
           llvmType.cast<LLVM::LLVMFunctionType>().getNumParams();
       SmallVector<Attribute, 4> newArgAttrs(numParams);
       for (unsigned argIdx = 0, numArgs = funcOp.getNumArguments();
            argIdx < numArgs; ++argIdx) {
         auto mapping = signature.getInputMapping(argIdx);
         assert(mapping.hasValue() &&
                "unexpected deletion of function argument");
         for (size_t offset = 0; offset < mapping->size; ++offset)
           newArgAttrs[mapping->inputNo + offset] = argAttrDicts[argIdx];
       }
       newAttrs.push_back(
           rewriter.getNamedAttr(function_like_impl::getArgDictAttrName(),
                                 rewriter.getArrayAttr(newArgAttrs)));
     }

     // "llvm.linkage" is handed to the builder separately below, so drop its
     // first occurrence from the generic attribute list.
     for (auto *it = newAttrs.begin(), *last = newAttrs.end(); it != last;
          ++it) {
       if (it->getName() == "llvm.linkage") {
         newAttrs.erase(it);
         break;
       }
     }

     // Use external linkage by default until MLIR functions have linkage; an
     // explicit "llvm.linkage" attribute overrides the default.
     LLVM::Linkage linkage = LLVM::Linkage::External;
     if (Attribute rawLinkage = funcOp->getAttr("llvm.linkage")) {
       auto linkageAttr = rawLinkage.dyn_cast<mlir::LLVM::LinkageAttr>();
       if (!linkageAttr) {
         funcOp->emitError()
             << "Contains llvm.linkage attribute not of type LLVM::LinkageAttr";
         return nullptr;
       }
       linkage = linkageAttr.getLinkage();
     }

     // Create the LLVM function, move the body over and convert its types.
     auto newFuncOp = rewriter.create<LLVM::LLVMFuncOp>(
         funcOp.getLoc(), funcOp.getName(), llvmType, linkage,
         /*dsoLocal*/ false, newAttrs);
     rewriter.inlineRegionBefore(funcOp.getBody(), newFuncOp.getBody(),
                                 newFuncOp.end());
     if (failed(rewriter.convertRegionTypes(&newFuncOp.getBody(), *typeConverter,
                                            &signature)))
       return nullptr;

     return newFuncOp;
   }
 };

 /// Name of the unit attribute that requests C interface wrappers for a single
 /// function, independently of the converter-wide `emitCWrappers` option.
 static constexpr StringRef kEmitIfaceAttrName = "llvm.emit_c_interface";

 /// FuncOp legalization pattern that converts MemRef arguments to pointers to
 /// MemRef descriptors (LLVM struct data types) containing all the MemRef type
 /// information.
 struct FuncOpConversion : public FuncOpConversionBase {
   FuncOpConversion(LLVMTypeConverter &converter)
       : FuncOpConversionBase(converter) {}

   /// Converts `funcOp` to an LLVM function and, when C wrappers are requested
   /// through the converter options or the function's own attribute, also
   /// emits the matching `_mlir_ciface_` wrapper.
   LogicalResult
   matchAndRewrite(FuncOp funcOp, OpAdaptor adaptor,
                   ConversionPatternRewriter &rewriter) const override {
     LLVM::LLVMFuncOp converted = convertFuncOpToLLVMFuncOp(funcOp, rewriter);
     if (!converted)
       return failure();

     const bool wantsCInterface =
         getTypeConverter()->getOptions().emitCWrappers ||
         funcOp->getAttrOfType<UnitAttr>(kEmitIfaceAttrName);
     if (wantsCInterface) {
       // A declaration gets a body that calls out to the wrapper; a definition
       // gets a wrapper that calls into it.
       if (converted.isExternal()) {
         wrapExternalFunction(rewriter, funcOp.getLoc(), *getTypeConverter(),
                              funcOp, converted);
       } else {
         wrapForExternalCallers(rewriter, funcOp.getLoc(), *getTypeConverter(),
                                funcOp, converted);
       }
     }

     rewriter.eraseOp(funcOp);
     return success();
   }
 };

 /// FuncOp legalization pattern that converts MemRef arguments to bare pointers
 /// to the MemRef element type. This will impact the calling convention and ABI.
 /// After converting the function, every ranked memref argument of a function
 /// with a body is rebuilt into a memref descriptor at the top of the entry
 /// block, and uses of the argument are redirected to that descriptor.
 struct BarePtrFuncOpConversion : public FuncOpConversionBase {
   using FuncOpConversionBase::FuncOpConversionBase;

   LogicalResult
   matchAndRewrite(FuncOp funcOp, OpAdaptor adaptor,
                   ConversionPatternRewriter &rewriter) const override {

     // TODO: bare ptr conversion could be handled by argument materialization
     // and most of the code below would go away. But to do this, we would need a
     // way to distinguish between FuncOp and other regions in the
     // addArgumentMaterialization hook.

     // Store the type of memref-typed arguments before the conversion so that we
     // can promote them to MemRef descriptor at the beginning of the function.
     SmallVector<Type, 8> oldArgTypes =
         llvm::to_vector<8>(funcOp.getType().getInputs());

     auto newFuncOp = convertFuncOpToLLVMFuncOp(funcOp, rewriter);
     if (!newFuncOp)
       return failure();
     // A function without a body has no entry block arguments to promote.
     if (newFuncOp.getBody().empty()) {
       rewriter.eraseOp(funcOp);
       return success();
     }

     // Promote bare pointers from memref arguments to memref descriptors at the
     // beginning of the function so that all the memrefs in the function have a
     // uniform representation.
     Block *entryBlock = &newFuncOp.getBody().front();
     auto blockArgs = entryBlock->getArguments();
     assert(blockArgs.size() == oldArgTypes.size() &&
            "The number of arguments and types doesn't match");

     // Restores the rewriter's insertion point when this function returns.
     OpBuilder::InsertionGuard guard(rewriter);
     rewriter.setInsertionPointToStart(entryBlock);
     for (auto it : llvm::zip(blockArgs, oldArgTypes)) {
       BlockArgument arg = std::get<0>(it);
       Type argTy = std::get<1>(it);

       // Unranked memrefs are not supported in the bare pointer calling
       // convention. We should have bailed out before in the presence of
       // unranked memrefs.
       assert(!argTy.isa<UnrankedMemRefType>() &&
              "Unranked memref is not supported");
       // Only ranked memref arguments need promotion.
       auto memrefTy = argTy.dyn_cast<MemRefType>();
       if (!memrefTy)
         continue;

       // Replace barePtr with a placeholder (undef), promote barePtr to a
       // ranked memref descriptor and replace the placeholder with the
       // resulting descriptor value.
       // TODO: The placeholder is needed to avoid replacing barePtr uses in the
       // MemRef descriptor instructions. We may want to have a utility in the
       // rewriter to properly handle this use case.
       Location loc = funcOp.getLoc();
       auto placeholder = rewriter.create<LLVM::UndefOp>(
           loc, getTypeConverter()->convertType(memrefTy));
       rewriter.replaceUsesOfBlockArgument(arg, placeholder);

       // Built after the uses were redirected, so the descriptor is the only
       // remaining user of the bare pointer argument.
       Value desc = MemRefDescriptor::fromStaticShape(
           rewriter, loc, *getTypeConverter(), memrefTy, arg);
       rewriter.replaceOp(placeholder, {desc});
     }

     rewriter.eraseOp(funcOp);
     return success();
   }
 };

 // Straightforward lowerings.
 /// Lowers `SelectOp` to `LLVM::SelectOp` through the generic
 /// `VectorConvertToLLVMPattern` helper.
 using SelectOpLowering = VectorConvertToLLVMPattern<SelectOp, LLVM::SelectOp>;

 /// Lower `std.assert`. The default lowering branches to a block that calls
 /// `abort` when the asserted condition does not hold, and continues with the
 /// code following the assertion otherwise. The failure message is ignored by
 /// this default lowering but should be propagated by any custom lowering.
 struct AssertOpLowering : public ConvertOpToLLVMPattern<AssertOp> {
   using ConvertOpToLLVMPattern<AssertOp>::ConvertOpToLLVMPattern;

   LogicalResult
   matchAndRewrite(AssertOp op, OpAdaptor adaptor,
                   ConversionPatternRewriter &rewriter) const override {
     Location loc = op.getLoc();
     auto parentModule = op->getParentOfType<ModuleOp>();

     // Reuse an existing `abort` declaration, or declare it at the top of the
     // module when there is none yet.
     auto abortDecl = parentModule.lookupSymbol<LLVM::LLVMFuncOp>("abort");
     if (!abortDecl) {
       OpBuilder::InsertionGuard restoreInsertionPoint(rewriter);
       rewriter.setInsertionPointToStart(parentModule.getBody());
       abortDecl = rewriter.create<LLVM::LLVMFuncOp>(
           rewriter.getUnknownLoc(), "abort",
           LLVM::LLVMFunctionType::get(getVoidType(), {}));
     }

     // Everything from the assertion onwards moves into its own block, which
     // is where execution resumes when the check passes.
     Block *assertBlock = rewriter.getInsertionBlock();
     Block *passBlock =
         rewriter.splitBlock(assertBlock, rewriter.getInsertionPoint());

     // Failure path: call `abort` and mark the block end as unreachable.
     Block *abortBlock = rewriter.createBlock(assertBlock->getParent());
     rewriter.create<LLVM::CallOp>(loc, abortDecl, llvm::None);
     rewriter.create<LLVM::UnreachableOp>(loc);

     // Terminate the original block with the conditional branch that replaces
     // the assertion.
     rewriter.setInsertionPointToEnd(assertBlock);
     rewriter.replaceOpWithNewOp<LLVM::CondBrOp>(op, adaptor.getArg(),
                                                 passBlock, abortBlock);
     return success();
   }
 };

 /// Lowers `ConstantOp`. A constant holding a flat symbol reference becomes an
 /// `LLVM::AddressOfOp`; any other symbol reference is rejected; every other
 /// constant is rewritten one-to-one into `LLVM::ConstantOp`.
 struct ConstantOpLowering : public ConvertOpToLLVMPattern<ConstantOp> {
   using ConvertOpToLLVMPattern<ConstantOp>::ConvertOpToLLVMPattern;

   LogicalResult
   matchAndRewrite(ConstantOp op, OpAdaptor adaptor,
                   ConversionPatternRewriter &rewriter) const override {
     auto flatRef = op.getValue().dyn_cast<FlatSymbolRefAttr>();
     if (!flatRef) {
       // Calling into other scopes (non-flat reference) is not supported in
       // LLVM.
       if (op.getValue().isa<SymbolRefAttr>())
         return rewriter.notifyMatchFailure(
             op, "referring to a symbol outside of the current module");

       // Plain value constant.
       return LLVM::detail::oneToOneRewrite(
           op, LLVM::ConstantOp::getOperationName(), adaptor.getOperands(),
           *getTypeConverter(), rewriter);
     }

     // The constant refers to a function: take its address instead.
     auto llvmType = typeConverter->convertType(op.getResult().getType());
     if (!llvmType || !LLVM::isCompatibleType(llvmType))
       return rewriter.notifyMatchFailure(op, "failed to convert result type");

     auto addressOf = rewriter.create<LLVM::AddressOfOp>(op.getLoc(), llvmType,
                                                         flatRef.getValue());
     // Carry over every attribute except "value", which held the symbol
     // reference itself.
     for (const NamedAttribute &namedAttr : op->getAttrs()) {
       if (namedAttr.getName().strref() != "value")
         addressOf->setAttr(namedAttr.getName(), namedAttr.getValue());
     }
     rewriter.replaceOp(op, addressOf->getResults());
     return success();
   }
 };

 // Shared lowering of call-like operations to `LLVM::CallOp`. Operands are
 // promoted through the type converter's `promoteOperands`, multiple results
 // are returned as one packed structure and extracted again after the call.
 template <typename CallOpType>
 struct CallOpInterfaceLowering : public ConvertOpToLLVMPattern<CallOpType> {
   using ConvertOpToLLVMPattern<CallOpType>::ConvertOpToLLVMPattern;
   using Super = CallOpInterfaceLowering<CallOpType>;
   using Base = ConvertOpToLLVMPattern<CallOpType>;

   LogicalResult
   matchAndRewrite(CallOpType callOp, typename CallOpType::Adaptor adaptor,
                   ConversionPatternRewriter &rewriter) const override {
     Location loc = callOp.getLoc();
     unsigned numResults = callOp.getNumResults();
     auto resultTypes = llvm::to_vector<4>(callOp.getResultTypes());

     // The LLVM call produces at most one value: pack the result types into a
     // single type whenever the call has results.
     Type packedResult = nullptr;
     if (numResults != 0) {
       packedResult =
           this->getTypeConverter()->packFunctionResults(resultTypes);
       if (!packedResult)
         return failure();
     }

     auto promoted = this->getTypeConverter()->promoteOperands(
         loc, /*opOperands=*/callOp->getOperands(), adaptor.getOperands(),
         rewriter);
     auto newOp = rewriter.create<LLVM::CallOp>(
         loc, packedResult ? TypeRange(packedResult) : TypeRange(), promoted,
         callOp->getAttrs());

     SmallVector<Value, 4> results;
     if (numResults >= 2) {
       // The call returns a structure: pull every original result out of it.
       results.reserve(numResults);
       for (unsigned idx = 0; idx != numResults; ++idx) {
         auto elementType =
             this->typeConverter->convertType(callOp.getResult(idx).getType());
         results.push_back(rewriter.create<LLVM::ExtractValueOp>(
             loc, elementType, newOp->getResult(0),
             rewriter.getI64ArrayAttr(idx)));
       }
     } else {
       // With fewer than two results packing changed nothing, so the results
       // of the new call can be used directly.
       results.append(newOp.result_begin(), newOp.result_end());
     }

     if (this->getTypeConverter()->getOptions().useBarePtrCallConv) {
       // For the bare-ptr calling convention, promote memref results to
       // descriptors.
       assert(results.size() == resultTypes.size() &&
              "The number of arguments and types doesn't match");
       this->getTypeConverter()->promoteBarePtrsToDescriptors(
           rewriter, loc, resultTypes, results);
     } else if (failed(this->copyUnrankedDescriptors(rewriter, loc, resultTypes,
                                                     results,
                                                     /*toDynamic=*/false))) {
       return failure();
     }

     rewriter.replaceOp(callOp, results);
     return success();
   }
 };

 /// Lowering of `CallOp`; all logic lives in `CallOpInterfaceLowering`, whose
 /// constructors are inherited here.
 struct CallOpLowering : public CallOpInterfaceLowering<CallOp> {
   using Super::Super;
 };

 /// Lowering of `CallIndirectOp`; all logic lives in `CallOpInterfaceLowering`,
 /// whose constructors are inherited here.
 struct CallIndirectOpLowering : public CallOpInterfaceLowering<CallIndirectOp> {
   using Super::Super;
 };

 /// Folds away an `UnrealizedConversionCastOp` whose two sides are related by
 /// the type converter: the cast is replaced by its (already converted) inputs
 /// when converting the output types yields the input types, or when
 /// converting the input types yields the output types.
 struct UnrealizedConversionCastOpLowering
     : public ConvertOpToLLVMPattern<UnrealizedConversionCastOp> {
   using ConvertOpToLLVMPattern<
       UnrealizedConversionCastOp>::ConvertOpToLLVMPattern;

   LogicalResult
   matchAndRewrite(UnrealizedConversionCastOp op, OpAdaptor adaptor,
                   ConversionPatternRewriter &rewriter) const override {
     // First direction: outputs convert to the types of the inputs.
     SmallVector<Type> convertedOutputs;
     const bool outputsMatchInputs =
         succeeded(typeConverter->convertTypes(op.outputs().getTypes(),
                                               convertedOutputs)) &&
         convertedOutputs == adaptor.inputs().getTypes();
     if (outputsMatchInputs) {
       rewriter.replaceOp(op, adaptor.inputs());
       return success();
     }

     // Second direction: inputs convert to the types of the outputs.
     SmallVector<Type> convertedInputs;
     const bool inputsMatchOutputs =
         succeeded(typeConverter->convertTypes(adaptor.inputs().getTypes(),
                                               convertedInputs)) &&
         convertedInputs == op.outputs().getType();
     if (!inputsMatchOutputs)
       return failure();

     rewriter.replaceOp(op, adaptor.inputs());
     return success();
   }
 };

 /// Lowers `RankOp` on memrefs. The rank of an unranked memref is read from
 /// its descriptor; the rank of a ranked memref becomes an index constant.
 /// Operands of any other type are not handled by this pattern.
 struct RankOpLowering : public ConvertOpToLLVMPattern<RankOp> {
   using ConvertOpToLLVMPattern<RankOp>::ConvertOpToLLVMPattern;

   LogicalResult
   matchAndRewrite(RankOp op, OpAdaptor adaptor,
                   ConversionPatternRewriter &rewriter) const override {
     Location loc = op.getLoc();
     Type sourceType = op.getMemrefOrTensor().getType();

     if (sourceType.isa<UnrankedMemRefType>()) {
       UnrankedMemRefDescriptor descriptor(adaptor.getMemrefOrTensor());
       Value dynamicRank = descriptor.rank(rewriter, loc);
       rewriter.replaceOp(op, {dynamicRank});
       return success();
     }

     auto rankedType = sourceType.dyn_cast<MemRefType>();
     if (!rankedType)
       return failure();

     Value staticRank =
         createIndexConstant(rewriter, loc, rankedType.getRank());
     rewriter.replaceOp(op, {staticRank});
     return success();
   }
 };

 // Common base for load and store operations on MemRefs. Only matches
 // operations whose MemRef type passes `isConvertibleAndHasIdentityMaps`.
 template <typename Derived>
 struct LoadStoreOpLowering : public ConvertOpToLLVMPattern<Derived> {
   using ConvertOpToLLVMPattern<Derived>::ConvertOpToLLVMPattern;
   using ConvertOpToLLVMPattern<Derived>::isConvertibleAndHasIdentityMaps;
   using Base = LoadStoreOpLowering<Derived>;

   LogicalResult match(Derived op) const override {
     return success(isConvertibleAndHasIdentityMaps(op.getMemRefType()));
   }
 };

 // Base class for LLVM IR lowering terminator operations with successors.
 // `SourceOp` is replaced by a `TargetOp` built from the converted operands,
 // the original successor blocks and the original attributes.
 template <typename SourceOp, typename TargetOp>
 struct OneToOneLLVMTerminatorLowering
     : public ConvertOpToLLVMPattern<SourceOp> {
   using ConvertOpToLLVMPattern<SourceOp>::ConvertOpToLLVMPattern;
   // Lets derived patterns inherit the constructors with `using Super::Super`.
   using Super = OneToOneLLVMTerminatorLowering<SourceOp, TargetOp>;

   LogicalResult
   matchAndRewrite(SourceOp op, typename SourceOp::Adaptor adaptor,
                   ConversionPatternRewriter &rewriter) const override {
     // Operands come from the adaptor (already converted); successors and
     // attributes are taken from the original operation unchanged.
     rewriter.replaceOpWithNewOp<TargetOp>(op, adaptor.getOperands(),
                                           op->getSuccessors(), op->getAttrs());
     return success();
   }
 };

 // Special lowering pattern for `ReturnOps`.  Unlike all other operations,
 // `ReturnOp` interacts with the function signature and must have as many
 // operands as the function has return values.  Because in LLVM IR, functions
 // can only return 0 or 1 value, we pack multiple values into a structure type.
 // Emit `UndefOp` followed by `InsertValueOp`s to create such structure if
 // necessary before returning it.
 //
 // The pattern fails to match when an unranked memref is returned under the
 // bare-pointer calling convention, when the unranked descriptors cannot be
 // copied, or when the operand types cannot be packed into a single LLVM type.
 struct ReturnOpLowering : public ConvertOpToLLVMPattern<ReturnOp> {
   using ConvertOpToLLVMPattern<ReturnOp>::ConvertOpToLLVMPattern;

   LogicalResult
   matchAndRewrite(ReturnOp op, OpAdaptor adaptor,
                   ConversionPatternRewriter &rewriter) const override {
     Location loc = op.getLoc();
     unsigned numArguments = op.getNumOperands();
     SmallVector<Value, 4> updatedOperands;

     if (getTypeConverter()->getOptions().useBarePtrCallConv) {
       // For the bare-ptr calling convention, extract the aligned pointer to
       // be returned from the memref descriptor.
       for (auto it : llvm::zip(op->getOperands(), adaptor.getOperands())) {
         Type oldTy = std::get<0>(it).getType();
         Value newOperand = std::get<1>(it);
         if (oldTy.isa<MemRefType>()) {
           MemRefDescriptor memrefDesc(newOperand);
           newOperand = memrefDesc.alignedPtr(rewriter, loc);
         } else if (oldTy.isa<UnrankedMemRefType>()) {
           // Unranked memref is not supported in the bare pointer calling
           // convention.
           return failure();
         }
         updatedOperands.push_back(newOperand);
       }
     } else {
       updatedOperands = llvm::to_vector<4>(adaptor.getOperands());
       // Propagate a failure to copy the unranked descriptors instead of
       // silently returning the uncopied ones, as the call lowering does.
       if (failed(copyUnrankedDescriptors(rewriter, loc,
                                          op.getOperands().getTypes(),
                                          updatedOperands,
                                          /*toDynamic=*/true)))
         return failure();
     }

     // If ReturnOp has 0 or 1 operand, create it and return immediately.
     if (numArguments == 0) {
       rewriter.replaceOpWithNewOp<LLVM::ReturnOp>(op, TypeRange(), ValueRange(),
                                                   op->getAttrs());
       return success();
     }
     if (numArguments == 1) {
       rewriter.replaceOpWithNewOp<LLVM::ReturnOp>(
           op, TypeRange(), updatedOperands, op->getAttrs());
       return success();
     }

     // Otherwise, we need to pack the arguments into an LLVM struct type before
     // returning.
     auto packedType = getTypeConverter()->packFunctionResults(
         llvm::to_vector<4>(op.getOperandTypes()));
     // Packing yields a null type when an operand type cannot be converted;
     // bail out rather than building an `UndefOp` of a null type.
     if (!packedType)
       return rewriter.notifyMatchFailure(
           op, "failed to pack returned values into an LLVM struct type");

     Value packed = rewriter.create<LLVM::UndefOp>(loc, packedType);
     for (unsigned i = 0; i < numArguments; ++i) {
       packed = rewriter.create<LLVM::InsertValueOp>(
           loc, packedType, packed, updatedOperands[i],
           rewriter.getI64ArrayAttr(i));
     }
     rewriter.replaceOpWithNewOp<LLVM::ReturnOp>(op, TypeRange(), packed,
                                                 op->getAttrs());
     return success();
   }
 };

 // FIXME: this should be tablegen'ed as well.
 /// Lowers `BranchOp` to `LLVM::BrOp` through the one-to-one terminator
 /// lowering, whose constructors are inherited here.
 struct BranchOpLowering
     : public OneToOneLLVMTerminatorLowering<BranchOp, LLVM::BrOp> {
   using Super::Super;
 };
 /// Lowers `CondBranchOp` to `LLVM::CondBrOp` through the one-to-one
 /// terminator lowering, whose constructors are inherited here.
 struct CondBranchOpLowering
     : public OneToOneLLVMTerminatorLowering<CondBranchOp, LLVM::CondBrOp> {
   using Super::Super;
 };
 /// Lowers `SwitchOp` to `LLVM::SwitchOp` through the one-to-one terminator
 /// lowering, whose constructors are inherited here.
 struct SwitchOpLowering
     : public OneToOneLLVMTerminatorLowering<SwitchOp, LLVM::SwitchOp> {
   using Super::Super;
 };

 // Lowers the Splat operation for 0-d and 1-d vector results. The scalar is
 // inserted at position 0 of an undef vector; for 1-d results a shufflevector
 // with an all-zero mask then broadcasts it to every element. Results of
 // higher rank, or of non-vector type, are not handled by this pattern.
 struct SplatOpLowering : public ConvertOpToLLVMPattern<SplatOp> {
   using ConvertOpToLLVMPattern<SplatOp>::ConvertOpToLLVMPattern;

   LogicalResult
   matchAndRewrite(SplatOp splatOp, OpAdaptor adaptor,
                   ConversionPatternRewriter &rewriter) const override {
     VectorType resultType = splatOp.getType().dyn_cast<VectorType>();
     if (!resultType || resultType.getRank() > 1)
       return failure();

     Location loc = splatOp.getLoc();

     // Undef vector that receives the scalar, plus the i32 zero used as the
     // insertion index.
     auto llvmVectorType = typeConverter->convertType(splatOp.getType());
     Value undefVector = rewriter.create<LLVM::UndefOp>(loc, llvmVectorType);
     auto i32Type = rewriter.getIntegerType(32);
     auto zeroIndex = rewriter.create<LLVM::ConstantOp>(
         loc, typeConverter->convertType(i32Type),
         rewriter.getZeroAttr(i32Type));

     if (resultType.getRank() != 0) {
       // 1-d vector: insert the scalar, then broadcast it with a shuffle whose
       // mask selects element 0 for every lane.
       auto seeded = rewriter.create<LLVM::InsertElementOp>(
           loc, llvmVectorType, undefVector, adaptor.getInput(), zeroIndex);

       int64_t numLanes = resultType.getDimSize(0);
       SmallVector<int32_t, 4> shuffleMask(numLanes, 0);
       ArrayAttr maskAttr = rewriter.getI32ArrayAttr(shuffleMask);
       rewriter.replaceOpWithNewOp<LLVM::ShuffleVectorOp>(splatOp, seeded,
                                                          undefVector, maskAttr);
       return success();
     }

     // 0-d vector: the single `insertelement` is the whole result.
     rewriter.replaceOpWithNewOp<LLVM::InsertElementOp>(
         splatOp, llvmVectorType, undefVector, adaptor.getInput(), zeroIndex);
     return success();
   }
 };

 // Lowers a Splat whose result is a vector of rank 2 or higher; rank 0 and
 // rank 1 results are rejected here and handled by SplatOpLowering. A 1-D
 // vector holding the splatted value is built with an insertelement + a
 // shufflevector and then inserted at every position of the n-D result.
 struct SplatNdOpLowering : public ConvertOpToLLVMPattern<SplatOp> {
   using ConvertOpToLLVMPattern<SplatOp>::ConvertOpToLLVMPattern;

   LogicalResult
   matchAndRewrite(SplatOp splatOp, OpAdaptor adaptor,
                   ConversionPatternRewriter &rewriter) const override {
     auto splatVectorType = splatOp.getType().dyn_cast<VectorType>();
     if (!splatVectorType)
       return failure();
     const int64_t rank = splatVectorType.getRank();
     if (rank <= 1)
       return failure();

     // Both the converted n-D type and its innermost 1-D vector type are
     // required; give up if either is missing.
     auto typeInfo = LLVM::detail::extractNDVectorTypeInfo(splatVectorType,
                                                           *getTypeConverter());
     auto outerType = typeInfo.llvmNDVectorTy;
     auto rowType = typeInfo.llvm1DVectorTy;
     if (!outerType || !rowType)
       return failure();

     auto loc = splatOp.getLoc();

     // The value that will eventually replace the splat starts out undef.
     Value result = rewriter.create<LLVM::UndefOp>(loc, outerType);

     // Build the 1-D vector: put the scalar operand into lane 0 of an undef
     // vector first.
     Value undefRow = rewriter.create<LLVM::UndefOp>(loc, rowType);
     auto i32Type = rewriter.getIntegerType(32);
     auto laneZero = rewriter.create<LLVM::ConstantOp>(
         loc, typeConverter->convertType(i32Type),
         rewriter.getZeroAttr(i32Type));
     Value splatRow = rewriter.create<LLVM::InsertElementOp>(
         loc, rowType, undefRow, adaptor.getInput(), laneZero);

     // Replicate lane 0 across the innermost dimension using an all-zero mask.
     int64_t laneCount = splatVectorType.getDimSize(rank - 1);
     SmallVector<int32_t, 4> broadcastMask(laneCount, 0);
     ArrayAttr maskAttr = rewriter.getI32ArrayAttr(broadcastMask);
     splatRow =
         rewriter.create<LLVM::ShuffleVectorOp>(loc, splatRow, splatRow, maskAttr);

     // Insert the splatted 1-D vector at each position reported by
     // nDVectorIterate, threading the partially built result through.
     nDVectorIterate(typeInfo, rewriter, [&](ArrayAttr position) {
       result = rewriter.create<LLVM::InsertValueOp>(loc, outerType, result,
                                                     splatRow, position);
     });
     rewriter.replaceOp(splatOp, result);
     return success();
   }
 };

 } // namespace

 /// Maps the kind of a std.atomic_rmw onto the corresponding llvm.atomicrmw
 /// binary operation. Returns llvm::None for kinds without a case below, which
 /// callers use to decide that a direct llvm.atomicrmw lowering is not possible.
 static Optional<LLVM::AtomicBinOp> matchSimpleAtomicOp(AtomicRMWOp atomicOp) {
   Optional<LLVM::AtomicBinOp> binOp = llvm::None;
   switch (atomicOp.getKind()) {
   case AtomicRMWKind::addf:
     binOp = LLVM::AtomicBinOp::fadd;
     break;
   case AtomicRMWKind::addi:
     binOp = LLVM::AtomicBinOp::add;
     break;
   case AtomicRMWKind::assign:
     binOp = LLVM::AtomicBinOp::xchg;
     break;
   case AtomicRMWKind::maxs:
     binOp = LLVM::AtomicBinOp::max;
     break;
   case AtomicRMWKind::maxu:
     binOp = LLVM::AtomicBinOp::umax;
     break;
   case AtomicRMWKind::mins:
     binOp = LLVM::AtomicBinOp::min;
     break;
   case AtomicRMWKind::minu:
     binOp = LLVM::AtomicBinOp::umin;
     break;
   default:
     // Every remaining kind has no direct mapping; leave the result empty.
     break;
   }
   return binOp;
 }

 namespace {

 /// Lowers a std.atomic_rmw to a single llvm.atomicrmw when its kind has a
 /// direct counterpart according to matchSimpleAtomicOp. The pattern fails when
 /// the inherited `match` check rejects the op or when the kind has no such
 /// counterpart.
 struct AtomicRMWOpLowering : public LoadStoreOpLowering<AtomicRMWOp> {
   using Base::Base;

   LogicalResult
   matchAndRewrite(AtomicRMWOp atomicOp, OpAdaptor adaptor,
                   ConversionPatternRewriter &rewriter) const override {
     if (failed(match(atomicOp)))
       return failure();

     Optional<LLVM::AtomicBinOp> binOp = matchSimpleAtomicOp(atomicOp);
     if (!binOp)
       return failure();

     // Address of the accessed element, computed from the converted memref
     // operand and indices.
     auto loc = atomicOp.getLoc();
     auto elementPtr =
         getStridedElementPtr(loc, atomicOp.getMemRefType(), adaptor.getMemref(),
                              adaptor.getIndices(), rewriter);

     // The result type is the type of the already-converted value operand.
     Value operand = adaptor.getValue();
     rewriter.replaceOpWithNewOp<LLVM::AtomicRMWOp>(
         atomicOp, operand.getType(), *binOp, elementPtr, operand,
         LLVM::AtomicOrdering::acq_rel);
     return success();
   }
 };

 /// Wrap a llvm.cmpxchg operation in a while loop so that the operation can be
 /// retried until it succeeds in atomically storing a new value into memory.
 ///
 ///      +---------------------------------+
 ///      |   <code before the AtomicRMWOp> |
 ///      |   <compute initial %loaded>     |
 ///      |   br loop(%loaded)              |
 ///      +---------------------------------+
 ///             |
 ///  -------|   |
 ///  |      v   v
 ///  |   +--------------------------------+
 ///  |   | loop(%loaded):                 |
 ///  |   |   <body contents>              |
 ///  |   |   %pair = cmpxchg              |
 ///  |   |   %new = %pair[0]              |
 ///  |   |   %ok = %pair[1]               |
 ///  |   |   cond_br %ok, end, loop(%new) |
 ///  |   +--------------------------------+
 ///  |          |        |
 ///  |-----------        |
 ///                      v
 ///      +--------------------------------+
 ///      | end:                           |
 ///      |   <code after the AtomicRMWOp> |
 ///      +--------------------------------+
 ///
 struct GenericAtomicRMWOpLowering
     : public LoadStoreOpLowering<GenericAtomicRMWOp> {
   using Base::Base;

   /// Rewrites `atomicOp` into the three-block structure sketched above and
   /// replaces its result with the value extracted from the cmpxchg pair.
   LogicalResult
   matchAndRewrite(GenericAtomicRMWOp atomicOp, OpAdaptor adaptor,
                   ConversionPatternRewriter &rewriter) const override {

     auto loc = atomicOp.getLoc();
     Type valueType = typeConverter->convertType(atomicOp.getResult().getType());

     // Split the block into initial, loop, and ending parts.
     // `loopBlock` is created right after `initBlock` with a single block
     // argument of the converted value type; `endBlock` follows `loopBlock`.
     auto *initBlock = rewriter.getInsertionBlock();
     auto *loopBlock =
         rewriter.createBlock(initBlock->getParent(),
                              std::next(Region::iterator(initBlock)), valueType);
     auto *endBlock = rewriter.createBlock(
         loopBlock->getParent(), std::next(Region::iterator(loopBlock)));

     // Operations range to be moved to `endBlock`.
     // Both iterators are captured before anything is appended to `initBlock`,
     // so `opsToMoveEnd` refers to the block's last op at this point.
     auto opsToMoveStart = atomicOp->getIterator();
     auto opsToMoveEnd = initBlock->back().getIterator();

     // Compute the loaded value and branch to the loop block.
     rewriter.setInsertionPointToEnd(initBlock);
     auto memRefType = atomicOp.getMemref().getType().cast<MemRefType>();
     auto dataPtr = getStridedElementPtr(loc, memRefType, adaptor.getMemref(),
                                         adaptor.getIndices(), rewriter);
     Value init = rewriter.create<LLVM::LoadOp>(loc, dataPtr);
     rewriter.create<LLVM::BrOp>(loc, init, loopBlock);

     // Prepare the body of the loop block.
     rewriter.setInsertionPointToStart(loopBlock);

     // Clone the GenericAtomicRMWOp region and extract the result.
     // The region's current-value argument is remapped to the loop block
     // argument; the operand of the region terminator, looked up through the
     // mapping, is the value to store.
     auto loopArgument = loopBlock->getArgument(0);
     BlockAndValueMapping mapping;
     mapping.map(atomicOp.getCurrentValue(), loopArgument);
     Block &entryBlock = atomicOp.body().front();
     for (auto &nestedOp : entryBlock.without_terminator()) {
       Operation *clone = rewriter.clone(nestedOp, mapping);
       mapping.map(nestedOp.getResults(), clone->getResults());
     }
     Value result = mapping.lookup(entryBlock.getTerminator()->getOperand(0));

     // Prepare the epilog of the loop block.
     // Append the cmpxchg op to the end of the loop block.
     // The cmpxchg compares memory against the loop argument and, on a match,
     // stores `result`. Its result type is the literal struct
     // {valueType, i1}.
     auto successOrdering = LLVM::AtomicOrdering::acq_rel;
     auto failureOrdering = LLVM::AtomicOrdering::monotonic;
     auto boolType = IntegerType::get(rewriter.getContext(), 1);
     auto pairType = LLVM::LLVMStructType::getLiteral(rewriter.getContext(),
                                                      {valueType, boolType});
     auto cmpxchg = rewriter.create<LLVM::AtomicCmpXchgOp>(
         loc, pairType, dataPtr, loopArgument, result, successOrdering,
         failureOrdering);
     // Extract the %new_loaded and %ok values from the pair.
     // Element 0 is the loaded value, element 1 is the success flag.
     Value newLoaded = rewriter.create<LLVM::ExtractValueOp>(
         loc, valueType, cmpxchg, rewriter.getI64ArrayAttr({0}));
     Value ok = rewriter.create<LLVM::ExtractValueOp>(
         loc, boolType, cmpxchg, rewriter.getI64ArrayAttr({1}));

     // Conditionally branch to the end or back to the loop depending on %ok.
     rewriter.create<LLVM::CondBrOp>(loc, ok, endBlock, ArrayRef<Value>(),
                                     loopBlock, newLoaded);

     // Re-create in `endBlock` every op that followed `atomicOp` in the
     // original block (from the op after `atomicOp` through the captured last
     // op), with uses of the atomic op's result remapped to `newLoaded`.
     rewriter.setInsertionPointToEnd(endBlock);
     moveOpsRange(atomicOp.getResult(), newLoaded, std::next(opsToMoveStart),
                  std::next(opsToMoveEnd), rewriter);

     // The 'result' of the atomic_rmw op is the newly loaded value.
     rewriter.replaceOp(atomicOp, {newLoaded});

     return success();
   }

 private:
   // Clones a segment of ops [start, end) and erases the original.
   // Clones are created at the rewriter's current insertion point, with
   // `oldResult` remapped to `newResult`. The originals are collected first
   // and erased only after the whole range has been cloned.
   void moveOpsRange(ValueRange oldResult, ValueRange newResult,
                     Block::iterator start, Block::iterator end,
                     ConversionPatternRewriter &rewriter) const {
     BlockAndValueMapping mapping;
     mapping.map(oldResult, newResult);
     SmallVector<Operation *, 2> opsToErase;
     for (auto it = start; it != end; ++it) {
       rewriter.clone(*it, mapping);
       opsToErase.push_back(&*it);
     }
     for (auto *it : opsToErase)
       rewriter.eraseOp(it);
   }
 };

 } // namespace

 /// Registers exactly one function conversion pattern in `patterns`:
 /// BarePtrFuncOpConversion when the converter's options request the
 /// bare-pointer calling convention, FuncOpConversion otherwise.
 void mlir::populateStdToLLVMFuncOpConversionPattern(
     LLVMTypeConverter &converter, RewritePatternSet &patterns) {
   if (!converter.getOptions().useBarePtrCallConv) {
     patterns.add<FuncOpConversion>(converter);
     return;
   }
   patterns.add<BarePtrFuncOpConversion>(converter);
 }

 /// Populates `patterns` with the standard-to-LLVM conversion patterns: first
 /// the function conversion pattern selected by
 /// populateStdToLLVMFuncOpConversionPattern, then the op lowering patterns
 /// listed below, all constructed with `converter`.
 void mlir::populateStdToLLVMConversionPatterns(LLVMTypeConverter &converter,
                                                RewritePatternSet &patterns) {
   populateStdToLLVMFuncOpConversionPattern(converter, patterns);
   // The list is kept one pattern per line; clang-format is disabled so it
   // does not get re-flowed.
   // clang-format off
   patterns.add<
       AssertOpLowering,
       AtomicRMWOpLowering,
       BranchOpLowering,
       CallIndirectOpLowering,
       CallOpLowering,
       CondBranchOpLowering,
       ConstantOpLowering,
       GenericAtomicRMWOpLowering,
       RankOpLowering,
       ReturnOpLowering,
       SelectOpLowering,
       SplatOpLowering,
       SplatNdOpLowering,
       SwitchOpLowering>(converter);
   // clang-format on
 }

 namespace {
 /// Module pass that converts operations to the LLVM IR dialect using the
 /// standard-to-LLVM and arithmetic-to-LLVM pattern sets, then records the
 /// configured data layout string as a module attribute.
 struct LLVMLoweringPass : public ConvertStandardToLLVMBase<LLVMLoweringPass> {
   LLVMLoweringPass() = default;

   /// Initializes the pass options from explicit values. `useAlignedAlloc` is
   /// accepted for interface compatibility but is not read by this
   /// constructor.
   LLVMLoweringPass(bool useBarePtrCallConv, bool emitCWrappers,
                    unsigned indexBitwidth, bool useAlignedAlloc,
                    const llvm::DataLayout &dataLayout) {
     this->useBarePtrCallConv = useBarePtrCallConv;
     this->emitCWrappers = emitCWrappers;
     this->indexBitwidth = indexBitwidth;
     this->dataLayout = dataLayout.getStringRepresentation();
   }

   /// Run the dialect converter on the module.
   void runOnOperation() override {
     ModuleOp moduleOp = getOperation();

     // The bare-pointer calling convention and C wrapper emission cannot be
     // requested together.
     if (useBarePtrCallConv && emitCWrappers) {
       moduleOp.emitError()
           << "incompatible conversion options: bare-pointer calling convention "
              "and C wrapper emission";
       signalPassFailure();
       return;
     }

     // Validate the data layout string, reporting problems on the module.
     auto reportLayoutError = [&moduleOp](const Twine &message) {
       moduleOp.emitError() << message.str();
     };
     if (failed(LLVM::LLVMDialect::verifyDataLayoutString(this->dataLayout,
                                                          reportLayoutError))) {
       signalPassFailure();
       return;
     }

     const auto &layoutAnalysis = getAnalysis<DataLayoutAnalysis>();

     // Translate the pass options into lowering options. The index bitwidth
     // is only overridden when it is not the "derive from data layout"
     // sentinel.
     LowerToLLVMOptions loweringOptions(&getContext(),
                                        layoutAnalysis.getAtOrAbove(moduleOp));
     loweringOptions.useBarePtrCallConv = useBarePtrCallConv;
     loweringOptions.emitCWrappers = emitCWrappers;
     if (indexBitwidth != kDeriveIndexBitwidthFromDataLayout)
       loweringOptions.overrideIndexBitwidth(indexBitwidth);
     loweringOptions.dataLayout = llvm::DataLayout(this->dataLayout);

     LLVMTypeConverter converter(&getContext(), loweringOptions,
                                 &layoutAnalysis);

     RewritePatternSet loweringPatterns(&getContext());
     populateStdToLLVMConversionPatterns(converter, loweringPatterns);
     arith::populateArithmeticToLLVMConversionPatterns(converter,
                                                       loweringPatterns);

     LLVMConversionTarget legality(getContext());
     if (failed(applyPartialConversion(moduleOp, legality,
                                       std::move(loweringPatterns))))
       signalPassFailure();

     // The data layout attribute is set whether or not the conversion
     // succeeded.
     moduleOp->setAttr(LLVM::LLVMDialect::getDataLayoutAttrName(),
                       StringAttr::get(moduleOp.getContext(), this->dataLayout));
   }
 };
 } // end namespace

 /// Creates the lowering pass from a default-constructed LLVMLoweringPass,
 /// i.e. without setting any pass option from this factory.
 std::unique_ptr<OperationPass<ModuleOp>> mlir::createLowerToLLVMPass() {
   return std::make_unique<LLVMLoweringPass>();
 }

 /// Creates the lowering pass configured from `options`. The calling
 /// convention flags, index bitwidth and data layout are forwarded as-is; the
 /// allocation lowering is reduced to a single "use aligned alloc" flag.
 /// AllocLowering::None is rejected by an assertion.
 std::unique_ptr<OperationPass<ModuleOp>>
 mlir::createLowerToLLVMPass(const LowerToLLVMOptions &options) {
   using AllocLowering = LowerToLLVMOptions::AllocLowering;
   const AllocLowering requestedAlloc = options.allocLowering;

   // There is no way to provide additional patterns for pass, so
   // AllocLowering::None will always fail.
   assert(requestedAlloc != AllocLowering::None &&
          "LLVMLoweringPass doesn't support AllocLowering::None");

   const bool useAlignedAlloc = requestedAlloc == AllocLowering::AlignedAlloc;
   return std::make_unique<LLVMLoweringPass>(
       options.useBarePtrCallConv, options.emitCWrappers,
       options.getIndexBitwidth(), useAlignedAlloc, options.dataLayout);
 }