[SPIR-V] Improve type inference: fix types of return values in call lowering (#116609)

Goals of the PR are:
* to ensure that correct types are applied to virtual registers which
were used as return values in call lowering. A reproducer is attached as
a new test case, before the PR it fails because spirv-val considers
output invalid due to wrong result/operand types in OpPhi's;
* improve type inference by speeding up postprocessing of types: by
limiting iterations by checking what remains to process, and processing
each instruction just once for any number of operands with uncomplete
types;
* improve type inference by more accurate work with uncomplete types
(pass uncomplete property to dependent operands, ensure consistency of
uncomplete-types data structure);
* change processing order and add traversing of PHI nodes when type
inference apply instructions results to specify/update/cast operands
type (fixes an issue with OpPhi's result type mismatch with operand
types).
diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
index 73dce23..a1684b8 100644
--- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
@@ -469,12 +469,8 @@
                               MachineIRBuilder &MIRBuilder,
                               SPIRVGlobalRegistry *GR, LLT LowLevelType,
                               Register DestinationReg = Register(0)) {
-  MachineRegisterInfo *MRI = MIRBuilder.getMRI();
-  if (!DestinationReg.isValid()) {
-    DestinationReg = MRI->createVirtualRegister(&SPIRV::iIDRegClass);
-    MRI->setType(DestinationReg, LLT::scalar(64));
-    GR->assignSPIRVTypeToVReg(BaseType, DestinationReg, MIRBuilder.getMF());
-  }
+  if (!DestinationReg.isValid())
+    DestinationReg = createVirtualRegister(BaseType, GR, MIRBuilder);
   // TODO: consider using correct address space and alignment (p0 is canonical
   // type for selection though).
   MachinePointerInfo PtrInfo = MachinePointerInfo();
@@ -2151,7 +2147,7 @@
     const SPIRVType *PointerSizeTy = GR->getOrCreateSPIRVPointerType(
         Int32Ty, MIRBuilder, SPIRV::StorageClass::Function);
     for (unsigned I = 0; I < LocalSizeNum; ++I) {
-      Register Reg = MRI->createVirtualRegister(&SPIRV::iIDRegClass);
+      Register Reg = MRI->createVirtualRegister(&SPIRV::pIDRegClass);
       MRI->setType(Reg, LLType);
       GR->assignSPIRVTypeToVReg(PointerSizeTy, Reg, MIRBuilder.getMF());
       auto GEPInst = MIRBuilder.buildIntrinsic(
@@ -2539,23 +2535,11 @@
                                  SPIRVGlobalRegistry *GR) {
   LLVM_DEBUG(dbgs() << "Lowering builtin call: " << DemangledCall << "\n");
 
-  // SPIR-V type and return register.
-  Register ReturnRegister = OrigRet;
-  SPIRVType *ReturnType = nullptr;
-  if (OrigRetTy && !OrigRetTy->isVoidTy()) {
-    ReturnType = GR->assignTypeToVReg(OrigRetTy, ReturnRegister, MIRBuilder);
-    if (!MIRBuilder.getMRI()->getRegClassOrNull(ReturnRegister))
-      MIRBuilder.getMRI()->setRegClass(ReturnRegister,
-                                       GR->getRegClass(ReturnType));
-  } else if (OrigRetTy && OrigRetTy->isVoidTy()) {
-    ReturnRegister = MIRBuilder.getMRI()->createVirtualRegister(&IDRegClass);
-    MIRBuilder.getMRI()->setType(ReturnRegister, LLT::scalar(64));
-    ReturnType = GR->assignTypeToVReg(OrigRetTy, ReturnRegister, MIRBuilder);
-  }
-
   // Lookup the builtin in the TableGen records.
+  SPIRVType *SpvType = GR->getSPIRVTypeForVReg(OrigRet);
+  assert(SpvType && "Inconsistent return register: expected valid type info");
   std::unique_ptr<const IncomingCall> Call =
-      lookupBuiltin(DemangledCall, Set, ReturnRegister, ReturnType, Args);
+      lookupBuiltin(DemangledCall, Set, OrigRet, SpvType, Args);
 
   if (!Call) {
     LLVM_DEBUG(dbgs() << "Builtin record was not found!\n");
diff --git a/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp b/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp
index 3c53973..3fdaa6a 100644
--- a/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp
@@ -539,6 +539,23 @@
 
   if (isFunctionDecl && !DemangledName.empty() &&
       (canUseGLSL || canUseOpenCL)) {
+    if (ResVReg.isValid()) {
+      if (!GR->getSPIRVTypeForVReg(ResVReg)) {
+        const Type *RetTy = OrigRetTy;
+        if (auto *PtrRetTy = dyn_cast<PointerType>(OrigRetTy)) {
+          const Value *OrigValue = Info.OrigRet.OrigValue;
+          if (!OrigValue)
+            OrigValue = Info.CB;
+          if (OrigValue)
+            if (Type *ElemTy = GR->findDeducedElementType(OrigValue))
+              RetTy =
+                  TypedPointerType::get(ElemTy, PtrRetTy->getAddressSpace());
+        }
+        setRegClassType(ResVReg, RetTy, GR, MIRBuilder);
+      }
+    } else {
+      ResVReg = createVirtualRegister(OrigRetTy, GR, MIRBuilder);
+    }
     SmallVector<Register, 8> ArgVRegs;
     for (auto Arg : Info.OrigArgs) {
       assert(Arg.Regs.size() == 1 && "Call arg has multiple VRegs");
diff --git a/llvm/lib/Target/SPIRV/SPIRVDuplicatesTracker.cpp b/llvm/lib/Target/SPIRV/SPIRVDuplicatesTracker.cpp
index b82c253..48df845 100644
--- a/llvm/lib/Target/SPIRV/SPIRVDuplicatesTracker.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVDuplicatesTracker.cpp
@@ -69,16 +69,31 @@
         MachineOperand *RegOp = &VRegDef->getOperand(0);
         if (Reg2Entry.count(RegOp) == 0 &&
             (MI->getOpcode() != SPIRV::OpVariable || i != 3)) {
-          std::string DiagMsg;
-          raw_string_ostream OS(DiagMsg);
-          OS << "Unexpected pattern while building a dependency "
-                "graph.\nInstruction: ";
-          MI->print(OS);
-          OS << "Operand: ";
-          Op.print(OS);
-          OS << "\nOperand definition: ";
-          VRegDef->print(OS);
-          report_fatal_error(DiagMsg.c_str());
+          // try to repair the unexpected code pattern
+          bool IsFixed = false;
+          if (VRegDef->getOpcode() == TargetOpcode::G_CONSTANT &&
+              RegOp->isReg() && MRI.getType(RegOp->getReg()).isScalar()) {
+            const Constant *C = VRegDef->getOperand(1).getCImm();
+            add(C, MI->getParent()->getParent(), RegOp->getReg());
+            auto Iter = CT.Storage.find(C);
+            if (Iter != CT.Storage.end()) {
+              SPIRV::DTSortableEntry &MissedEntry = Iter->second;
+              Reg2Entry[RegOp] = &MissedEntry;
+              IsFixed = true;
+            }
+          }
+          if (!IsFixed) {
+            std::string DiagMsg;
+            raw_string_ostream OS(DiagMsg);
+            OS << "Unexpected pattern while building a dependency "
+                  "graph.\nInstruction: ";
+            MI->print(OS);
+            OS << "Operand: ";
+            Op.print(OS);
+            OS << "\nOperand definition: ";
+            VRegDef->print(OS);
+            report_fatal_error(DiagMsg.c_str());
+          }
         }
         if (Reg2Entry.count(RegOp))
           E->addDep(Reg2Entry[RegOp]);
diff --git a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
index e6ef40e..e6f136c 100644
--- a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
@@ -17,6 +17,7 @@
 #include "SPIRVSubtarget.h"
 #include "SPIRVTargetMachine.h"
 #include "SPIRVUtils.h"
+#include "llvm/ADT/DenseSet.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/InstIterator.h"
 #include "llvm/IR/InstVisitor.h"
@@ -67,7 +68,7 @@
       public InstVisitor<SPIRVEmitIntrinsics, Instruction *> {
   SPIRVTargetMachine *TM = nullptr;
   SPIRVGlobalRegistry *GR = nullptr;
-  Function *F = nullptr;
+  Function *CurrF = nullptr;
   bool TrackConstants = true;
   bool HaveFunPtrs = false;
   DenseMap<Instruction *, Constant *> AggrConsts;
@@ -76,8 +77,33 @@
   SPIRV::InstructionSet::InstructionSet InstrSet;
 
   // a register of Instructions that don't have a complete type definition
-  DenseMap<Value *, unsigned> UncompleteTypeInfo;
-  SmallVector<Value *> PostprocessWorklist;
+  bool CanTodoType = true;
+  unsigned TodoTypeSz = 0;
+  DenseMap<Value *, bool> TodoType;
+  void insertTodoType(Value *Op) {
+    // TODO: add isa<CallInst>(Op) to no-insert
+    if (CanTodoType && !isa<GetElementPtrInst>(Op)) {
+      auto It = TodoType.try_emplace(Op, true);
+      if (It.second)
+        ++TodoTypeSz;
+    }
+  }
+  void eraseTodoType(Value *Op) {
+    auto It = TodoType.find(Op);
+    if (It != TodoType.end() && It->second) {
+      TodoType[Op] = false;
+      --TodoTypeSz;
+    }
+  }
+  bool isTodoType(Value *Op) {
+    if (isa<GetElementPtrInst>(Op))
+      return false;
+    auto It = TodoType.find(Op);
+    return It != TodoType.end() && It->second;
+  }
+  // a register of Instructions that were visited by deduceOperandElementType()
+  // to validate operand types with an instruction
+  std::unordered_set<Instruction *> TypeValidated;
 
   // well known result types of builtins
   enum WellKnownTypes { Event };
@@ -86,7 +112,8 @@
   Type *deduceElementType(Value *I, bool UnknownElemTypeI8);
   Type *deduceElementTypeHelper(Value *I, bool UnknownElemTypeI8);
   Type *deduceElementTypeHelper(Value *I, std::unordered_set<Value *> &Visited,
-                                bool UnknownElemTypeI8);
+                                bool UnknownElemTypeI8,
+                                bool IgnoreKnownType = false);
   Type *deduceElementTypeByValueDeep(Type *ValueTy, Value *Operand,
                                      bool UnknownElemTypeI8);
   Type *deduceElementTypeByValueDeep(Type *ValueTy, Value *Operand,
@@ -105,8 +132,10 @@
                                bool UnknownElemTypeI8);
 
   // deduce Types of operands of the Instruction if possible
-  void deduceOperandElementType(Instruction *I, Instruction *AskOp = 0,
-                                Type *AskTy = 0, CallInst *AssignCI = 0);
+  void deduceOperandElementType(Instruction *I,
+                                SmallPtrSet<Instruction *, 4> *UncompleteRets,
+                                const SmallPtrSet<Value *, 4> *AskOps = nullptr,
+                                bool IsPostprocessing = false);
 
   void preprocessCompositeConstants(IRBuilder<> &B);
   void preprocessUndefs(IRBuilder<> &B);
@@ -122,6 +151,9 @@
     return B.CreateIntrinsic(IntrID, {Types}, Args);
   }
 
+  Type *reconstructType(Value *Op, bool UnknownElemTypeI8,
+                        bool IsPostprocessing);
+
   void buildAssignType(IRBuilder<> &B, Type *ElemTy, Value *Arg);
   void buildAssignPtr(IRBuilder<> &B, Type *ElemTy, Value *Arg);
   void updateAssignType(CallInst *AssignCI, Value *Arg, Value *OfType);
@@ -145,12 +177,31 @@
   Type *deduceFunParamElementType(Function *F, unsigned OpIdx);
   Type *deduceFunParamElementType(Function *F, unsigned OpIdx,
                                   std::unordered_set<Function *> &FVisited);
-  void replaceWithPtrcasted(Instruction *CI, Type *NewElemTy, Type *KnownElemTy,
-                            CallInst *AssignCI);
+
+  bool deduceOperandElementTypeCalledFunction(
+      CallInst *CI, SmallVector<std::pair<Value *, unsigned>> &Ops,
+      Type *&KnownElemTy);
+  void deduceOperandElementTypeFunctionPointer(
+      CallInst *CI, SmallVector<std::pair<Value *, unsigned>> &Ops,
+      Type *&KnownElemTy, bool IsPostprocessing);
+
+  CallInst *buildSpvPtrcast(Function *F, Value *Op, Type *ElemTy);
+  void replaceUsesOfWithSpvPtrcast(Value *Op, Type *ElemTy, Instruction *I,
+                                   DenseMap<Function *, CallInst *> Ptrcasts);
+  void propagateElemType(Value *Op, Type *ElemTy,
+                         DenseSet<std::pair<Value *, Value *>> &VisitedSubst);
+  void
+  propagateElemTypeRec(Value *Op, Type *PtrElemTy, Type *CastElemTy,
+                       DenseSet<std::pair<Value *, Value *>> &VisitedSubst);
+  void propagateElemTypeRec(Value *Op, Type *PtrElemTy, Type *CastElemTy,
+                            DenseSet<std::pair<Value *, Value *>> &VisitedSubst,
+                            std::unordered_set<Value *> &Visited,
+                            DenseMap<Function *, CallInst *> Ptrcasts);
+
   void replaceAllUsesWith(Value *Src, Value *Dest, bool DeleteOld = true);
 
   bool runOnFunction(Function &F);
-  bool postprocessTypes();
+  bool postprocessTypes(Module &M);
   bool processFunctionPointers(Module &M);
 
 public:
@@ -203,10 +254,8 @@
 }
 
 bool allowEmitFakeUse(const Value *Arg) {
-  if (const auto *II = dyn_cast<IntrinsicInst>(Arg))
-    if (Function *F = II->getCalledFunction())
-      if (F->getName().starts_with("llvm.spv."))
-        return false;
+  if (isSpvIntrinsic(Arg))
+    return false;
   if (dyn_cast<AtomicCmpXchgInst>(Arg) || dyn_cast<InsertValueInst>(Arg) ||
       dyn_cast<UndefValue>(Arg))
     return false;
@@ -280,17 +329,10 @@
   GR->updateIfExistDeducedElementType(Src, Dest, DeleteOld);
   GR->updateIfExistAssignPtrTypeInstr(Src, Dest, DeleteOld);
   // Update uncomplete type records if any
-  auto It = UncompleteTypeInfo.find(Src);
-  if (It == UncompleteTypeInfo.end())
-    return;
-  if (DeleteOld) {
-    unsigned Pos = It->second;
-    UncompleteTypeInfo.erase(Src);
-    UncompleteTypeInfo[Dest] = Pos;
-    PostprocessWorklist[Pos] = Dest;
-  } else {
-    UncompleteTypeInfo[Dest] = PostprocessWorklist.size();
-    PostprocessWorklist.push_back(Dest);
+  if (isTodoType(Src)) {
+    if (DeleteOld)
+      eraseTodoType(Src);
+    insertTodoType(Dest);
   }
 }
 
@@ -314,8 +356,11 @@
 
 // Reconstruct type with nested element types according to deduced type info.
 // Return nullptr if no detailed type info is available.
-static inline Type *reconstructType(SPIRVGlobalRegistry *GR, Value *Op) {
+Type *SPIRVEmitIntrinsics::reconstructType(Value *Op, bool UnknownElemTypeI8,
+                                           bool IsPostprocessing) {
   Type *Ty = Op->getType();
+  if (auto *OpI = dyn_cast<Instruction>(Op))
+    Ty = restoreMutatedType(GR, OpI, Ty);
   if (!isUntypedPointerTy(Ty))
     return Ty;
   // try to find the pointee type
@@ -323,10 +368,17 @@
     return getTypedPointerWrapper(NestedTy, getPointerAddressSpace(Ty));
   // not a pointer according to the type info (e.g., Event object)
   CallInst *CI = GR->findAssignPtrTypeInstr(Op);
-  if (!CI)
-    return nullptr;
-  MetadataAsValue *MD = cast<MetadataAsValue>(CI->getArgOperand(1));
-  return cast<ConstantAsMetadata>(MD->getMetadata())->getType();
+  if (CI) {
+    MetadataAsValue *MD = cast<MetadataAsValue>(CI->getArgOperand(1));
+    return cast<ConstantAsMetadata>(MD->getMetadata())->getType();
+  }
+  if (UnknownElemTypeI8) {
+    if (!IsPostprocessing)
+      insertTodoType(Op);
+    return getTypedPointerWrapper(IntegerType::getInt8Ty(Op->getContext()),
+                                  getPointerAddressSpace(Ty));
+  }
+  return nullptr;
 }
 
 void SPIRVEmitIntrinsics::buildAssignType(IRBuilder<> &B, Type *Ty,
@@ -354,7 +406,7 @@
   Value *OfType = PoisonValue::get(ElemTy);
   CallInst *AssignPtrTyCI = GR->findAssignPtrTypeInstr(Arg);
   if (AssignPtrTyCI == nullptr ||
-      AssignPtrTyCI->getParent()->getParent() != F) {
+      AssignPtrTyCI->getParent()->getParent() != CurrF) {
     AssignPtrTyCI = buildIntrWithMD(
         Intrinsic::spv_assign_ptr_type, {Arg->getType()}, OfType, Arg,
         {B.getInt32(getPointerAddressSpace(Arg->getType()))}, B);
@@ -379,8 +431,97 @@
   GR->addDeducedElementType(Arg, ElemTy);
 }
 
+CallInst *SPIRVEmitIntrinsics::buildSpvPtrcast(Function *F, Value *Op,
+                                               Type *ElemTy) {
+  IRBuilder<> B(Op->getContext());
+  if (auto *OpI = dyn_cast<Instruction>(Op)) {
+    // spv_ptrcast's argument Op denotes an instruction that generates
+    // a value, and we may use getInsertionPointAfterDef()
+    setInsertPointAfterDef(B, OpI);
+  } else if (auto *OpA = dyn_cast<Argument>(Op)) {
+    B.SetInsertPointPastAllocas(OpA->getParent());
+    B.SetCurrentDebugLocation(DebugLoc());
+  } else {
+    B.SetInsertPoint(F->getEntryBlock().getFirstNonPHIOrDbgOrAlloca());
+  }
+  Type *OpTy = Op->getType();
+  SmallVector<Type *, 2> Types = {OpTy, OpTy};
+  SmallVector<Value *, 2> Args = {Op, buildMD(PoisonValue::get(ElemTy)),
+                                  B.getInt32(getPointerAddressSpace(OpTy))};
+  CallInst *PtrCasted =
+      B.CreateIntrinsic(Intrinsic::spv_ptrcast, {Types}, Args);
+  buildAssignPtr(B, ElemTy, PtrCasted);
+  return PtrCasted;
+}
+
+void SPIRVEmitIntrinsics::replaceUsesOfWithSpvPtrcast(
+    Value *Op, Type *ElemTy, Instruction *I,
+    DenseMap<Function *, CallInst *> Ptrcasts) {
+  Function *F = I->getParent()->getParent();
+  CallInst *PtrCastedI = nullptr;
+  auto It = Ptrcasts.find(F);
+  if (It == Ptrcasts.end()) {
+    PtrCastedI = buildSpvPtrcast(F, Op, ElemTy);
+    Ptrcasts[F] = PtrCastedI;
+  } else {
+    PtrCastedI = It->second;
+  }
+  I->replaceUsesOfWith(Op, PtrCastedI);
+}
+
+void SPIRVEmitIntrinsics::propagateElemType(
+    Value *Op, Type *ElemTy,
+    DenseSet<std::pair<Value *, Value *>> &VisitedSubst) {
+  DenseMap<Function *, CallInst *> Ptrcasts;
+  SmallVector<User *> Users(Op->users());
+  for (auto *U : Users) {
+    if (!isa<Instruction>(U) || isa<BitCastInst>(U) || isSpvIntrinsic(U))
+      continue;
+    if (!VisitedSubst.insert(std::make_pair(U, Op)).second)
+      continue;
+    Instruction *UI = dyn_cast<Instruction>(U);
+    // If the instruction was validated already, we need to keep it valid by
+    // keeping current Op type.
+    if (isa<GetElementPtrInst>(UI) ||
+        TypeValidated.find(UI) != TypeValidated.end())
+      replaceUsesOfWithSpvPtrcast(Op, ElemTy, UI, Ptrcasts);
+  }
+}
+
+void SPIRVEmitIntrinsics::propagateElemTypeRec(
+    Value *Op, Type *PtrElemTy, Type *CastElemTy,
+    DenseSet<std::pair<Value *, Value *>> &VisitedSubst) {
+  std::unordered_set<Value *> Visited;
+  DenseMap<Function *, CallInst *> Ptrcasts;
+  propagateElemTypeRec(Op, PtrElemTy, CastElemTy, VisitedSubst, Visited,
+                       Ptrcasts);
+}
+
+void SPIRVEmitIntrinsics::propagateElemTypeRec(
+    Value *Op, Type *PtrElemTy, Type *CastElemTy,
+    DenseSet<std::pair<Value *, Value *>> &VisitedSubst,
+    std::unordered_set<Value *> &Visited,
+    DenseMap<Function *, CallInst *> Ptrcasts) {
+  if (!Visited.insert(Op).second)
+    return;
+  SmallVector<User *> Users(Op->users());
+  for (auto *U : Users) {
+    if (!isa<Instruction>(U) || isa<BitCastInst>(U) || isSpvIntrinsic(U))
+      continue;
+    if (!VisitedSubst.insert(std::make_pair(U, Op)).second)
+      continue;
+    Instruction *UI = dyn_cast<Instruction>(U);
+    // If the instruction was validated already, we need to keep it valid by
+    // keeping current Op type.
+    if (isa<GetElementPtrInst>(UI) ||
+        TypeValidated.find(UI) != TypeValidated.end())
+      replaceUsesOfWithSpvPtrcast(Op, CastElemTy, UI, Ptrcasts);
+  }
+}
+
 // Set element pointer type to the given value of ValueTy and tries to
 // specify this type further (recursively) by Operand value, if needed.
+
 Type *
 SPIRVEmitIntrinsics::deduceElementTypeByValueDeep(Type *ValueTy, Value *Operand,
                                                   bool UnknownElemTypeI8) {
@@ -455,23 +596,22 @@
   if (isUntypedPointerTy(RefTy)) {
     if (!UnknownElemTypeI8)
       return;
-    if (auto *I = dyn_cast<Instruction>(Op)) {
-      UncompleteTypeInfo[I] = PostprocessWorklist.size();
-      PostprocessWorklist.push_back(I);
-    }
+    insertTodoType(Op);
   }
   Ty = RefTy;
 }
 
 Type *SPIRVEmitIntrinsics::deduceElementTypeHelper(
-    Value *I, std::unordered_set<Value *> &Visited, bool UnknownElemTypeI8) {
+    Value *I, std::unordered_set<Value *> &Visited, bool UnknownElemTypeI8,
+    bool IgnoreKnownType) {
   // allow to pass nullptr as an argument
   if (!I)
     return nullptr;
 
   // maybe already known
-  if (Type *KnownTy = GR->findDeducedElementType(I))
-    return KnownTy;
+  if (!IgnoreKnownType)
+    if (Type *KnownTy = GR->findDeducedElementType(I))
+      return KnownTy;
 
   // maybe a cycle
   if (!Visited.insert(I).second)
@@ -483,7 +623,22 @@
   if (auto *Ref = dyn_cast<AllocaInst>(I)) {
     maybeAssignPtrType(Ty, I, Ref->getAllocatedType(), UnknownElemTypeI8);
   } else if (auto *Ref = dyn_cast<GetElementPtrInst>(I)) {
-    Ty = Ref->getResultElementType();
+    // TODO: not sure if GetElementPtrInst::getTypeAtIndex() does anything
+    // useful here
+    if (isNestedPointer(Ref->getSourceElementType())) {
+      Ty = Ref->getSourceElementType();
+      for (Use &U : drop_begin(Ref->indices()))
+        Ty = GetElementPtrInst::getTypeAtIndex(Ty, U.get());
+    } else {
+      Ty = Ref->getResultElementType();
+    }
+  } else if (auto *Ref = dyn_cast<LoadInst>(I)) {
+    Value *Op = Ref->getPointerOperand();
+    Type *KnownTy = GR->findDeducedElementType(Op);
+    if (!KnownTy)
+      KnownTy = Op->getType();
+    if (Type *ElemTy = getPointeeType(KnownTy))
+      maybeAssignPtrType(Ty, I, ElemTy, UnknownElemTypeI8);
   } else if (auto *Ref = dyn_cast<GlobalValue>(I)) {
     Ty = deduceElementTypeByValueDeep(
         Ref->getValueType(),
@@ -559,7 +714,7 @@
   }
 
   // remember the found relationship
-  if (Ty) {
+  if (Ty && !IgnoreKnownType) {
     // specify nested types if needed, otherwise return unchanged
     GR->addDeducedElementType(I, Ty);
   }
@@ -601,7 +756,7 @@
         if (auto *PtrTy = dyn_cast<PointerType>(OpTy)) {
           if (Type *NestedTy =
                   deduceElementTypeHelper(Op, Visited, UnknownElemTypeI8))
-            Ty = TypedPointerType::get(NestedTy, PtrTy->getAddressSpace());
+            Ty = getTypedPointerWrapper(NestedTy, PtrTy->getAddressSpace());
         } else {
           Ty = deduceNestedTypeHelper(dyn_cast<User>(Op), OpTy, Visited,
                                       UnknownElemTypeI8);
@@ -622,7 +777,7 @@
       if (auto *PtrTy = dyn_cast<PointerType>(OpTy)) {
         if (Type *NestedTy =
                 deduceElementTypeHelper(Op, Visited, UnknownElemTypeI8))
-          Ty = TypedPointerType::get(NestedTy, PtrTy->getAddressSpace());
+          Ty = getTypedPointerWrapper(NestedTy, PtrTy->getAddressSpace());
       } else {
         Ty = deduceNestedTypeHelper(dyn_cast<User>(Op), OpTy, Visited,
                                     UnknownElemTypeI8);
@@ -661,10 +816,7 @@
     return Ty;
   if (!UnknownElemTypeI8)
     return nullptr;
-  if (auto *Instr = dyn_cast<Instruction>(I)) {
-    UncompleteTypeInfo[Instr] = PostprocessWorklist.size();
-    PostprocessWorklist.push_back(Instr);
-  }
+  insertTodoType(I);
   return IntegerType::getInt8Ty(I->getContext());
 }
 
@@ -683,10 +835,9 @@
 
 // Try to deduce element type for a call base. Returns false if this is an
 // indirect function invocation, and true otherwise.
-static bool deduceOperandElementTypeCalledFunction(
-    SPIRVGlobalRegistry *GR, Instruction *I,
-    SPIRV::InstructionSet::InstructionSet InstrSet, CallInst *CI,
-    SmallVector<std::pair<Value *, unsigned>> &Ops, Type *&KnownElemTy) {
+bool SPIRVEmitIntrinsics::deduceOperandElementTypeCalledFunction(
+    CallInst *CI, SmallVector<std::pair<Value *, unsigned>> &Ops,
+    Type *&KnownElemTy) {
   Function *CalledF = CI->getCalledFunction();
   if (!CalledF)
     return false;
@@ -726,7 +877,7 @@
       case SPIRV::OpAtomicUMax:
       case SPIRV::OpAtomicSMin:
       case SPIRV::OpAtomicSMax: {
-        KnownElemTy = getAtomicElemTy(GR, I, Op);
+        KnownElemTy = getAtomicElemTy(GR, CI, Op);
         if (!KnownElemTy)
           return true;
         Ops.push_back(std::make_pair(Op, 0));
@@ -738,32 +889,44 @@
 }
 
 // Try to deduce element type for a function pointer.
-static void deduceOperandElementTypeFunctionPointer(
-    SPIRVGlobalRegistry *GR, Instruction *I, CallInst *CI,
-    SmallVector<std::pair<Value *, unsigned>> &Ops, Type *&KnownElemTy) {
+void SPIRVEmitIntrinsics::deduceOperandElementTypeFunctionPointer(
+    CallInst *CI, SmallVector<std::pair<Value *, unsigned>> &Ops,
+    Type *&KnownElemTy, bool IsPostprocessing) {
   Value *Op = CI->getCalledOperand();
   if (!Op || !isPointerTy(Op->getType()))
     return;
   Ops.push_back(std::make_pair(Op, std::numeric_limits<unsigned>::max()));
   FunctionType *FTy = CI->getFunctionType();
-  bool IsNewFTy = false;
+  bool IsNewFTy = false, IsUncomplete = false;
   SmallVector<Type *, 4> ArgTys;
   for (Value *Arg : CI->args()) {
     Type *ArgTy = Arg->getType();
-    if (ArgTy->isPointerTy())
+    if (ArgTy->isPointerTy()) {
       if (Type *ElemTy = GR->findDeducedElementType(Arg)) {
         IsNewFTy = true;
-        ArgTy = TypedPointerType::get(ElemTy, getPointerAddressSpace(ArgTy));
+        ArgTy = getTypedPointerWrapper(ElemTy, getPointerAddressSpace(ArgTy));
+        if (isTodoType(Arg))
+          IsUncomplete = true;
+      } else {
+        IsUncomplete = true;
       }
+    }
     ArgTys.push_back(ArgTy);
   }
   Type *RetTy = FTy->getReturnType();
-  if (I->getType()->isPointerTy())
-    if (Type *ElemTy = GR->findDeducedElementType(I)) {
+  if (CI->getType()->isPointerTy()) {
+    if (Type *ElemTy = GR->findDeducedElementType(CI)) {
       IsNewFTy = true;
       RetTy =
-          TypedPointerType::get(ElemTy, getPointerAddressSpace(I->getType()));
+          getTypedPointerWrapper(ElemTy, getPointerAddressSpace(CI->getType()));
+      if (isTodoType(CI))
+        IsUncomplete = true;
+    } else {
+      IsUncomplete = true;
     }
+  }
+  if (!IsPostprocessing && IsUncomplete)
+    insertTodoType(Op);
   KnownElemTy =
       IsNewFTy ? FunctionType::get(RetTy, ArgTys, FTy->isVarArg()) : FTy;
 }
@@ -772,17 +935,18 @@
 // tries to deduce them. If the Instruction has Pointer operands with known
 // types which differ from expected, this function tries to insert a bitcast to
 // resolve the issue.
-void SPIRVEmitIntrinsics::deduceOperandElementType(Instruction *I,
-                                                   Instruction *AskOp,
-                                                   Type *AskTy,
-                                                   CallInst *AskCI) {
+void SPIRVEmitIntrinsics::deduceOperandElementType(
+    Instruction *I, SmallPtrSet<Instruction *, 4> *UncompleteRets,
+    const SmallPtrSet<Value *, 4> *AskOps, bool IsPostprocessing) {
   SmallVector<std::pair<Value *, unsigned>> Ops;
   Type *KnownElemTy = nullptr;
+  bool Uncomplete = false;
   // look for known basic patterns of type inference
   if (auto *Ref = dyn_cast<PHINode>(I)) {
     if (!isPointerTy(I->getType()) ||
         !(KnownElemTy = GR->findDeducedElementType(I)))
       return;
+    Uncomplete = isTodoType(I);
     for (unsigned i = 0; i < Ref->getNumIncomingValues(); i++) {
       Value *Op = Ref->getIncomingValue(i);
       if (isPointerTy(Op->getType()))
@@ -792,14 +956,12 @@
     KnownElemTy = GR->findDeducedElementType(I);
     if (!KnownElemTy)
       return;
+    Uncomplete = isTodoType(I);
     Ops.push_back(std::make_pair(Ref->getPointerOperand(), 0));
   } else if (auto *Ref = dyn_cast<GetElementPtrInst>(I)) {
+    if (GR->findDeducedElementType(Ref->getPointerOperand()))
+      return;
     KnownElemTy = Ref->getSourceElementType();
-    if (isUntypedPointerTy(KnownElemTy))
-      return;
-    Type *PointeeTy = GR->findDeducedElementType(Ref->getPointerOperand());
-    if (PointeeTy && !isUntypedPointerTy(PointeeTy))
-      return;
     Ops.push_back(std::make_pair(Ref->getPointerOperand(),
                                  GetElementPtrInst::getPointerOperandIndex()));
   } else if (auto *Ref = dyn_cast<LoadInst>(I)) {
@@ -812,9 +974,8 @@
     Ops.push_back(std::make_pair(Ref->getPointerOperand(),
                                  LoadInst::getPointerOperandIndex()));
   } else if (auto *Ref = dyn_cast<StoreInst>(I)) {
-    if (IsKernelArgInt8(Ref->getParent()->getParent(), Ref))
-      return;
-    if (!(KnownElemTy = reconstructType(GR, Ref->getValueOperand())))
+    if (!(KnownElemTy =
+              reconstructType(Ref->getValueOperand(), false, IsPostprocessing)))
       return;
     Type *PointeeTy = GR->findDeducedElementType(Ref->getPointerOperand());
     if (PointeeTy && !isUntypedPointerTy(PointeeTy))
@@ -837,27 +998,54 @@
     if (!isPointerTy(I->getType()) ||
         !(KnownElemTy = GR->findDeducedElementType(I)))
       return;
+    Uncomplete = isTodoType(I);
     for (unsigned i = 0; i < Ref->getNumOperands(); i++) {
       Value *Op = Ref->getOperand(i);
       if (isPointerTy(Op->getType()))
         Ops.push_back(std::make_pair(Op, i));
     }
   } else if (auto *Ref = dyn_cast<ReturnInst>(I)) {
-    Type *RetTy = F->getReturnType();
+    Type *RetTy = CurrF->getReturnType();
     if (!isPointerTy(RetTy))
       return;
     Value *Op = Ref->getReturnValue();
     if (!Op)
       return;
-    if (!(KnownElemTy = GR->findDeducedElementType(F))) {
+    if (!(KnownElemTy = GR->findDeducedElementType(CurrF))) {
       if (Type *OpElemTy = GR->findDeducedElementType(Op)) {
-        GR->addDeducedElementType(F, OpElemTy);
-        TypedPointerType *DerivedTy =
-            TypedPointerType::get(OpElemTy, getPointerAddressSpace(RetTy));
-        GR->addReturnType(F, DerivedTy);
+        GR->addDeducedElementType(CurrF, OpElemTy);
+        GR->addReturnType(CurrF, TypedPointerType::get(
+                                     OpElemTy, getPointerAddressSpace(RetTy)));
+        // non-recursive update of types in function uses
+        DenseSet<std::pair<Value *, Value *>> VisitedSubst{
+            std::make_pair(I, Op)};
+        for (User *U : CurrF->users()) {
+          CallInst *CI = dyn_cast<CallInst>(U);
+          if (!CI || CI->getCalledFunction() != CurrF)
+            continue;
+          if (CallInst *AssignCI = GR->findAssignPtrTypeInstr(CI)) {
+            if (Type *PrevElemTy = GR->findDeducedElementType(CI)) {
+              updateAssignType(AssignCI, CI, PoisonValue::get(OpElemTy));
+              propagateElemType(CI, PrevElemTy, VisitedSubst);
+            }
+          }
+        }
+        TypeValidated.insert(I);
+        // Non-recursive update of types in the function uncomplete returns.
+        // This may happen just once per a function, the latch is a pair of
+        // findDeducedElementType(F) / addDeducedElementType(F, ...).
+        // With or without the latch it is a non-recursive call due to
+        // UncompleteRets set to nullptr in this call.
+        if (UncompleteRets)
+          for (Instruction *UncompleteRetI : *UncompleteRets)
+            deduceOperandElementType(UncompleteRetI, nullptr, AskOps,
+                                     IsPostprocessing);
+      } else if (UncompleteRets) {
+        UncompleteRets->insert(I);
       }
       return;
     }
+    Uncomplete = isTodoType(CurrF);
     Ops.push_back(std::make_pair(Op, 0));
   } else if (auto *Ref = dyn_cast<ICmpInst>(I)) {
     if (!isPointerTy(Ref->getOperand(0)->getType()))
@@ -868,37 +1056,53 @@
     Type *ElemTy1 = GR->findDeducedElementType(Op1);
     if (ElemTy0) {
       KnownElemTy = ElemTy0;
+      Uncomplete = isTodoType(Op0);
       Ops.push_back(std::make_pair(Op1, 1));
     } else if (ElemTy1) {
       KnownElemTy = ElemTy1;
+      Uncomplete = isTodoType(Op1);
       Ops.push_back(std::make_pair(Op0, 0));
     }
   } else if (CallInst *CI = dyn_cast<CallInst>(I)) {
     if (!CI->isIndirectCall())
-      deduceOperandElementTypeCalledFunction(GR, I, InstrSet, CI, Ops,
-                                             KnownElemTy);
+      deduceOperandElementTypeCalledFunction(CI, Ops, KnownElemTy);
     else if (HaveFunPtrs)
-      deduceOperandElementTypeFunctionPointer(GR, I, CI, Ops, KnownElemTy);
+      deduceOperandElementTypeFunctionPointer(CI, Ops, KnownElemTy,
+                                              IsPostprocessing);
   }
 
   // There is no enough info to deduce types or all is valid.
   if (!KnownElemTy || Ops.size() == 0)
     return;
 
-  LLVMContext &Ctx = F->getContext();
+  LLVMContext &Ctx = CurrF->getContext();
   IRBuilder<> B(Ctx);
   for (auto &OpIt : Ops) {
     Value *Op = OpIt.first;
-    if (Op->use_empty() || (AskOp && Op != AskOp))
+    if (Op->use_empty())
       continue;
-    Type *Ty = AskOp ? AskTy : GR->findDeducedElementType(Op);
+    if (AskOps && !AskOps->contains(Op))
+      continue;
+    Type *AskTy = nullptr;
+    CallInst *AskCI = nullptr;
+    if (IsPostprocessing && AskOps) {
+      AskTy = GR->findDeducedElementType(Op);
+      AskCI = GR->findAssignPtrTypeInstr(Op);
+      assert(AskTy && AskCI);
+    }
+    Type *Ty = AskTy ? AskTy : GR->findDeducedElementType(Op);
     if (Ty == KnownElemTy)
       continue;
     Value *OpTyVal = PoisonValue::get(KnownElemTy);
     Type *OpTy = Op->getType();
-    if (!Ty || AskTy || isUntypedPointerTy(Ty) ||
-        UncompleteTypeInfo.contains(Op)) {
+    if (!Ty || AskTy || isUntypedPointerTy(Ty) || isTodoType(Op)) {
+      Type *PrevElemTy = GR->findDeducedElementType(Op);
       GR->addDeducedElementType(Op, KnownElemTy);
+      // check if KnownElemTy is complete
+      if (!Uncomplete)
+        eraseTodoType(Op);
+      else if (!IsPostprocessing)
+        insertTodoType(Op);
       // check if there is existing Intrinsic::spv_assign_ptr_type instruction
       CallInst *AssignCI = AskCI ? AskCI : GR->findAssignPtrTypeInstr(Op);
       if (AssignCI == nullptr) {
@@ -910,31 +1114,21 @@
         GR->addAssignPtrTypeInstr(Op, CI);
       } else {
         updateAssignType(AssignCI, Op, OpTyVal);
+        DenseSet<std::pair<Value *, Value *>> VisitedSubst{
+            std::make_pair(I, Op)};
+        propagateElemTypeRec(Op, KnownElemTy, PrevElemTy, VisitedSubst);
       }
     } else {
-      if (auto *OpI = dyn_cast<Instruction>(Op)) {
-        // spv_ptrcast's argument Op denotes an instruction that generates
-        // a value, and we may use getInsertionPointAfterDef()
-        B.SetInsertPoint(*OpI->getInsertionPointAfterDef());
-        B.SetCurrentDebugLocation(OpI->getDebugLoc());
-      } else if (auto *OpA = dyn_cast<Argument>(Op)) {
-        B.SetInsertPointPastAllocas(OpA->getParent());
-        B.SetCurrentDebugLocation(DebugLoc());
-      } else {
-        B.SetInsertPoint(F->getEntryBlock().getFirstNonPHIOrDbgOrAlloca());
-      }
-      SmallVector<Type *, 2> Types = {OpTy, OpTy};
-      SmallVector<Value *, 2> Args = {Op, buildMD(OpTyVal),
-                                      B.getInt32(getPointerAddressSpace(OpTy))};
+      eraseTodoType(Op);
       CallInst *PtrCastI =
-          B.CreateIntrinsic(Intrinsic::spv_ptrcast, {Types}, Args);
+          buildSpvPtrcast(I->getParent()->getParent(), Op, KnownElemTy);
       if (OpIt.second == std::numeric_limits<unsigned>::max())
         dyn_cast<CallInst>(I)->setCalledOperand(PtrCastI);
       else
         I->setOperand(OpIt.second, PtrCastI);
-      buildAssignPtr(B, KnownElemTy, PtrCastI);
     }
   }
+  TypeValidated.insert(I);
 }
 
 void SPIRVEmitIntrinsics::replaceMemInstrUses(Instruction *Old,
@@ -961,7 +1155,7 @@
 
 void SPIRVEmitIntrinsics::preprocessUndefs(IRBuilder<> &B) {
   std::queue<Instruction *> Worklist;
-  for (auto &I : instructions(F))
+  for (auto &I : instructions(CurrF))
     Worklist.push(&I);
 
   while (!Worklist.empty()) {
@@ -989,7 +1183,7 @@
 
 void SPIRVEmitIntrinsics::preprocessCompositeConstants(IRBuilder<> &B) {
   std::queue<Instruction *> Worklist;
-  for (auto &I : instructions(F))
+  for (auto &I : instructions(CurrF))
     Worklist.push(&I);
 
   while (!Worklist.empty()) {
@@ -1048,7 +1242,7 @@
     return &Call;
 
   const InlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand());
-  LLVMContext &Ctx = F->getContext();
+  LLVMContext &Ctx = CurrF->getContext();
 
   Constant *TyC = UndefValue::get(IA->getFunctionType());
   MDString *ConstraintString = MDString::get(Ctx, IA->getConstraintString());
@@ -1141,9 +1335,9 @@
   Type *VTy = V->getType();
 
   // A couple of sanity checks.
-  assert(isPointerTy(VTy) && "Expect a pointer type!");
-  if (auto PType = dyn_cast<TypedPointerType>(VTy))
-    if (PType->getElementType() != AssignedType)
+  assert((isPointerTy(VTy)) && "Expect a pointer type!");
+  if (Type *ElemTy = getPointeeType(VTy))
+    if (ElemTy != AssignedType)
       report_fatal_error("Unexpected pointer element type!");
 
   CallInst *AssignCI = GR->findAssignPtrTypeInstr(V);
@@ -1174,6 +1368,7 @@
 void SPIRVEmitIntrinsics::replacePointerOperandWithPtrCast(
     Instruction *I, Value *Pointer, Type *ExpectedElementType,
     unsigned OperandToReplace, IRBuilder<> &B) {
+  TypeValidated.insert(I);
   // If Pointer is the result of nop BitCastInst (ptr -> ptr), use the source
   // pointer instead. The BitCastInst should be later removed when visited.
   while (BitCastInst *BC = dyn_cast<BitCastInst>(Pointer))
@@ -1186,7 +1381,8 @@
     return;
 
   setInsertPointSkippingPhis(B, I);
-  MetadataAsValue *VMD = buildMD(PoisonValue::get(ExpectedElementType));
+  Value *ExpectedElementVal = PoisonValue::get(ExpectedElementType);
+  MetadataAsValue *VMD = buildMD(ExpectedElementVal);
   unsigned AddressSpace = getPointerAddressSpace(Pointer->getType());
   bool FirstPtrCastOrAssignPtrType = true;
 
@@ -1223,17 +1419,30 @@
     return;
   }
 
-  // // Do not emit spv_ptrcast if it would cast to the default pointer element
-  // // type (i8) of the same address space.
-  // if (ExpectedElementType->isIntegerTy(8))
-  //   return;
-
-  // If this would be the first spv_ptrcast, do not emit spv_ptrcast and emit
-  // spv_assign_ptr_type instead.
-  if (FirstPtrCastOrAssignPtrType &&
-      (isa<Instruction>(Pointer) || isa<Argument>(Pointer))) {
-    buildAssignPtr(B, ExpectedElementType, Pointer);
-    return;
+  if (isa<Instruction>(Pointer) || isa<Argument>(Pointer)) {
+    if (FirstPtrCastOrAssignPtrType) {
+      // If this would be the first spv_ptrcast, do not emit spv_ptrcast and
+      // emit spv_assign_ptr_type instead.
+      buildAssignPtr(B, ExpectedElementType, Pointer);
+      return;
+    } else if (isTodoType(Pointer)) {
+      eraseTodoType(Pointer);
+      if (!isa<CallInst>(Pointer) && !isa<GetElementPtrInst>(Pointer)) {
+        //  If this wouldn't be the first spv_ptrcast but existing type info is
+        //  uncomplete, update spv_assign_ptr_type arguments.
+        if (CallInst *AssignCI = GR->findAssignPtrTypeInstr(Pointer)) {
+          Type *PrevElemTy = GR->findDeducedElementType(Pointer);
+          assert(PrevElemTy);
+          DenseSet<std::pair<Value *, Value *>> VisitedSubst{
+              std::make_pair(I, Pointer)};
+          updateAssignType(AssignCI, Pointer, ExpectedElementVal);
+          propagateElemType(Pointer, PrevElemTy, VisitedSubst);
+        } else {
+          buildAssignPtr(B, ExpectedElementType, Pointer);
+        }
+        return;
+      }
+    }
   }
 
   // Emit spv_ptrcast
@@ -1249,27 +1458,48 @@
                                                          IRBuilder<> &B) {
   // Handle basic instructions:
   StoreInst *SI = dyn_cast<StoreInst>(I);
-  if (IsKernelArgInt8(F, SI)) {
-    return replacePointerOperandWithPtrCast(
-        I, SI->getValueOperand(), IntegerType::getInt8Ty(F->getContext()), 0,
-        B);
-  } else if (SI) {
+  if (IsKernelArgInt8(CurrF, SI)) {
+    replacePointerOperandWithPtrCast(
+        I, SI->getValueOperand(), IntegerType::getInt8Ty(CurrF->getContext()),
+        0, B);
+  }
+  if (SI) {
     Value *Op = SI->getValueOperand();
+    Value *Pointer = SI->getPointerOperand();
     Type *OpTy = Op->getType();
     if (auto *OpI = dyn_cast<Instruction>(Op))
       OpTy = restoreMutatedType(GR, OpI, OpTy);
     if (OpTy == Op->getType())
       OpTy = deduceElementTypeByValueDeep(OpTy, Op, false);
-    return replacePointerOperandWithPtrCast(I, SI->getPointerOperand(), OpTy, 1,
-                                            B);
-  } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
-    return replacePointerOperandWithPtrCast(I, LI->getPointerOperand(),
-                                            LI->getType(), 0, B);
-  } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
-    return replacePointerOperandWithPtrCast(I, GEPI->getPointerOperand(),
-                                            GEPI->getSourceElementType(), 0, B);
+    replacePointerOperandWithPtrCast(I, Pointer, OpTy, 1, B);
+    return;
+  }
+  if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+    Value *Pointer = LI->getPointerOperand();
+    Type *OpTy = LI->getType();
+    if (auto *PtrTy = dyn_cast<PointerType>(OpTy)) {
+      if (Type *ElemTy = GR->findDeducedElementType(LI)) {
+        OpTy = getTypedPointerWrapper(ElemTy, PtrTy->getAddressSpace());
+      } else {
+        Type *NewOpTy = OpTy;
+        OpTy = deduceElementTypeByValueDeep(OpTy, LI, false);
+        if (OpTy == NewOpTy)
+          insertTodoType(Pointer);
+      }
+    }
+    replacePointerOperandWithPtrCast(I, Pointer, OpTy, 0, B);
+    return;
+  }
+  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
+    Value *Pointer = GEPI->getPointerOperand();
+    Type *OpTy = GEPI->getSourceElementType();
+    replacePointerOperandWithPtrCast(I, Pointer, OpTy, 0, B);
+    if (isNestedPointer(OpTy))
+      insertTodoType(Pointer);
+    return;
   }
 
+  // TODO: review and merge with existing logics:
   // Handle calls to builtins (non-intrinsics):
   CallInst *CI = dyn_cast<CallInst>(I);
   if (!CI || CI->isIndirectCall() || CI->isInlineAsm() ||
@@ -1287,8 +1517,8 @@
     Type *ArgType = CalledArg->getType();
     if (!isPointerTy(ArgType)) {
       CalledArgTys.push_back(nullptr);
-    } else if (isTypedPointerTy(ArgType)) {
-      CalledArgTys.push_back(cast<TypedPointerType>(ArgType)->getElementType());
+    } else if (Type *ArgTypeElem = getPointeeType(ArgType)) {
+      CalledArgTys.push_back(ArgTypeElem);
       HaveTypes = true;
     } else {
       Type *ElemTy = GR->findDeducedElementType(CalledArg);
@@ -1338,7 +1568,8 @@
     if (!ExpectedType || ExpectedType->isVoidTy())
       continue;
 
-    if (ExpectedType->isTargetExtTy())
+    if (ExpectedType->isTargetExtTy() &&
+        !isTypedPointerWrapper(cast<TargetExtType>(ExpectedType)))
       insertAssignPtrTypeTargetExt(cast<TargetExtType>(ExpectedType),
                                    ArgOperand, B);
     else
@@ -1419,7 +1650,7 @@
   TrackConstants = false;
   const auto *TLI = TM->getSubtargetImpl()->getTargetLowering();
   MachineMemOperand::Flags Flags =
-      TLI->getLoadMemOperandFlags(I, F->getDataLayout());
+      TLI->getLoadMemOperandFlags(I, CurrF->getDataLayout());
   auto *NewI =
       B.CreateIntrinsic(Intrinsic::spv_load, {I.getOperand(0)->getType()},
                         {I.getPointerOperand(), B.getInt16(Flags),
@@ -1436,7 +1667,7 @@
   TrackConstants = false;
   const auto *TLI = TM->getSubtargetImpl()->getTargetLowering();
   MachineMemOperand::Flags Flags =
-      TLI->getStoreMemOperandFlags(I, F->getDataLayout());
+      TLI->getStoreMemOperandFlags(I, CurrF->getDataLayout());
   auto *PtrOp = I.getPointerOperand();
   auto *NewI = B.CreateIntrinsic(
       Intrinsic::spv_store, {I.getValueOperand()->getType(), PtrOp->getType()},
@@ -1602,8 +1833,9 @@
         GR->addAssignPtrTypeInstr(Op, AssignCI);
       } else if (!isa<Instruction>(Op)) {
         Type *OpTy = Op->getType();
-        if (auto PType = dyn_cast<TypedPointerType>(OpTy)) {
-          buildAssignPtr(B, PType->getElementType(), Op);
+        Type *OpTyElem = getPointeeType(OpTy);
+        if (OpTyElem) {
+          buildAssignPtr(B, OpTyElem, Op);
         } else if (isPointerTy(OpTy)) {
           Type *ElemTy = GR->findDeducedElementType(Op);
           buildAssignPtr(B, ElemTy ? ElemTy : deduceElementType(Op, true), Op);
@@ -1742,9 +1974,44 @@
     if (!isUntypedPointerTy(Arg->getType()))
       continue;
     Type *ElemTy = GR->findDeducedElementType(Arg);
-    if (!ElemTy && hasPointeeTypeAttr(Arg) &&
-        (ElemTy = getPointeeTypeByAttr(Arg)) != nullptr)
+    if (ElemTy)
+      continue;
+    if (hasPointeeTypeAttr(Arg) &&
+        (ElemTy = getPointeeTypeByAttr(Arg)) != nullptr) {
       buildAssignPtr(B, ElemTy, Arg);
+      continue;
+    }
+    // search in function's call sites
+    for (User *U : F->users()) {
+      CallInst *CI = dyn_cast<CallInst>(U);
+      if (!CI || OpIdx >= CI->arg_size())
+        continue;
+      Value *OpArg = CI->getArgOperand(OpIdx);
+      if (!isPointerTy(OpArg->getType()))
+        continue;
+      // maybe we already know operand's element type
+      if ((ElemTy = GR->findDeducedElementType(OpArg)) != nullptr)
+        break;
+    }
+    if (ElemTy) {
+      buildAssignPtr(B, ElemTy, Arg);
+      continue;
+    }
+    if (HaveFunPtrs) {
+      for (User *U : Arg->users()) {
+        CallInst *CI = dyn_cast<CallInst>(U);
+        if (CI && !isa<IntrinsicInst>(CI) && CI->isIndirectCall() &&
+            CI->getCalledOperand() == Arg &&
+            CI->getParent()->getParent() == CurrF) {
+          SmallVector<std::pair<Value *, unsigned>> Ops;
+          deduceOperandElementTypeFunctionPointer(CI, Ops, ElemTy, false);
+          if (ElemTy) {
+            buildAssignPtr(B, ElemTy, Arg);
+            break;
+          }
+        }
+      }
+    }
   }
 }
 
@@ -1770,7 +2037,7 @@
     if (ArgTy->isPointerTy())
       if (Type *ElemTy = GR->findDeducedElementType(&Arg)) {
         IsNewFTy = true;
-        ArgTy = TypedPointerType::get(ElemTy, getPointerAddressSpace(ArgTy));
+        ArgTy = getTypedPointerWrapper(ElemTy, getPointerAddressSpace(ArgTy));
       }
     ArgTys.push_back(ArgTy);
   }
@@ -1845,17 +2112,17 @@
   InstrSet = ST.isOpenCLEnv() ? SPIRV::InstructionSet::OpenCL_std
                               : SPIRV::InstructionSet::GLSL_std_450;
 
-  if (!F)
+  if (!CurrF)
     HaveFunPtrs =
         ST.canUseExtension(SPIRV::Extension::SPV_INTEL_function_pointers);
 
-  F = &Func;
+  CurrF = &Func;
   IRBuilder<> B(Func.getContext());
   AggrConsts.clear();
   AggrConstTypes.clear();
   AggrStores.clear();
 
-  processParamTypesByFunHeader(F, B);
+  processParamTypesByFunHeader(CurrF, B);
 
   // StoreInst's operand type can be changed during the next transformations,
   // so we need to store it in the set. Also store already transformed types.
@@ -1878,6 +2145,7 @@
   for (auto &I : instructions(Func))
     Worklist.push_back(&I);
 
+  // Pass forward: use operand to deduce instructions result.
   for (auto &I : Worklist) {
     // Don't emit intrinsincs for convergence intrinsics.
     if (isConvergenceIntrinsic(I))
@@ -1894,8 +2162,18 @@
       insertAssignPtrTypeIntrs(I, B, true);
   }
 
-  for (auto &I : instructions(Func))
-    deduceOperandElementType(&I);
+  // Pass backward: use instructions results to specify/update/cast operands
+  // where needed.
+  SmallPtrSet<Instruction *, 4> UncompleteRets;
+  for (auto &I : llvm::reverse(instructions(Func)))
+    deduceOperandElementType(&I, &UncompleteRets);
+
+  // Pass forward for PHIs only, their operands are not preceed the instruction
+  // in meaning of `instructions(Func)`.
+  for (BasicBlock &BB : Func)
+    for (PHINode &Phi : BB.phis())
+      if (isPointerTy(Phi.getType()))
+        deduceOperandElementType(&Phi, nullptr);
 
   for (auto *I : Worklist) {
     TrackConstants = true;
@@ -1917,83 +2195,83 @@
   return true;
 }
 
-void SPIRVEmitIntrinsics::replaceWithPtrcasted(Instruction *CI, Type *NewElemTy,
-                                               Type *KnownElemTy,
-                                               CallInst *AssignCI) {
-  updateAssignType(AssignCI, CI, PoisonValue::get(NewElemTy));
-  IRBuilder<> B(CI->getContext());
-  B.SetInsertPoint(*CI->getInsertionPointAfterDef());
-  B.SetCurrentDebugLocation(CI->getDebugLoc());
-  Type *OpTy = CI->getType();
-  SmallVector<Type *, 2> Types = {OpTy, OpTy};
-  SmallVector<Value *, 2> Args = {CI, buildMD(PoisonValue::get(KnownElemTy)),
-                                  B.getInt32(getPointerAddressSpace(OpTy))};
-  CallInst *PtrCasted =
-      B.CreateIntrinsic(Intrinsic::spv_ptrcast, {Types}, Args);
-  SmallVector<User *> Users(CI->users());
-  for (auto *U : Users)
-    if (U != AssignCI && U != PtrCasted)
-      U->replaceUsesOfWith(CI, PtrCasted);
-  buildAssignPtr(B, KnownElemTy, PtrCasted);
-}
-
 // Try to deduce a better type for pointers to untyped ptr.
-bool SPIRVEmitIntrinsics::postprocessTypes() {
-  bool Changed = false;
-  if (!GR)
-    return Changed;
-  for (auto IB = PostprocessWorklist.rbegin(), IE = PostprocessWorklist.rend();
-       IB != IE; ++IB) {
-    CallInst *AssignCI = GR->findAssignPtrTypeInstr(*IB);
-    Type *KnownTy = GR->findDeducedElementType(*IB);
-    if (!KnownTy || !AssignCI || !isa<Instruction>(AssignCI->getArgOperand(0)))
+bool SPIRVEmitIntrinsics::postprocessTypes(Module &M) {
+  if (!GR || TodoTypeSz == 0)
+    return false;
+
+  unsigned SzTodo = TodoTypeSz;
+  DenseMap<Value *, SmallPtrSet<Value *, 4>> ToProcess;
+  for (auto [Op, Enabled] : TodoType) {
+    // TODO: add isa<CallInst>(Op) to continue
+    if (!Enabled || isa<GetElementPtrInst>(Op))
       continue;
+    CallInst *AssignCI = GR->findAssignPtrTypeInstr(Op);
+    Type *KnownTy = GR->findDeducedElementType(Op);
+    if (!KnownTy || !AssignCI)
+      continue;
+    assert(Op == AssignCI->getArgOperand(0));
     // Try to improve the type deduced after all Functions are processed.
-    if (auto *CI = dyn_cast<CallInst>(*IB)) {
-      if (Function *CalledF = CI->getCalledFunction()) {
-        Type *RetElemTy = GR->findDeducedElementType(CalledF);
-        // Fix inconsistency between known type and function's return type.
-        if (RetElemTy && RetElemTy != KnownTy) {
-          replaceWithPtrcasted(CI, RetElemTy, KnownTy, AssignCI);
-          Changed = true;
+    if (auto *CI = dyn_cast<Instruction>(Op)) {
+      CurrF = CI->getParent()->getParent();
+      std::unordered_set<Value *> Visited;
+      if (Type *ElemTy = deduceElementTypeHelper(Op, Visited, false, true)) {
+        if (ElemTy != KnownTy) {
+          DenseSet<std::pair<Value *, Value *>> VisitedSubst;
+          propagateElemType(CI, ElemTy, VisitedSubst);
+          eraseTodoType(Op);
           continue;
         }
       }
     }
-    Instruction *I = cast<Instruction>(AssignCI->getArgOperand(0));
-    for (User *U : I->users()) {
+    for (User *U : Op->users()) {
       Instruction *Inst = dyn_cast<Instruction>(U);
-      if (!Inst || isa<IntrinsicInst>(Inst))
-        continue;
-      deduceOperandElementType(Inst, I, KnownTy, AssignCI);
-      if (KnownTy != GR->findDeducedElementType(I)) {
-        Changed = true;
-        break;
-      }
+      if (Inst && !isa<IntrinsicInst>(Inst))
+        ToProcess[Inst].insert(Op);
     }
   }
-  return Changed;
+  if (TodoTypeSz == 0)
+    return true;
+
+  for (auto &F : M) {
+    CurrF = &F;
+    SmallPtrSet<Instruction *, 4> UncompleteRets;
+    for (auto &I : llvm::reverse(instructions(F))) {
+      auto It = ToProcess.find(&I);
+      if (It == ToProcess.end())
+        continue;
+      It->second.remove_if([this](Value *V) { return !isTodoType(V); });
+      if (It->second.size() == 0)
+        continue;
+      deduceOperandElementType(&I, &UncompleteRets, &It->second, true);
+      if (TodoTypeSz == 0)
+        return true;
+    }
+  }
+
+  return SzTodo > TodoTypeSz;
 }
 
 bool SPIRVEmitIntrinsics::runOnModule(Module &M) {
   bool Changed = false;
 
-  UncompleteTypeInfo.clear();
-  PostprocessWorklist.clear();
+  TodoType.clear();
   for (auto &F : M)
     Changed |= runOnFunction(F);
 
+  // Specify function parameters after all functions were processed.
   for (auto &F : M) {
     // check if function parameter types are set
+    CurrF = &F;
     if (!F.isDeclaration() && !F.isIntrinsic()) {
-      const SPIRVSubtarget &ST = TM->getSubtarget<SPIRVSubtarget>(F);
-      GR = ST.getSPIRVGlobalRegistry();
       IRBuilder<> B(F.getContext());
       processParamTypes(&F, B);
     }
   }
 
-  Changed |= postprocessTypes();
+  CanTodoType = false;
+  Changed |= postprocessTypes(M);
+
   if (HaveFunPtrs)
     Changed |= processFunctionPointers(M);
 
diff --git a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp
index 6f22288..9ac659f 100644
--- a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp
@@ -28,6 +28,18 @@
 #include <functional>
 
 using namespace llvm;
+
+inline unsigned typeToAddressSpace(const Type *Ty) {
+  if (auto PType = dyn_cast<TypedPointerType>(Ty))
+    return PType->getAddressSpace();
+  if (auto PType = dyn_cast<PointerType>(Ty))
+    return PType->getAddressSpace();
+  if (auto *ExtTy = dyn_cast<TargetExtType>(Ty);
+      ExtTy && isTypedPointerWrapper(ExtTy))
+    return ExtTy->getIntParameter(0);
+  report_fatal_error("Unable to convert LLVM type to SPIRVType", true);
+}
+
 SPIRVGlobalRegistry::SPIRVGlobalRegistry(unsigned PointerSize)
     : PointerSize(PointerSize), Bound(0) {}
 
@@ -69,7 +81,7 @@
 
 void SPIRVGlobalRegistry::assignSPIRVTypeToVReg(SPIRVType *SpirvType,
                                                 Register VReg,
-                                                MachineFunction &MF) {
+                                                const MachineFunction &MF) {
   VRegToTypeMap[&MF][VReg] = SpirvType;
 }
 
@@ -570,15 +582,15 @@
 SPIRVGlobalRegistry::getOrCreateConstNullPtr(MachineIRBuilder &MIRBuilder,
                                              SPIRVType *SpvType) {
   const Type *LLVMTy = getTypeForSPIRVType(SpvType);
-  const TypedPointerType *LLVMPtrTy = cast<TypedPointerType>(LLVMTy);
+  unsigned AddressSpace = typeToAddressSpace(LLVMTy);
   // Find a constant in DT or build a new one.
-  Constant *CP = ConstantPointerNull::get(PointerType::get(
-      LLVMPtrTy->getElementType(), LLVMPtrTy->getAddressSpace()));
+  Constant *CP = ConstantPointerNull::get(
+      PointerType::get(::getPointeeType(LLVMTy), AddressSpace));
   Register Res = DT.find(CP, CurMF);
   if (!Res.isValid()) {
-    LLT LLTy = LLT::pointer(LLVMPtrTy->getAddressSpace(), PointerSize);
+    LLT LLTy = LLT::pointer(AddressSpace, PointerSize);
     Res = CurMF->getRegInfo().createGenericVirtualRegister(LLTy);
-    CurMF->getRegInfo().setRegClass(Res, &SPIRV::iIDRegClass);
+    CurMF->getRegInfo().setRegClass(Res, &SPIRV::pIDRegClass);
     assignSPIRVTypeToVReg(SpvType, Res, *CurMF);
     MIRBuilder.buildInstr(SPIRV::OpConstantNull)
         .addDef(Res)
@@ -978,18 +990,11 @@
     }
     return getOpTypeFunction(RetTy, ParamTypes, MIRBuilder);
   }
-  unsigned AddrSpace = 0xFFFF;
-  if (auto PType = dyn_cast<TypedPointerType>(Ty))
-    AddrSpace = PType->getAddressSpace();
-  else if (auto PType = dyn_cast<PointerType>(Ty))
-    AddrSpace = PType->getAddressSpace();
-  else
-    report_fatal_error("Unable to convert LLVM type to SPIRVType", true);
 
+  unsigned AddrSpace = typeToAddressSpace(Ty);
   SPIRVType *SpvElementType = nullptr;
-  if (auto PType = dyn_cast<TypedPointerType>(Ty))
-    SpvElementType = getOrCreateSPIRVType(PType->getElementType(), MIRBuilder,
-                                          AccQual, EmitIR);
+  if (Type *ElemTy = ::getPointeeType(Ty))
+    SpvElementType = getOrCreateSPIRVType(ElemTy, MIRBuilder, AccQual, EmitIR);
   else
     SpvElementType = getOrCreateSPIRVIntegerType(8, MIRBuilder);
 
@@ -1029,7 +1034,11 @@
   // will be added later. For special types it is already added to DT.
   if (SpirvType->getOpcode() != SPIRV::OpTypeForwardPointer && !Reg.isValid() &&
       !isSpecialOpaqueType(Ty)) {
-    if (!isPointerTy(Ty))
+    if (auto *ExtTy = dyn_cast<TargetExtType>(Ty);
+        ExtTy && isTypedPointerWrapper(ExtTy))
+      DT.add(ExtTy->getTypeParameter(0), ExtTy->getIntParameter(0),
+             &MIRBuilder.getMF(), getSPIRVTypeID(SpirvType));
+    else if (!isPointerTy(Ty))
       DT.add(Ty, &MIRBuilder.getMF(), getSPIRVTypeID(SpirvType));
     else if (isTypedPointerTy(Ty))
       DT.add(cast<TypedPointerType>(Ty)->getElementType(),
@@ -1065,7 +1074,11 @@
     const Type *Ty, MachineIRBuilder &MIRBuilder,
     SPIRV::AccessQualifier::AccessQualifier AccessQual, bool EmitIR) {
   Register Reg;
-  if (!isPointerTy(Ty)) {
+  if (auto *ExtTy = dyn_cast<TargetExtType>(Ty);
+      ExtTy && isTypedPointerWrapper(ExtTy)) {
+    Reg = DT.find(ExtTy->getTypeParameter(0), ExtTy->getIntParameter(0),
+                  &MIRBuilder.getMF());
+  } else if (!isPointerTy(Ty)) {
     Ty = adjustIntTypeByWidth(Ty);
     Reg = DT.find(Ty, &MIRBuilder.getMF());
   } else if (isTypedPointerTy(Ty)) {
diff --git a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h
index 3bb86e8..ff4b0ea 100644
--- a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h
+++ b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h
@@ -330,7 +330,7 @@
   // In cases where the SPIR-V type is already known, this function can be
   // used to map it to the given VReg via an ASSIGN_TYPE instruction.
   void assignSPIRVTypeToVReg(SPIRVType *Type, Register VReg,
-                             MachineFunction &MF);
+                             const MachineFunction &MF);
 
   // Either generate a new OpTypeXXX instruction or return an existing one
   // corresponding to the given LLVM IR type.
diff --git a/llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp b/llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp
index 59a1bf5..d5b81bf 100644
--- a/llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp
@@ -111,8 +111,8 @@
                             SPIRVGlobalRegistry &GR, MachineInstr &I,
                             Register OpReg, unsigned OpIdx,
                             SPIRVType *NewPtrType) {
-  Register NewReg = MRI->createGenericVirtualRegister(LLT::scalar(64));
   MachineIRBuilder MIB(I);
+  Register NewReg = createVirtualRegister(NewPtrType, &GR, MRI, MIB.getMF());
   bool Res = MIB.buildInstr(SPIRV::OpBitcast)
                  .addDef(NewReg)
                  .addUse(GR.getSPIRVTypeID(NewPtrType))
@@ -121,8 +121,6 @@
                                    *STI.getRegBankInfo());
   if (!Res)
     report_fatal_error("insert validation bitcast: cannot constrain all uses");
-  MRI->setRegClass(NewReg, &SPIRV::iIDRegClass);
-  GR.assignSPIRVTypeToVReg(NewPtrType, NewReg, MIB.getMF());
   I.getOperand(OpIdx).setReg(NewReg);
 }
 
@@ -396,6 +394,7 @@
       case SPIRV::OpGenericCastToPtr:
         validateAccessChain(STI, MRI, GR, MI);
         break;
+      case SPIRV::OpPtrAccessChain:
       case SPIRV::OpInBoundsPtrAccessChain:
         if (MI.getNumOperands() == 4)
           validateAccessChain(STI, MRI, GR, MI);
diff --git a/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp b/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp
index ec1e13a..90898b8 100644
--- a/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp
@@ -357,12 +357,13 @@
   verify(*ST.getInstrInfo());
 }
 
-static Register convertPtrToInt(Register Reg, LLT ConvTy, SPIRVType *SpirvType,
+static Register convertPtrToInt(Register Reg, LLT ConvTy, SPIRVType *SpvType,
                                 LegalizerHelper &Helper,
                                 MachineRegisterInfo &MRI,
                                 SPIRVGlobalRegistry *GR) {
   Register ConvReg = MRI.createGenericVirtualRegister(ConvTy);
-  GR->assignSPIRVTypeToVReg(SpirvType, ConvReg, Helper.MIRBuilder.getMF());
+  MRI.setRegClass(ConvReg, GR->getRegClass(SpvType));
+  GR->assignSPIRVTypeToVReg(SpvType, ConvReg, Helper.MIRBuilder.getMF());
   Helper.MIRBuilder.buildInstr(TargetOpcode::G_PTRTOINT)
       .addDef(ConvReg)
       .addUse(Reg);
diff --git a/llvm/lib/Target/SPIRV/SPIRVPostLegalizer.cpp b/llvm/lib/Target/SPIRV/SPIRVPostLegalizer.cpp
index 11b9e4f..3373d8e 100644
--- a/llvm/lib/Target/SPIRV/SPIRVPostLegalizer.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVPostLegalizer.cpp
@@ -102,10 +102,7 @@
           if (!ResType) {
             // There was no "assign type" actions, let's fix this now
             ResType = ScalarType;
-            MRI.setRegClass(ResVReg, &SPIRV::iIDRegClass);
-            MRI.setType(ResVReg,
-                        LLT::scalar(GR->getScalarOrVectorBitWidth(ResType)));
-            GR->assignSPIRVTypeToVReg(ResType, ResVReg, *GR->CurMF);
+            setRegClassType(ResVReg, ResType, GR, &MRI, *GR->CurMF, true);
           }
         }
       } else if (mayBeInserted(Opcode) && I.getNumDefs() == 1 &&
@@ -124,9 +121,7 @@
           if (!ResVType)
             continue;
           // Set type & class
-          MRI.setRegClass(ResVReg, GR->getRegClass(ResVType));
-          MRI.setType(ResVReg, GR->getRegType(ResVType));
-          GR->assignSPIRVTypeToVReg(ResVType, ResVReg, *GR->CurMF);
+          setRegClassType(ResVReg, ResVType, GR, &MRI, *GR->CurMF, true);
         }
         // If this is a simple operation that is to be reduced by TableGen
         // definition we must apply some of pre-legalizer rules here
diff --git a/llvm/lib/Target/SPIRV/SPIRVUtils.cpp b/llvm/lib/Target/SPIRV/SPIRVUtils.cpp
index f899b8b..1ece304 100644
--- a/llvm/lib/Target/SPIRV/SPIRVUtils.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVUtils.cpp
@@ -13,6 +13,7 @@
 #include "SPIRVUtils.h"
 #include "MCTargetDesc/SPIRVBaseInfo.h"
 #include "SPIRV.h"
+#include "SPIRVGlobalRegistry.h"
 #include "SPIRVInstrInfo.h"
 #include "SPIRVSubtarget.h"
 #include "llvm/ADT/StringRef.h"
@@ -21,6 +22,7 @@
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/Demangle/Demangle.h"
+#include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/IntrinsicsSPIRV.h"
 #include <queue>
 #include <vector>
@@ -405,8 +407,10 @@
 }
 
 bool isSpecialOpaqueType(const Type *Ty) {
-  if (const TargetExtType *EType = dyn_cast<TargetExtType>(Ty))
-    return hasBuiltinTypePrefix(EType->getName());
+  if (const TargetExtType *ExtTy = dyn_cast<TargetExtType>(Ty))
+    return isTypedPointerWrapper(ExtTy)
+               ? false
+               : hasBuiltinTypePrefix(ExtTy->getName());
 
   return false;
 }
@@ -684,4 +688,77 @@
   return false;
 }
 
+// Assign SPIR-V type to the register. If the register has no valid assigned
+// class, set register LLT type and class according to the SPIR-V type.
+void setRegClassType(Register Reg, SPIRVType *SpvType, SPIRVGlobalRegistry *GR,
+                     MachineRegisterInfo *MRI, const MachineFunction &MF,
+                     bool Force) {
+  GR->assignSPIRVTypeToVReg(SpvType, Reg, MF);
+  if (!MRI->getRegClassOrNull(Reg) || Force) {
+    MRI->setRegClass(Reg, GR->getRegClass(SpvType));
+    MRI->setType(Reg, GR->getRegType(SpvType));
+  }
+}
+
+// Create a SPIR-V type, assign SPIR-V type to the register. If the register has
+// no valid assigned class, set register LLT type and class according to the
+// SPIR-V type.
+void setRegClassType(Register Reg, const Type *Ty, SPIRVGlobalRegistry *GR,
+                     MachineIRBuilder &MIRBuilder, bool Force) {
+  setRegClassType(Reg, GR->getOrCreateSPIRVType(Ty, MIRBuilder), GR,
+                  MIRBuilder.getMRI(), MIRBuilder.getMF(), Force);
+}
+
+// Create a virtual register and assign SPIR-V type to the register. Set
+// register LLT type and class according to the SPIR-V type.
+Register createVirtualRegister(SPIRVType *SpvType, SPIRVGlobalRegistry *GR,
+                               MachineRegisterInfo *MRI,
+                               const MachineFunction &MF) {
+  Register Reg = MRI->createVirtualRegister(GR->getRegClass(SpvType));
+  MRI->setType(Reg, GR->getRegType(SpvType));
+  GR->assignSPIRVTypeToVReg(SpvType, Reg, MF);
+  return Reg;
+}
+
+// Create a virtual register and assign SPIR-V type to the register. Set
+// register LLT type and class according to the SPIR-V type.
+Register createVirtualRegister(SPIRVType *SpvType, SPIRVGlobalRegistry *GR,
+                               MachineIRBuilder &MIRBuilder) {
+  return createVirtualRegister(SpvType, GR, MIRBuilder.getMRI(),
+                               MIRBuilder.getMF());
+}
+
+// Create a SPIR-V type, virtual register and assign SPIR-V type to the
+// register. Set register LLT type and class according to the SPIR-V type.
+Register createVirtualRegister(const Type *Ty, SPIRVGlobalRegistry *GR,
+                               MachineIRBuilder &MIRBuilder) {
+  return createVirtualRegister(GR->getOrCreateSPIRVType(Ty, MIRBuilder), GR,
+                               MIRBuilder);
+}
+
+// Return true if there is an opaque pointer type nested in the argument.
+bool isNestedPointer(const Type *Ty) {
+  if (Ty->isPtrOrPtrVectorTy())
+    return true;
+  if (const FunctionType *RefTy = dyn_cast<FunctionType>(Ty)) {
+    if (isNestedPointer(RefTy->getReturnType()))
+      return true;
+    for (const Type *ArgTy : RefTy->params())
+      if (isNestedPointer(ArgTy))
+        return true;
+    return false;
+  }
+  if (const ArrayType *RefTy = dyn_cast<ArrayType>(Ty))
+    return isNestedPointer(RefTy->getElementType());
+  return false;
+}
+
+bool isSpvIntrinsic(const Value *Arg) {
+  if (const auto *II = dyn_cast<IntrinsicInst>(Arg))
+    if (Function *F = II->getCalledFunction())
+      if (F->getName().starts_with("llvm.spv."))
+        return true;
+  return false;
+}
+
 } // namespace llvm
diff --git a/llvm/lib/Target/SPIRV/SPIRVUtils.h b/llvm/lib/Target/SPIRV/SPIRVUtils.h
index d218dbd..c056954 100644
--- a/llvm/lib/Target/SPIRV/SPIRVUtils.h
+++ b/llvm/lib/Target/SPIRV/SPIRVUtils.h
@@ -34,6 +34,7 @@
 class StringRef;
 class SPIRVInstrInfo;
 class SPIRVSubtarget;
+class SPIRVGlobalRegistry;
 
 // This class implements a partial ordering visitor, which visits a cyclic graph
 // in natural topological-like ordering. Topological ordering is not defined for
@@ -198,6 +199,8 @@
 
 // Check if MI is a SPIR-V specific intrinsic call.
 bool isSpvIntrinsic(const MachineInstr &MI, Intrinsic::ID IntrinsicID);
+// Check if it's a SPIR-V specific intrinsic call.
+bool isSpvIntrinsic(const Value *Arg);
 
 // Get type of i-th operand of the metadata node.
 Type *getMDOperandAsType(const MDNode *N, unsigned I);
@@ -276,12 +279,19 @@
                             {ElemTy}, {AS});
 }
 
-inline bool isTypedPointerWrapper(TargetExtType *ExtTy) {
+inline bool isTypedPointerWrapper(const TargetExtType *ExtTy) {
   return ExtTy->getName() == TYPED_PTR_TARGET_EXT_NAME &&
          ExtTy->getNumIntParameters() == 1 &&
          ExtTy->getNumTypeParameters() == 1;
 }
 
+// True if this is an instance of PointerType or TypedPointerType.
+inline bool isPointerTyOrWrapper(const Type *Ty) {
+  if (auto *ExtTy = dyn_cast<TargetExtType>(Ty))
+    return isTypedPointerWrapper(ExtTy);
+  return isPointerTy(Ty);
+}
+
 inline Type *applyWrappers(Type *Ty) {
   if (auto *ExtTy = dyn_cast<TargetExtType>(Ty)) {
     if (isTypedPointerWrapper(ExtTy))
@@ -296,12 +306,14 @@
   return Ty;
 }
 
-inline Type *getPointeeType(Type *Ty) {
-  if (auto PType = dyn_cast<TypedPointerType>(Ty))
-    return PType->getElementType();
-  else if (auto *ExtTy = dyn_cast<TargetExtType>(Ty))
-    if (isTypedPointerWrapper(ExtTy))
-      return applyWrappers(ExtTy->getTypeParameter(0));
+inline Type *getPointeeType(const Type *Ty) {
+  if (Ty) {
+    if (auto PType = dyn_cast<TypedPointerType>(Ty))
+      return PType->getElementType();
+    else if (auto *ExtTy = dyn_cast<TargetExtType>(Ty))
+      if (isTypedPointerWrapper(ExtTy))
+        return ExtTy->getTypeParameter(0);
+  }
   return nullptr;
 }
 
@@ -360,5 +372,23 @@
 #define SPIRV_BACKEND_SERVICE_FUN_NAME "__spirv_backend_service_fun"
 bool getVacantFunctionName(Module &M, std::string &Name);
 
+void setRegClassType(Register Reg, const Type *Ty, SPIRVGlobalRegistry *GR,
+                     MachineIRBuilder &MIRBuilder, bool Force = false);
+void setRegClassType(Register Reg, const MachineInstr *SpvType,
+                     SPIRVGlobalRegistry *GR, MachineRegisterInfo *MRI,
+                     const MachineFunction &MF, bool Force = false);
+Register createVirtualRegister(const MachineInstr *SpvType,
+                               SPIRVGlobalRegistry *GR,
+                               MachineRegisterInfo *MRI,
+                               const MachineFunction &MF);
+Register createVirtualRegister(const MachineInstr *SpvType,
+                               SPIRVGlobalRegistry *GR,
+                               MachineIRBuilder &MIRBuilder);
+Register createVirtualRegister(const Type *Ty, SPIRVGlobalRegistry *GR,
+                               MachineIRBuilder &MIRBuilder);
+
+// Return true if there is an opaque pointer type nested in the argument.
+bool isNestedPointer(const Type *Ty);
+
 } // namespace llvm
 #endif // LLVM_LIB_TARGET_SPIRV_SPIRVUTILS_H