AMDGPU/GlobalISel: Legalize addrspacecast

On targets without aperture registers the aperture has to be loaded through
the queue ptr; use a placeholder constant for the queue ptr for now.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@353497 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index ff5b18a..b01cb27 100644
--- a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -2174,6 +2174,7 @@
   case G_FPTOUI:
   case G_INTTOPTR:
   case G_PTRTOINT:
+  case G_ADDRSPACE_CAST:
     return fewerElementsVectorCasts(MI, TypeIdx, NarrowTy);
   case G_ICMP:
   case G_FCMP:
diff --git a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 780af6b..3a7cb40 100644
--- a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -14,6 +14,9 @@
 #include "AMDGPU.h"
 #include "AMDGPULegalizerInfo.h"
 #include "AMDGPUTargetMachine.h"
+#include "SIMachineFunctionInfo.h"
+
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
 #include "llvm/CodeGen/TargetOpcodes.h"
 #include "llvm/CodeGen/ValueTypes.h"
 #include "llvm/IR/DerivedTypes.h"
@@ -316,6 +319,12 @@
         return std::make_pair(0, LLT::scalar(Query.Types[1].getSizeInBits()));
       });
 
+  if (ST.hasFlatAddressSpace()) {
+    getActionDefinitionsBuilder(G_ADDRSPACE_CAST)
+      .scalarize(0)
+      .custom();
+  }
+
   getActionDefinitionsBuilder({G_LOAD, G_STORE})
     .narrowScalarIf([](const LegalityQuery &Query) {
         unsigned Size = Query.Types[0].getSizeInBits();
@@ -587,3 +596,171 @@
   computeTables();
   verify(*ST.getInstrInfo());
 }
+
+bool AMDGPULegalizerInfo::legalizeCustom(MachineInstr &MI,
+                                         MachineRegisterInfo &MRI,
+                                         MachineIRBuilder &MIRBuilder,
+                                         GISelChangeObserver &Observer) const {
+  // Route each custom-legalized opcode to its dedicated lowering routine.
+  const unsigned Opcode = MI.getOpcode();
+  if (Opcode == TargetOpcode::G_ADDRSPACE_CAST)
+    return legalizeAddrSpaceCast(MI, MRI, MIRBuilder);
+
+  // No lowering is implemented here for any other opcode; return false so the
+  // instruction is left untouched.
+  return false;
+}
+
+unsigned AMDGPULegalizerInfo::getSegmentAperture(
+  unsigned AS,
+  MachineRegisterInfo &MRI,
+  MachineIRBuilder &MIRBuilder) const {
+  MachineFunction &MF = MIRBuilder.getMF();
+  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+  const LLT S32 = LLT::scalar(32);
+  // Emit the instructions that compute the aperture for AS into an s32 vreg.
+  if (ST.hasApertureRegs()) {
+    // FIXME: Use inline constants (src_{shared, private}_base) instead of
+    // getreg.
+    unsigned Offset = AS == AMDGPUAS::LOCAL_ADDRESS ?
+        AMDGPU::Hwreg::OFFSET_SRC_SHARED_BASE :
+        AMDGPU::Hwreg::OFFSET_SRC_PRIVATE_BASE;
+    unsigned WidthM1 = AS == AMDGPUAS::LOCAL_ADDRESS ?
+        AMDGPU::Hwreg::WIDTH_M1_SRC_SHARED_BASE :
+        AMDGPU::Hwreg::WIDTH_M1_SRC_PRIVATE_BASE;
+    unsigned Encoding =
+        AMDGPU::Hwreg::ID_MEM_BASES << AMDGPU::Hwreg::ID_SHIFT_ |
+        Offset << AMDGPU::Hwreg::OFFSET_SHIFT_ |
+        WidthM1 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_;
+    // Encoding packs the hwreg id, field offset and field width minus one.
+    unsigned ShiftAmt = MRI.createGenericVirtualRegister(S32);
+    unsigned ApertureReg = MRI.createGenericVirtualRegister(S32);
+    unsigned GetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+
+    MIRBuilder.buildInstr(AMDGPU::S_GETREG_B32)
+      .addDef(GetReg)
+      .addImm(Encoding);
+    MRI.setType(GetReg, S32);
+    // Shift the value read by getreg left by the field width (WidthM1 + 1).
+    MIRBuilder.buildConstant(ShiftAmt, WidthM1 + 1);
+    MIRBuilder.buildInstr(TargetOpcode::G_SHL)
+      .addDef(ApertureReg)
+      .addUse(GetReg)
+      .addUse(ShiftAmt);
+
+    return ApertureReg;
+  }
+  // No aperture registers: load the value through the queue pointer instead.
+  unsigned QueuePtr = MRI.createGenericVirtualRegister(
+    LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));
+
+  // FIXME: Placeholder until we can track the input registers.
+  MIRBuilder.buildConstant(QueuePtr, 0xdeadbeef);
+
+  // Offset into amd_queue_t for group_segment_aperture_base_hi /
+  // private_segment_aperture_base_hi.
+  uint32_t StructOffset = (AS == AMDGPUAS::LOCAL_ADDRESS) ? 0x40 : 0x44;
+
+  // FIXME: Don't use undef
+  Value *V = UndefValue::get(PointerType::get(
+                               Type::getInt8Ty(MF.getFunction().getContext()),
+                               AMDGPUAS::CONSTANT_ADDRESS));
+
+  MachinePointerInfo PtrInfo(V, StructOffset);
+  MachineMemOperand *MMO = MF.getMachineMemOperand(
+    PtrInfo,
+    MachineMemOperand::MOLoad |
+    MachineMemOperand::MODereferenceable |
+    MachineMemOperand::MOInvariant,
+    4,
+    MinAlign(64, StructOffset));
+  // Address the field at QueuePtr + StructOffset and load 32 bits from it.
+  unsigned LoadResult = MRI.createGenericVirtualRegister(S32);
+  unsigned LoadAddr = AMDGPU::NoRegister;
+
+  MIRBuilder.materializeGEP(LoadAddr, QueuePtr, LLT::scalar(64), StructOffset);
+  MIRBuilder.buildLoad(LoadResult, LoadAddr, *MMO);
+  return LoadResult;
+}
+
+bool AMDGPULegalizerInfo::legalizeAddrSpaceCast(
+  MachineInstr &MI, MachineRegisterInfo &MRI,
+  MachineIRBuilder &MIRBuilder) const {
+  MachineFunction &MF = MIRBuilder.getMF();
+  // Expand a scalar G_ADDRSPACE_CAST; the new code is inserted at MI.
+  MIRBuilder.setInstr(MI);
+
+  unsigned Dst = MI.getOperand(0).getReg();
+  unsigned Src = MI.getOperand(1).getReg();
+
+  LLT DstTy = MRI.getType(Dst);
+  LLT SrcTy = MRI.getType(Src);
+  unsigned DestAS = DstTy.getAddressSpace();
+  unsigned SrcAS = SrcTy.getAddressSpace();
+
+  // TODO: Avoid reloading from the queue ptr for each cast, or at least each
+  // vector element.
+  assert(!DstTy.isVector());
+
+  const AMDGPUTargetMachine &TM
+    = static_cast<const AMDGPUTargetMachine &>(MF.getTarget());
+  // Casts the target lowering reports as no-ops become a COPY in place.
+  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+  if (ST.getTargetLowering()->isNoopAddrSpaceCast(SrcAS, DestAS)) {
+    MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::COPY));
+    return true;
+  }
+  // flat -> segment: take the low 32 bits, mapping flat null to segment null.
+  if (SrcAS == AMDGPUAS::FLAT_ADDRESS) {
+    assert(DestAS == AMDGPUAS::LOCAL_ADDRESS ||
+           DestAS == AMDGPUAS::PRIVATE_ADDRESS);
+    unsigned NullVal = TM.getNullPointerValue(DestAS);
+
+    unsigned SegmentNullReg = MRI.createGenericVirtualRegister(DstTy);
+    unsigned FlatNullReg = MRI.createGenericVirtualRegister(SrcTy);
+
+    MIRBuilder.buildConstant(SegmentNullReg, NullVal);
+    MIRBuilder.buildConstant(FlatNullReg, 0);
+
+    unsigned PtrLo32 = MRI.createGenericVirtualRegister(DstTy);
+
+    // Extract low 32-bits of the pointer.
+    MIRBuilder.buildExtract(PtrLo32, Src, 0);
+
+    unsigned CmpRes = MRI.createGenericVirtualRegister(LLT::scalar(1));
+    MIRBuilder.buildICmp(CmpInst::ICMP_NE, CmpRes, Src, FlatNullReg);
+    MIRBuilder.buildSelect(Dst, CmpRes, PtrLo32, SegmentNullReg);
+
+    MI.eraseFromParent();
+    return true;
+  }
+  // segment -> flat: the aperture supplies the high 32 bits of the pointer.
+  assert(SrcAS == AMDGPUAS::LOCAL_ADDRESS ||
+         SrcAS == AMDGPUAS::PRIVATE_ADDRESS);
+
+  unsigned FlatNullReg = MRI.createGenericVirtualRegister(DstTy);
+  unsigned SegmentNullReg = MRI.createGenericVirtualRegister(SrcTy);
+  MIRBuilder.buildConstant(SegmentNullReg, TM.getNullPointerValue(SrcAS));
+  MIRBuilder.buildConstant(FlatNullReg, TM.getNullPointerValue(DestAS));
+  // The aperture belongs to the segment being cast from, so query SrcAS.
+  unsigned ApertureReg = getSegmentAperture(SrcAS, MRI, MIRBuilder);
+
+  unsigned CmpRes = MRI.createGenericVirtualRegister(LLT::scalar(1));
+  MIRBuilder.buildICmp(CmpInst::ICMP_NE, CmpRes, Src, SegmentNullReg);
+
+  unsigned BuildPtr = MRI.createGenericVirtualRegister(DstTy);
+
+  // Coerce the type of the low half of the result so we can use merge_values.
+  unsigned SrcAsInt = MRI.createGenericVirtualRegister(LLT::scalar(32));
+  MIRBuilder.buildInstr(TargetOpcode::G_PTRTOINT)
+    .addDef(SrcAsInt)
+    .addUse(Src);
+
+  // TODO: Should we allow mismatched types but matching sizes in merges to
+  // avoid the ptrtoint?
+  MIRBuilder.buildMerge(BuildPtr, {SrcAsInt, ApertureReg});
+  MIRBuilder.buildSelect(Dst, CmpRes, BuildPtr, FlatNullReg);
+
+  MI.eraseFromParent();
+  return true;
+}
diff --git a/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
index fbd5734..65fb9ca 100644
--- a/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
+++ b/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
@@ -27,6 +27,17 @@
 public:
   AMDGPULegalizerInfo(const GCNSubtarget &ST,
                       const GCNTargetMachine &TM);
+
+  bool legalizeCustom(MachineInstr &MI, MachineRegisterInfo &MRI,
+                      MachineIRBuilder &MIRBuilder,
+                      GISelChangeObserver &Observer) const override;
+  /// Emits code computing the 32-bit aperture value for \p AddrSpace.
+  unsigned getSegmentAperture(unsigned AddrSpace,
+                              MachineRegisterInfo &MRI,
+                              MachineIRBuilder &MIRBuilder) const;
+  /// Expands a scalar G_ADDRSPACE_CAST; returns true once \p MI is replaced.
+  bool legalizeAddrSpaceCast(MachineInstr &MI, MachineRegisterInfo &MRI,
+                             MachineIRBuilder &MIRBuilder) const;
 };
 } // End llvm namespace.
 #endif
diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp
index cda3502..5fa4e37 100644
--- a/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1217,7 +1217,8 @@
 static bool isFlatGlobalAddrSpace(unsigned AS) {
   return AS == AMDGPUAS::GLOBAL_ADDRESS ||
          AS == AMDGPUAS::FLAT_ADDRESS ||
-         AS == AMDGPUAS::CONSTANT_ADDRESS;
+         AS == AMDGPUAS::CONSTANT_ADDRESS ||
+         AS > AMDGPUAS::MAX_AMDGPU_ADDRESS;
 }
 
 bool SITargetLowering::isNoopAddrSpaceCast(unsigned SrcAS,
diff --git a/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir b/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir
new file mode 100644
index 0000000..3b0d5f7
--- /dev/null
+++ b/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir
@@ -0,0 +1,393 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=fiji -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s  | FileCheck -check-prefix=VI %s
+# RUN: llc -march=amdgcn -mcpu=gfx900 -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s  | FileCheck -check-prefix=GFX9 %s
+
+---
+name: test_addrspacecast_p0_to_p1
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+
+    ; VI-LABEL: name: test_addrspacecast_p0_to_p1
+    ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
+    ; VI: [[COPY1:%[0-9]+]]:_(p1) = COPY [[COPY]](p0)
+    ; VI: $vgpr0_vgpr1 = COPY [[COPY1]](p1)
+    ; GFX9-LABEL: name: test_addrspacecast_p0_to_p1
+    ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:_(p1) = COPY [[COPY]](p0)
+    ; GFX9: $vgpr0_vgpr1 = COPY [[COPY1]](p1)
+    %0:_(p0) = COPY $vgpr0_vgpr1
+    %1:_(p1) = G_ADDRSPACE_CAST %0
+    $vgpr0_vgpr1 = COPY %1
+...
+
+---
+name: test_addrspacecast_p1_to_p0
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+
+    ; VI-LABEL: name: test_addrspacecast_p1_to_p0
+    ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; VI: [[COPY1:%[0-9]+]]:_(p0) = COPY [[COPY]](p1)
+    ; VI: $vgpr0_vgpr1 = COPY [[COPY1]](p0)
+    ; GFX9-LABEL: name: test_addrspacecast_p1_to_p0
+    ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:_(p0) = COPY [[COPY]](p1)
+    ; GFX9: $vgpr0_vgpr1 = COPY [[COPY1]](p0)
+    %0:_(p1) = COPY $vgpr0_vgpr1
+    %1:_(p0) = G_ADDRSPACE_CAST %0
+    $vgpr0_vgpr1 = COPY %1
+...
+
+---
+name: test_addrspacecast_p0_to_p4
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+
+    ; VI-LABEL: name: test_addrspacecast_p0_to_p4
+    ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
+    ; VI: [[COPY1:%[0-9]+]]:_(p4) = COPY [[COPY]](p0)
+    ; VI: $vgpr0_vgpr1 = COPY [[COPY1]](p4)
+    ; GFX9-LABEL: name: test_addrspacecast_p0_to_p4
+    ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:_(p4) = COPY [[COPY]](p0)
+    ; GFX9: $vgpr0_vgpr1 = COPY [[COPY1]](p4)
+    %0:_(p0) = COPY $vgpr0_vgpr1
+    %1:_(p4) = G_ADDRSPACE_CAST %0
+    $vgpr0_vgpr1 = COPY %1
+...
+
+---
+name: test_addrspacecast_p4_to_p0
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+
+    ; VI-LABEL: name: test_addrspacecast_p4_to_p0
+    ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
+    ; VI: [[COPY1:%[0-9]+]]:_(p0) = COPY [[COPY]](p4)
+    ; VI: $vgpr0_vgpr1 = COPY [[COPY1]](p0)
+    ; GFX9-LABEL: name: test_addrspacecast_p4_to_p0
+    ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:_(p0) = COPY [[COPY]](p4)
+    ; GFX9: $vgpr0_vgpr1 = COPY [[COPY1]](p0)
+    %0:_(p4) = COPY $vgpr0_vgpr1
+    %1:_(p0) = G_ADDRSPACE_CAST %0
+    $vgpr0_vgpr1 = COPY %1
+...
+
+---
+name: test_addrspacecast_p0_to_p999
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+
+    ; VI-LABEL: name: test_addrspacecast_p0_to_p999
+    ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
+    ; VI: [[COPY1:%[0-9]+]]:_(p999) = COPY [[COPY]](p0)
+    ; VI: $vgpr0_vgpr1 = COPY [[COPY1]](p999)
+    ; GFX9-LABEL: name: test_addrspacecast_p0_to_p999
+    ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:_(p999) = COPY [[COPY]](p0)
+    ; GFX9: $vgpr0_vgpr1 = COPY [[COPY1]](p999)
+    %0:_(p0) = COPY $vgpr0_vgpr1
+    %1:_(p999) = G_ADDRSPACE_CAST %0
+    $vgpr0_vgpr1 = COPY %1
+...
+
+---
+name: test_addrspacecast_p999_to_p0
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+
+    ; VI-LABEL: name: test_addrspacecast_p999_to_p0
+    ; VI: [[COPY:%[0-9]+]]:_(p999) = COPY $vgpr0_vgpr1
+    ; VI: [[COPY1:%[0-9]+]]:_(p0) = COPY [[COPY]](p999)
+    ; VI: $vgpr0_vgpr1 = COPY [[COPY1]](p0)
+    ; GFX9-LABEL: name: test_addrspacecast_p999_to_p0
+    ; GFX9: [[COPY:%[0-9]+]]:_(p999) = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:_(p0) = COPY [[COPY]](p999)
+    ; GFX9: $vgpr0_vgpr1 = COPY [[COPY1]](p0)
+    %0:_(p999) = COPY $vgpr0_vgpr1
+    %1:_(p0) = G_ADDRSPACE_CAST %0
+    $vgpr0_vgpr1 = COPY %1
+...
+
+---
+name: test_addrspacecast_p5_to_p0
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; VI-LABEL: name: test_addrspacecast_p5_to_p0
+    ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
+    ; VI: [[C:%[0-9]+]]:_(p5) = G_CONSTANT i32 0
+    ; VI: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
+    ; VI: [[C2:%[0-9]+]]:_(p4) = G_CONSTANT i64 3735928559
+    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 68
+    ; VI: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[C2]], [[C3]](s64)
+    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p4) :: (dereferenceable invariant load 4 from `i8 addrspace(4)* undef` + 68, addrspace 4)
+    ; VI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p5), [[C]]
+    ; VI: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p5)
+    ; VI: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[LOAD]](s32)
+    ; VI: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[MV]], [[C1]]
+    ; VI: $vgpr0_vgpr1 = COPY [[SELECT]](p0)
+    ; GFX9-LABEL: name: test_addrspacecast_p5_to_p0
+    ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
+    ; GFX9: [[C:%[0-9]+]]:_(p5) = G_CONSTANT i32 0
+    ; GFX9: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
+    ; GFX9: [[S_GETREG_B32_:%[0-9]+]]:sreg_32(s32) = S_GETREG_B32 30735
+    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[S_GETREG_B32_]], [[C2]](s32)
+    ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p5), [[C]]
+    ; GFX9: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p5)
+    ; GFX9: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[SHL]](s32)
+    ; GFX9: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[MV]], [[C1]]
+    ; GFX9: $vgpr0_vgpr1 = COPY [[SELECT]](p0)
+    %0:_(p5) = COPY $vgpr0
+    %1:_(p0) = G_ADDRSPACE_CAST %0
+    $vgpr0_vgpr1 = COPY %1
+...
+
+---
+name: test_addrspacecast_p0_to_p5
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+
+    ; VI-LABEL: name: test_addrspacecast_p0_to_p5
+    ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
+    ; VI: [[C:%[0-9]+]]:_(p5) = G_CONSTANT i32 0
+    ; VI: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
+    ; VI: [[EXTRACT:%[0-9]+]]:_(p5) = G_EXTRACT [[COPY]](p0), 0
+    ; VI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p0), [[C1]]
+    ; VI: [[SELECT:%[0-9]+]]:_(p5) = G_SELECT [[ICMP]](s1), [[EXTRACT]], [[C]]
+    ; VI: $vgpr0 = COPY [[SELECT]](p5)
+    ; GFX9-LABEL: name: test_addrspacecast_p0_to_p5
+    ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
+    ; GFX9: [[C:%[0-9]+]]:_(p5) = G_CONSTANT i32 0
+    ; GFX9: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
+    ; GFX9: [[EXTRACT:%[0-9]+]]:_(p5) = G_EXTRACT [[COPY]](p0), 0
+    ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p0), [[C1]]
+    ; GFX9: [[SELECT:%[0-9]+]]:_(p5) = G_SELECT [[ICMP]](s1), [[EXTRACT]], [[C]]
+    ; GFX9: $vgpr0 = COPY [[SELECT]](p5)
+    %0:_(p0) = COPY $vgpr0_vgpr1
+    %1:_(p5) = G_ADDRSPACE_CAST %0
+    $vgpr0 = COPY %1
+...
+
+---
+name: test_addrspacecast_p3_to_p0
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; VI-LABEL: name: test_addrspacecast_p3_to_p0
+    ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; VI: [[C:%[0-9]+]]:_(p3) = G_CONSTANT i32 -1
+    ; VI: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
+    ; VI: [[C2:%[0-9]+]]:_(p4) = G_CONSTANT i64 3735928559
+    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
+    ; VI: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[C2]], [[C3]](s64)
+    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p4) :: (dereferenceable invariant load 4 from `i8 addrspace(4)* undef` + 64, align 64, addrspace 4)
+    ; VI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p3), [[C]]
+    ; VI: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p3)
+    ; VI: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[LOAD]](s32)
+    ; VI: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[MV]], [[C1]]
+    ; VI: $vgpr0_vgpr1 = COPY [[SELECT]](p0)
+    ; GFX9-LABEL: name: test_addrspacecast_p3_to_p0
+    ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9: [[C:%[0-9]+]]:_(p3) = G_CONSTANT i32 -1
+    ; GFX9: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
+    ; GFX9: [[S_GETREG_B32_:%[0-9]+]]:sreg_32(s32) = S_GETREG_B32 31759
+    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[S_GETREG_B32_]], [[C2]](s32)
+    ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p3), [[C]]
+    ; GFX9: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p3)
+    ; GFX9: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[SHL]](s32)
+    ; GFX9: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[MV]], [[C1]]
+    ; GFX9: $vgpr0_vgpr1 = COPY [[SELECT]](p0)
+    %0:_(p3) = COPY $vgpr0
+    %1:_(p0) = G_ADDRSPACE_CAST %0
+    $vgpr0_vgpr1 = COPY %1
+...
+
+---
+name: test_addrspacecast_p0_to_p3
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+
+    ; VI-LABEL: name: test_addrspacecast_p0_to_p3
+    ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
+    ; VI: [[C:%[0-9]+]]:_(p3) = G_CONSTANT i32 -1
+    ; VI: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
+    ; VI: [[EXTRACT:%[0-9]+]]:_(p3) = G_EXTRACT [[COPY]](p0), 0
+    ; VI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p0), [[C1]]
+    ; VI: [[SELECT:%[0-9]+]]:_(p3) = G_SELECT [[ICMP]](s1), [[EXTRACT]], [[C]]
+    ; VI: $vgpr0 = COPY [[SELECT]](p3)
+    ; GFX9-LABEL: name: test_addrspacecast_p0_to_p3
+    ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
+    ; GFX9: [[C:%[0-9]+]]:_(p3) = G_CONSTANT i32 -1
+    ; GFX9: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
+    ; GFX9: [[EXTRACT:%[0-9]+]]:_(p3) = G_EXTRACT [[COPY]](p0), 0
+    ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p0), [[C1]]
+    ; GFX9: [[SELECT:%[0-9]+]]:_(p3) = G_SELECT [[ICMP]](s1), [[EXTRACT]], [[C]]
+    ; GFX9: $vgpr0 = COPY [[SELECT]](p3)
+    %0:_(p0) = COPY $vgpr0_vgpr1
+    %1:_(p3) = G_ADDRSPACE_CAST %0
+    $vgpr0 = COPY %1
+...
+
+---
+name: test_addrspacecast_v2p0_to_v2p1
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3
+
+    ; VI-LABEL: name: test_addrspacecast_v2p0_to_v2p1
+    ; VI: [[COPY:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    ; VI: [[UV:%[0-9]+]]:_(p0), [[UV1:%[0-9]+]]:_(p0) = G_UNMERGE_VALUES [[COPY]](<2 x p0>)
+    ; VI: [[COPY1:%[0-9]+]]:_(p1) = COPY [[UV]](p0)
+    ; VI: [[COPY2:%[0-9]+]]:_(p1) = COPY [[UV1]](p0)
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[COPY1]](p1), [[COPY2]](p1)
+    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x p1>)
+    ; GFX9-LABEL: name: test_addrspacecast_v2p0_to_v2p1
+    ; GFX9: [[COPY:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    ; GFX9: [[UV:%[0-9]+]]:_(p0), [[UV1:%[0-9]+]]:_(p0) = G_UNMERGE_VALUES [[COPY]](<2 x p0>)
+    ; GFX9: [[COPY1:%[0-9]+]]:_(p1) = COPY [[UV]](p0)
+    ; GFX9: [[COPY2:%[0-9]+]]:_(p1) = COPY [[UV1]](p0)
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[COPY1]](p1), [[COPY2]](p1)
+    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x p1>)
+    %0:_(<2 x p0>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    %1:_(<2 x p1>) = G_ADDRSPACE_CAST %0
+    $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
+...
+
+---
+name: test_addrspacecast_v2p1_to_v2p0
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3
+
+    ; VI-LABEL: name: test_addrspacecast_v2p1_to_v2p0
+    ; VI: [[COPY:%[0-9]+]]:_(<2 x p1>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    ; VI: [[UV:%[0-9]+]]:_(p1), [[UV1:%[0-9]+]]:_(p1) = G_UNMERGE_VALUES [[COPY]](<2 x p1>)
+    ; VI: [[COPY1:%[0-9]+]]:_(p0) = COPY [[UV]](p1)
+    ; VI: [[COPY2:%[0-9]+]]:_(p0) = COPY [[UV1]](p1)
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p0>) = G_BUILD_VECTOR [[COPY1]](p0), [[COPY2]](p0)
+    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x p0>)
+    ; GFX9-LABEL: name: test_addrspacecast_v2p1_to_v2p0
+    ; GFX9: [[COPY:%[0-9]+]]:_(<2 x p1>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    ; GFX9: [[UV:%[0-9]+]]:_(p1), [[UV1:%[0-9]+]]:_(p1) = G_UNMERGE_VALUES [[COPY]](<2 x p1>)
+    ; GFX9: [[COPY1:%[0-9]+]]:_(p0) = COPY [[UV]](p1)
+    ; GFX9: [[COPY2:%[0-9]+]]:_(p0) = COPY [[UV1]](p1)
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p0>) = G_BUILD_VECTOR [[COPY1]](p0), [[COPY2]](p0)
+    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x p0>)
+    %0:_(<2 x p1>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    %1:_(<2 x p0>) = G_ADDRSPACE_CAST %0
+    $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
+...
+
+---
+name: test_addrspacecast_v2p0_to_v2p3
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3
+
+    ; VI-LABEL: name: test_addrspacecast_v2p0_to_v2p3
+    ; VI: [[COPY:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    ; VI: [[UV:%[0-9]+]]:_(p0), [[UV1:%[0-9]+]]:_(p0) = G_UNMERGE_VALUES [[COPY]](<2 x p0>)
+    ; VI: [[C:%[0-9]+]]:_(p3) = G_CONSTANT i32 -1
+    ; VI: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
+    ; VI: [[EXTRACT:%[0-9]+]]:_(p3) = G_EXTRACT [[UV]](p0), 0
+    ; VI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](p0), [[C1]]
+    ; VI: [[SELECT:%[0-9]+]]:_(p3) = G_SELECT [[ICMP]](s1), [[EXTRACT]], [[C]]
+    ; VI: [[C2:%[0-9]+]]:_(p3) = G_CONSTANT i32 -1
+    ; VI: [[C3:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
+    ; VI: [[EXTRACT1:%[0-9]+]]:_(p3) = G_EXTRACT [[UV1]](p0), 0
+    ; VI: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](p0), [[C3]]
+    ; VI: [[SELECT1:%[0-9]+]]:_(p3) = G_SELECT [[ICMP1]](s1), [[EXTRACT1]], [[C2]]
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[SELECT]](p3), [[SELECT1]](p3)
+    ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
+    ; GFX9-LABEL: name: test_addrspacecast_v2p0_to_v2p3
+    ; GFX9: [[COPY:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    ; GFX9: [[UV:%[0-9]+]]:_(p0), [[UV1:%[0-9]+]]:_(p0) = G_UNMERGE_VALUES [[COPY]](<2 x p0>)
+    ; GFX9: [[C:%[0-9]+]]:_(p3) = G_CONSTANT i32 -1
+    ; GFX9: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
+    ; GFX9: [[EXTRACT:%[0-9]+]]:_(p3) = G_EXTRACT [[UV]](p0), 0
+    ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](p0), [[C1]]
+    ; GFX9: [[SELECT:%[0-9]+]]:_(p3) = G_SELECT [[ICMP]](s1), [[EXTRACT]], [[C]]
+    ; GFX9: [[C2:%[0-9]+]]:_(p3) = G_CONSTANT i32 -1
+    ; GFX9: [[C3:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
+    ; GFX9: [[EXTRACT1:%[0-9]+]]:_(p3) = G_EXTRACT [[UV1]](p0), 0
+    ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](p0), [[C3]]
+    ; GFX9: [[SELECT1:%[0-9]+]]:_(p3) = G_SELECT [[ICMP1]](s1), [[EXTRACT1]], [[C2]]
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[SELECT]](p3), [[SELECT1]](p3)
+    ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
+    %0:_(<2 x p0>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    %1:_(<2 x p3>) = G_ADDRSPACE_CAST %0
+    $vgpr0_vgpr1 = COPY %1
+...
+
+---
+name: test_addrspacecast_v2p3_to_v2p0
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+
+    ; VI-LABEL: name: test_addrspacecast_v2p3_to_v2p0
+    ; VI: [[COPY:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr0_vgpr1
+    ; VI: [[UV:%[0-9]+]]:_(p3), [[UV1:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[COPY]](<2 x p3>)
+    ; VI: [[C:%[0-9]+]]:_(p3) = G_CONSTANT i32 -1
+    ; VI: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
+    ; VI: [[C2:%[0-9]+]]:_(p4) = G_CONSTANT i64 3735928559
+    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
+    ; VI: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[C2]], [[C3]](s64)
+    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p4) :: (dereferenceable invariant load 4 from `i8 addrspace(4)* undef` + 64, align 64, addrspace 4)
+    ; VI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](p3), [[C]]
+    ; VI: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV]](p3)
+    ; VI: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[LOAD]](s32)
+    ; VI: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[MV]], [[C1]]
+    ; VI: [[C4:%[0-9]+]]:_(p3) = G_CONSTANT i32 -1
+    ; VI: [[C5:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
+    ; VI: [[C6:%[0-9]+]]:_(p4) = G_CONSTANT i64 3735928559
+    ; VI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
+    ; VI: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[C6]], [[C7]](s64)
+    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p4) :: (dereferenceable invariant load 4 from `i8 addrspace(4)* undef` + 64, align 64, addrspace 4)
+    ; VI: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](p3), [[C4]]
+    ; VI: [[PTRTOINT1:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV1]](p3)
+    ; VI: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT1]](s32), [[LOAD1]](s32)
+    ; VI: [[SELECT1:%[0-9]+]]:_(p0) = G_SELECT [[ICMP1]](s1), [[MV1]], [[C5]]
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p0>) = G_BUILD_VECTOR [[SELECT]](p0), [[SELECT1]](p0)
+    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x p0>)
+    ; GFX9-LABEL: name: test_addrspacecast_v2p3_to_v2p0
+    ; GFX9: [[COPY:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr0_vgpr1
+    ; GFX9: [[UV:%[0-9]+]]:_(p3), [[UV1:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[COPY]](<2 x p3>)
+    ; GFX9: [[C:%[0-9]+]]:_(p3) = G_CONSTANT i32 -1
+    ; GFX9: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
+    ; GFX9: [[S_GETREG_B32_:%[0-9]+]]:sreg_32(s32) = S_GETREG_B32 31759
+    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[S_GETREG_B32_]], [[C2]](s32)
+    ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](p3), [[C]]
+    ; GFX9: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV]](p3)
+    ; GFX9: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[SHL]](s32)
+    ; GFX9: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[MV]], [[C1]]
+    ; GFX9: [[C3:%[0-9]+]]:_(p3) = G_CONSTANT i32 -1
+    ; GFX9: [[C4:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
+    ; GFX9: [[S_GETREG_B32_1:%[0-9]+]]:sreg_32(s32) = S_GETREG_B32 31759
+    ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[S_GETREG_B32_1]], [[C5]](s32)
+    ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](p3), [[C3]]
+    ; GFX9: [[PTRTOINT1:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV1]](p3)
+    ; GFX9: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT1]](s32), [[SHL1]](s32)
+    ; GFX9: [[SELECT1:%[0-9]+]]:_(p0) = G_SELECT [[ICMP1]](s1), [[MV1]], [[C4]]
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p0>) = G_BUILD_VECTOR [[SELECT]](p0), [[SELECT1]](p0)
+    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x p0>)
+    %0:_(<2 x p3>) = COPY $vgpr0_vgpr1
+    %1:_(<2 x p0>) = G_ADDRSPACE_CAST %0
+    $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
+...