AMDGPU/GlobalISel: Split 64-bit G_CTPOP in RegBankSelect
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index fd24813..d8e3269 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -2084,6 +2084,29 @@
     MI.eraseFromParent();
     return;
   }
+  case AMDGPU::G_CTPOP: {
+    MachineIRBuilder B(MI);
+    MachineFunction &MF = B.getMF();
+
+    const RegisterBank *DstBank =
+      OpdMapper.getInstrMapping().getOperandMapping(0).BreakDown[0].RegBank;
+    if (DstBank == &AMDGPU::SGPRRegBank)
+      break;
+
+    Register SrcReg = MI.getOperand(1).getReg();
+    const LLT S32 = LLT::scalar(32);
+    LLT Ty = MRI.getType(SrcReg);
+    if (Ty == S32)
+      break;
+
+    ApplyRegBankMapping ApplyVALU(*this, MRI, &AMDGPU::VGPRRegBank);
+    GISelObserverWrapper Observer(&ApplyVALU);
+    LegalizerHelper Helper(MF, Observer, B);
+
+    if (Helper.narrowScalar(MI, 1, S32) != LegalizerHelper::Legalized)
+      llvm_unreachable("widenScalar should have succeeded");
+    return;
+  }
   case AMDGPU::G_SEXT:
   case AMDGPU::G_ZEXT: {
     Register SrcReg = MI.getOperand(1).getReg();
@@ -3172,9 +3195,6 @@
   case AMDGPU::G_BITCAST:
   case AMDGPU::G_INTTOPTR:
   case AMDGPU::G_PTRTOINT:
-  case AMDGPU::G_CTLZ_ZERO_UNDEF:
-  case AMDGPU::G_CTTZ_ZERO_UNDEF:
-  case AMDGPU::G_CTPOP:
   case AMDGPU::G_BSWAP:
   case AMDGPU::G_BITREVERSE:
   case AMDGPU::G_FABS:
@@ -3184,6 +3204,21 @@
     OpdsMapping[0] = OpdsMapping[1] = AMDGPU::getValueMapping(BankID, Size);
     break;
   }
+  case AMDGPU::G_CTLZ:
+  case AMDGPU::G_CTLZ_ZERO_UNDEF:
+  case AMDGPU::G_CTTZ:
+  case AMDGPU::G_CTTZ_ZERO_UNDEF:
+  case AMDGPU::G_CTPOP: {
+    unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
+    unsigned BankID = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI);
+    OpdsMapping[0] = AMDGPU::getValueMapping(BankID, 32);
+
+    // This should really be getValueMappingSGPR64Only, but allowing the generic
+    // code to handle the register split just makes using LegalizerHelper more
+    // difficult.
+    OpdsMapping[1] = AMDGPU::getValueMapping(BankID, Size);
+    break;
+  }
   case AMDGPU::G_TRUNC: {
     Register Dst = MI.getOperand(0).getReg();
     Register Src = MI.getOperand(1).getReg();
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ctpop.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ctpop.mir
index e469437..30a367f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ctpop.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ctpop.mir
@@ -3,29 +3,70 @@
 # RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s
 
 ---
-name: ctpop_i32_s
+name: ctpop_s32_s
 legalized: true
 
 body: |
   bb.0:
     liveins: $sgpr0
-    ; CHECK-LABEL: name: ctpop_i32_s
+    ; CHECK-LABEL: name: ctpop_s32_s
     ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
-    ; CHECK: [[CTPOP:%[0-9]+]]:sgpr(s32) = G_CTPOP [[COPY]]
+    ; CHECK: [[CTPOP:%[0-9]+]]:sgpr(s32) = G_CTPOP [[COPY]](s32)
+    ; CHECK: S_ENDPGM 0, implicit [[CTPOP]](s32)
     %0:_(s32) = COPY $sgpr0
     %1:_(s32) = G_CTPOP %0
+    S_ENDPGM 0, implicit %1
 ...
 
 ---
-name: ctpop_i32_v
+name: ctpop_s32_v
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; CHECK-LABEL: name: ctpop_s32_v
+    ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; CHECK: [[CTPOP:%[0-9]+]]:vgpr(s32) = G_CTPOP [[COPY]](s32)
+    ; CHECK: S_ENDPGM 0, implicit [[CTPOP]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = G_CTPOP %0
+    S_ENDPGM 0, implicit %1
+...
+
+---
+name: ctpop_s64_s
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1
+
+    ; CHECK-LABEL: name: ctpop_s64_s
+    ; CHECK: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1
+    ; CHECK: [[CTPOP:%[0-9]+]]:sgpr(s32) = G_CTPOP [[COPY]](s64)
+    ; CHECK: S_ENDPGM 0, implicit [[CTPOP]](s32)
+    %0:_(s64) = COPY $sgpr0_sgpr1
+    %1:_(s32) = G_CTPOP %0
+    S_ENDPGM 0, implicit %1
+...
+
+---
+name: ctpop_s64_v
 legalized: true
 
 body: |
   bb.0:
     liveins: $vgpr0_vgpr1
-    ; CHECK-LABEL: name: ctpop_i32_v
-    ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
-    ; CHECK: [[CTPOP:%[0-9]+]]:vgpr(s32) = G_CTPOP [[COPY]]
-    %0:_(s32) = COPY $vgpr0
+
+    ; CHECK-LABEL: name: ctpop_s64_v
+    ; CHECK: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
+    ; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64)
+    ; CHECK: [[CTPOP:%[0-9]+]]:vgpr(s32) = G_CTPOP [[UV]](s32)
+    ; CHECK: [[CTPOP1:%[0-9]+]]:vgpr(s32) = G_CTPOP [[UV1]](s32)
+    ; CHECK: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[CTPOP1]], [[CTPOP]]
+    ; CHECK: S_ENDPGM 0, implicit [[ADD]](s32)
+    %0:_(s64) = COPY $vgpr0_vgpr1
     %1:_(s32) = G_CTPOP %0
+    S_ENDPGM 0, implicit %1
 ...