AMDGPU/GlobalISel: Fix bit count ops for non-power-of-2 types

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@354587 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 0d0496e..dd05534 100644
--- a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -307,7 +307,9 @@
     .legalFor({{S32, S32}, {S32, S64}})
     .clampScalar(0, S32, S32)
     .clampScalar(1, S32, S64)
-    .scalarize(0);
+    .scalarize(0)
+    .widenScalarToNextPow2(0, 32)
+    .widenScalarToNextPow2(1, 32);
 
   // TODO: Expand for > s32
   getActionDefinitionsBuilder(G_BSWAP)
diff --git a/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz-zero-undef.mir b/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz-zero-undef.mir
index ff93788..a1322c9 100644
--- a/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz-zero-undef.mir
+++ b/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz-zero-undef.mir
@@ -155,3 +155,61 @@
     %1:_(<2 x s16>) = G_CTLZ_ZERO_UNDEF %0
     $vgpr0 = COPY %1
 ...
+
+---
+name: ctlz_zero_undef_s7_s7
+
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: ctlz_zero_undef_s7_s7
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
+    ; CHECK: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[AND]](s32)
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 25
+    ; CHECK: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[CTLZ_ZERO_UNDEF]], [[C1]]
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[SUB]](s32)
+    ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 127
+    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
+    ; CHECK: $vgpr0 = COPY [[AND1]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s7) = G_TRUNC %0
+    %2:_(s7) = G_CTLZ_ZERO_UNDEF %1
+    %3:_(s32) = G_ZEXT %2
+    $vgpr0 = COPY %3
+...
+
+---
+name: ctlz_zero_undef_s33_s33
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+
+    ; CHECK-LABEL: name: ctlz_zero_undef_s33_s33
+    ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934591
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY [[COPY]](s64)
+    ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]]
+    ; CHECK: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[AND]](s64)
+    ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[CTLZ_ZERO_UNDEF]](s32)
+    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 31
+    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXT]](s64)
+    ; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C1]](s64)
+    ; CHECK: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]]
+    ; CHECK: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]]
+    ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32)
+    ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY [[MV]](s64)
+    ; CHECK: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[C2]]
+    ; CHECK: $vgpr0_vgpr1 = COPY [[AND1]](s64)
+    %0:_(s64) = COPY $vgpr0_vgpr1
+    %1:_(s33) = G_TRUNC %0
+    %2:_(s33) = G_CTLZ_ZERO_UNDEF %1
+    %3:_(s64) = G_ANYEXT %2
+    $vgpr0_vgpr1 = COPY %3
+...
diff --git a/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz.mir b/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz.mir
index 5cbdea9..41a5daa 100644
--- a/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz.mir
+++ b/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz.mir
@@ -155,3 +155,61 @@
     %1:_(<2 x s16>) = G_CTLZ %0
     $vgpr0 = COPY %1
 ...
+
+---
+name: ctlz_s7_s7
+
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: ctlz_s7_s7
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
+    ; CHECK: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[AND]](s32)
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 25
+    ; CHECK: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[CTLZ]], [[C1]]
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[SUB]](s32)
+    ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 127
+    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
+    ; CHECK: $vgpr0 = COPY [[AND1]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s7) = G_TRUNC %0
+    %2:_(s7) = G_CTLZ %1
+    %3:_(s32) = G_ZEXT %2
+    $vgpr0 = COPY %3
+...
+
+---
+name: ctlz_s33_s33
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+
+    ; CHECK-LABEL: name: ctlz_s33_s33
+    ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934591
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY [[COPY]](s64)
+    ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]]
+    ; CHECK: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[AND]](s64)
+    ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[CTLZ]](s32)
+    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 31
+    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXT]](s64)
+    ; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C1]](s64)
+    ; CHECK: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]]
+    ; CHECK: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]]
+    ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32)
+    ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY [[MV]](s64)
+    ; CHECK: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[C2]]
+    ; CHECK: $vgpr0_vgpr1 = COPY [[AND1]](s64)
+    %0:_(s64) = COPY $vgpr0_vgpr1
+    %1:_(s33) = G_TRUNC %0
+    %2:_(s33) = G_CTLZ %1
+    %3:_(s64) = G_ANYEXT %2
+    $vgpr0_vgpr1 = COPY %3
+...
diff --git a/test/CodeGen/AMDGPU/GlobalISel/legalize-ctpop.mir b/test/CodeGen/AMDGPU/GlobalISel/legalize-ctpop.mir
index ebb59c1..7166235 100644
--- a/test/CodeGen/AMDGPU/GlobalISel/legalize-ctpop.mir
+++ b/test/CodeGen/AMDGPU/GlobalISel/legalize-ctpop.mir
@@ -149,3 +149,53 @@
     %1:_(<2 x s16>) = G_CTPOP %0
     $vgpr0 = COPY %1
 ...
+
+---
+name: ctpop_s7_s7
+
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: ctpop_s7_s7
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
+    ; CHECK: [[CTPOP:%[0-9]+]]:_(s32) = G_CTPOP [[AND]](s32)
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[CTPOP]](s32)
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 127
+    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]]
+    ; CHECK: $vgpr0 = COPY [[AND1]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s7) = G_TRUNC %0
+    %2:_(s7) = G_CTPOP %1
+    %3:_(s32) = G_ZEXT %2
+    $vgpr0 = COPY %3
+...
+
+---
+name: ctpop_s33_s33
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+
+    ; CHECK-LABEL: name: ctpop_s33_s33
+    ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934591
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY [[COPY]](s64)
+    ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]]
+    ; CHECK: [[CTPOP:%[0-9]+]]:_(s32) = G_CTPOP [[AND]](s64)
+    ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[CTPOP]](s32)
+    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY [[ZEXT]](s64)
+    ; CHECK: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[C1]]
+    ; CHECK: $vgpr0_vgpr1 = COPY [[AND1]](s64)
+    %0:_(s64) = COPY $vgpr0_vgpr1
+    %1:_(s33) = G_TRUNC %0
+    %2:_(s33) = G_CTPOP %1
+    %3:_(s64) = G_ANYEXT %2
+    $vgpr0_vgpr1 = COPY %3
+...
diff --git a/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz-zero-undef.mir b/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz-zero-undef.mir
index ba3516b..800a64e 100644
--- a/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz-zero-undef.mir
+++ b/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz-zero-undef.mir
@@ -149,3 +149,53 @@
     %1:_(<2 x s16>) = G_CTTZ_ZERO_UNDEF %0
     $vgpr0 = COPY %1
 ...
+
+---
+name: cttz_zero_undef_s7_s7
+
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: cttz_zero_undef_s7_s7
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
+    ; CHECK: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[AND]](s32)
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[CTTZ_ZERO_UNDEF]](s32)
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 127
+    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]]
+    ; CHECK: $vgpr0 = COPY [[AND1]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s7) = G_TRUNC %0
+    %2:_(s7) = G_CTTZ_ZERO_UNDEF %1
+    %3:_(s32) = G_ZEXT %2
+    $vgpr0 = COPY %3
+...
+
+---
+name: cttz_zero_undef_s33_s33
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+
+    ; CHECK-LABEL: name: cttz_zero_undef_s33_s33
+    ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934591
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY [[COPY]](s64)
+    ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]]
+    ; CHECK: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[AND]](s64)
+    ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[CTTZ_ZERO_UNDEF]](s32)
+    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY [[ZEXT]](s64)
+    ; CHECK: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[C1]]
+    ; CHECK: $vgpr0_vgpr1 = COPY [[AND1]](s64)
+    %0:_(s64) = COPY $vgpr0_vgpr1
+    %1:_(s33) = G_TRUNC %0
+    %2:_(s33) = G_CTTZ_ZERO_UNDEF %1
+    %3:_(s64) = G_ANYEXT %2
+    $vgpr0_vgpr1 = COPY %3
+...
diff --git a/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz.mir b/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz.mir
index a73f5af..2e9a40a 100644
--- a/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz.mir
+++ b/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz.mir
@@ -155,3 +155,57 @@
     %1:_(<2 x s16>) = G_CTTZ %0
     $vgpr0 = COPY %1
 ...
+
+---
+name: cttz_s7_s7
+
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: cttz_s7_s7
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 128
+    ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[C1]]
+    ; CHECK: [[CTTZ:%[0-9]+]]:_(s32) = G_CTTZ [[OR]](s32)
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[CTTZ]](s32)
+    ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 127
+    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
+    ; CHECK: $vgpr0 = COPY [[AND1]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s7) = G_TRUNC %0
+    %2:_(s7) = G_CTTZ %1
+    %3:_(s32) = G_ZEXT %2
+    $vgpr0 = COPY %3
+...
+
+---
+name: cttz_s33_s33
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+
+    ; CHECK-LABEL: name: cttz_s33_s33
+    ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934591
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY [[COPY]](s64)
+    ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]]
+    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934592
+    ; CHECK: [[OR:%[0-9]+]]:_(s64) = G_OR [[AND]], [[C1]]
+    ; CHECK: [[CTTZ:%[0-9]+]]:_(s32) = G_CTTZ [[OR]](s64)
+    ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[CTTZ]](s32)
+    ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY [[ZEXT]](s64)
+    ; CHECK: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[C2]]
+    ; CHECK: $vgpr0_vgpr1 = COPY [[AND1]](s64)
+    %0:_(s64) = COPY $vgpr0_vgpr1
+    %1:_(s33) = G_TRUNC %0
+    %2:_(s33) = G_CTTZ %1
+    %3:_(s64) = G_ANYEXT %2
+    $vgpr0_vgpr1 = COPY %3
+...