; llvm/test/Transforms/AggressiveInstCombine/popcount.ll - llvm-project - Git at Google

 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -passes=aggressive-instcombine -S | FileCheck %s

 ;int popcount8(unsigned char i) {
 ;  i = i - ((i >> 1) & 0x55);
 ;  i = (i & 0x33) + ((i >> 2) & 0x33);
 ;  i = ((i + (i >> 4)) & 0x0F);
 ; return (i * 0x01010101);
 ;}
 ; i8 scalar form of the bit-twiddling population count from the C comment
 ; above. The autogenerated assertions below mirror the input instruction
 ; sequence one-for-one, i.e. the pass is expected to leave this function
 ; unchanged (no llvm.ctpop call is expected here).
 ; NOTE(review): unlike the 32/64/128-bit tests in this file, this input has no
 ; trailing multiply/shift step -- presumably why it is not rewritten; confirm
 ; against the pass implementation.
 define signext i32 @popcount8(i8 zeroext %0) {
 ; CHECK-LABEL: @popcount8(
 ; CHECK-NEXT:    [[TMP2:%.*]] = lshr i8 [[TMP0:%.*]], 1
 ; CHECK-NEXT:    [[TMP3:%.*]] = and i8 [[TMP2]], 85
 ; CHECK-NEXT:    [[TMP4:%.*]] = sub i8 [[TMP0]], [[TMP3]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = and i8 [[TMP4]], 51
 ; CHECK-NEXT:    [[TMP6:%.*]] = lshr i8 [[TMP4]], 2
 ; CHECK-NEXT:    [[TMP7:%.*]] = and i8 [[TMP6]], 51
 ; CHECK-NEXT:    [[TMP8:%.*]] = add nuw nsw i8 [[TMP7]], [[TMP5]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = lshr i8 [[TMP8]], 4
 ; CHECK-NEXT:    [[TMP10:%.*]] = add nuw nsw i8 [[TMP9]], [[TMP8]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = and i8 [[TMP10]], 15
 ; CHECK-NEXT:    [[TMP12:%.*]] = zext i8 [[TMP11]] to i32
 ; CHECK-NEXT:    ret i32 [[TMP12]]
 ;
   %2 = lshr i8 %0, 1               ; i >> 1
   %3 = and i8 %2, 85               ; (i >> 1) & 0x55
   %4 = sub i8 %0, %3               ; i - ((i >> 1) & 0x55)
   %5 = and i8 %4, 51               ; i & 0x33
   %6 = lshr i8 %4, 2               ; i >> 2
   %7 = and i8 %6, 51               ; (i >> 2) & 0x33
   %8 = add nuw nsw i8 %7, %5       ; (i & 0x33) + ((i >> 2) & 0x33)
   %9 = lshr i8 %8, 4               ; i >> 4
   %10 = add nuw nsw i8 %9, %8      ; i + (i >> 4)
   %11 = and i8 %10, 15             ; (i + (i >> 4)) & 0x0F
   %12 = zext i8 %11 to i32         ; widen the i8 result to the i32 return type
   ret i32 %12
 }

 ;int popcount32(unsigned i) {
 ;  i = i - ((i >> 1) & 0x55555555);
 ;  i = (i & 0x33333333) + ((i >> 2) & 0x33333333);
 ;  i = ((i + (i >> 4)) & 0x0F0F0F0F);
 ; return (i * 0x01010101) >> 24;
 ;}
 ; i32 scalar form of the population-count idiom from the C comment above.
 ; The assertions expect the whole sequence to be replaced by a single call to
 ; @llvm.ctpop.i32 on the argument, whose result is returned directly.
 define signext i32 @popcount32(i32 zeroext %0) {
 ; CHECK-LABEL: @popcount32(
 ; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.ctpop.i32(i32 [[TMP0:%.*]])
 ; CHECK-NEXT:    ret i32 [[TMP2]]
 ;
   %2 = lshr i32 %0, 1              ; i >> 1
   %3 = and i32 %2, 1431655765      ; (i >> 1) & 0x55555555
   %4 = sub i32 %0, %3              ; i - ((i >> 1) & 0x55555555)
   %5 = and i32 %4, 858993459       ; i & 0x33333333
   %6 = lshr i32 %4, 2              ; i >> 2
   %7 = and i32 %6, 858993459       ; (i >> 2) & 0x33333333
   %8 = add nuw nsw i32 %7, %5      ; sum of the two masked values
   %9 = lshr i32 %8, 4              ; i >> 4
   %10 = add nuw nsw i32 %9, %8     ; i + (i >> 4)
   %11 = and i32 %10, 252645135     ; (i + (i >> 4)) & 0x0F0F0F0F
   %12 = mul i32 %11, 16843009      ; i * 0x01010101
   %13 = lshr i32 %12, 24           ; (i * 0x01010101) >> 24
   ret i32 %13
 }

 ;int popcount64(unsigned long long i) {
 ;  i = i - ((i >> 1) & 0x5555555555555555);
 ;  i = (i & 0x3333333333333333) + ((i >> 2) & 0x3333333333333333);
 ;  i = ((i + (i >> 4)) & 0x0F0F0F0F0F0F0F0F);
 ; return (i * 0x0101010101010101) >> 56;
 ;}
 ; i64 scalar form of the population-count idiom from the C comment above.
 ; The assertions expect the sequence to be replaced by a call to
 ; @llvm.ctpop.i64 on the argument; the trunc to the i32 return type remains.
 define signext i32 @popcount64(i64 %0) {
 ; CHECK-LABEL: @popcount64(
 ; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP0:%.*]])
 ; CHECK-NEXT:    [[TMP3:%.*]] = trunc i64 [[TMP2]] to i32
 ; CHECK-NEXT:    ret i32 [[TMP3]]
 ;
   %2 = lshr i64 %0, 1                        ; i >> 1
   %3 = and i64 %2, 6148914691236517205       ; (i >> 1) & 0x5555555555555555
   %4 = sub i64 %0, %3                        ; i - ((i >> 1) & 0x5555555555555555)
   %5 = and i64 %4, 3689348814741910323       ; i & 0x3333333333333333
   %6 = lshr i64 %4, 2                        ; i >> 2
   %7 = and i64 %6, 3689348814741910323       ; (i >> 2) & 0x3333333333333333
   %8 = add nuw nsw i64 %7, %5                ; sum of the two masked values
   %9 = lshr i64 %8, 4                        ; i >> 4
   %10 = add nuw nsw i64 %9, %8               ; i + (i >> 4)
   %11 = and i64 %10, 1085102592571150095     ; (i + (i >> 4)) & 0x0F0F0F0F0F0F0F0F
   %12 = mul i64 %11, 72340172838076673       ; i * 0x0101010101010101
   %13 = lshr i64 %12, 56                     ; (i * 0x0101010101010101) >> 56
   %14 = trunc i64 %13 to i32                 ; narrow to the i32 return type
   ret i32 %14
 }

 ;int popcount128(__uint128_t i) {
 ;  __uint128_t x = 0x5555555555555555;
 ;  x <<= 64;
 ;  x |= 0x5555555555555555;
 ;  __uint128_t y = 0x3333333333333333;
 ;  y <<= 64;
 ;  y |= 0x3333333333333333;
 ;  __uint128_t z = 0x0f0f0f0f0f0f0f0f;
 ;  z <<= 64;
 ;  z |= 0x0f0f0f0f0f0f0f0f;
 ;  __uint128_t a = 0x0101010101010101;
 ;  a <<= 64;
 ;  a |= 0x0101010101010101;
 ;  unsigned mask = 120;
 ;  i = i - ((i >> 1) & x);
 ;  i = (i & y) + ((i >> 2) & y);
 ;  i = ((i + (i >> 4)) & z);
 ;  return (i * a) >> mask;
 ;}
 ; i128 scalar form of the population-count idiom from the C comment above
 ; (x, y, z, a and mask refer to the variables of that C function).
 ; The assertions expect the sequence to be replaced by a call to
 ; @llvm.ctpop.i128 on the argument; the trunc to the i32 return type remains.
 define signext i32 @popcount128(i128 %0) {
 ; CHECK-LABEL: @popcount128(
 ; CHECK-NEXT:    [[TMP2:%.*]] = call i128 @llvm.ctpop.i128(i128 [[TMP0:%.*]])
 ; CHECK-NEXT:    [[TMP3:%.*]] = trunc i128 [[TMP2]] to i32
 ; CHECK-NEXT:    ret i32 [[TMP3]]
 ;
   %2 = lshr i128 %0, 1                                            ; i >> 1
   %3 = and i128 %2, 113427455640312821154458202477256070485       ; (i >> 1) & x
   %4 = sub i128 %0, %3                                            ; i - ((i >> 1) & x)
   %5 = and i128 %4, 68056473384187692692674921486353642291        ; i & y
   %6 = lshr i128 %4, 2                                            ; i >> 2
   %7 = and i128 %6, 68056473384187692692674921486353642291        ; (i >> 2) & y
   %8 = add nuw nsw i128 %7, %5                                    ; (i & y) + ((i >> 2) & y)
   %9 = lshr i128 %8, 4                                            ; i >> 4
   %10 = add nuw nsw i128 %9, %8                                   ; i + (i >> 4)
   %11 = and i128 %10, 20016609818878733144904388672456953615      ; (i + (i >> 4)) & z
   %12 = mul i128 %11, 1334440654591915542993625911497130241       ; i * a
   %13 = lshr i128 %12, 120                                        ; (i * a) >> mask
   %14 = trunc i128 %13 to i32                                     ; narrow to the i32 return type
   ret i32 %14
 }

 ;vector unsigned char popcount8vec(vector unsigned char i)
 ;{
 ;  i = i - ((i>> 1) & 0x55);
 ;  i = (i & 0x33) + ((i >> 2) & 0x33);
 ;  i = ((i + (i >> 4)) & 0x0F);
 ;  return (i * 0x01);
 ;}
 ; <16 x i8> vector form of the idiom from the C comment above; every constant
 ; operand is a splat. The autogenerated assertions below mirror the input
 ; instruction sequence one-for-one, i.e. the pass is expected to leave this
 ; function unchanged (no llvm.ctpop call is expected here).
 define <16 x i8> @popcount8vec(<16 x i8> %0) {
 ; CHECK-LABEL: @popcount8vec(
 ; CHECK-NEXT:    [[TMP2:%.*]] = lshr <16 x i8> [[TMP0:%.*]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
 ; CHECK-NEXT:    [[TMP3:%.*]] = and <16 x i8> [[TMP2]], <i8 85, i8 85, i8 85, i8 85, i8 85, i8 85, i8 85, i8 85, i8 85, i8 85, i8 85, i8 85, i8 85, i8 85, i8 85, i8 85>
 ; CHECK-NEXT:    [[TMP4:%.*]] = sub <16 x i8> [[TMP0]], [[TMP3]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = and <16 x i8> [[TMP4]], <i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51>
 ; CHECK-NEXT:    [[TMP6:%.*]] = lshr <16 x i8> [[TMP4]], <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
 ; CHECK-NEXT:    [[TMP7:%.*]] = and <16 x i8> [[TMP6]], <i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51>
 ; CHECK-NEXT:    [[TMP8:%.*]] = add nuw nsw <16 x i8> [[TMP7]], [[TMP5]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = lshr <16 x i8> [[TMP8]], <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
 ; CHECK-NEXT:    [[TMP10:%.*]] = add nuw nsw <16 x i8> [[TMP9]], [[TMP8]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = and <16 x i8> [[TMP10]], <i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15>
 ; CHECK-NEXT:    ret <16 x i8> [[TMP11]]
 ;
   ; i - ((i >> 1) & 0x55), per lane
   %2 = lshr <16 x i8> %0, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
   %3 = and <16 x i8> %2, <i8 85, i8 85, i8 85, i8 85, i8 85, i8 85, i8 85, i8 85, i8 85, i8 85, i8 85, i8 85, i8 85, i8 85, i8 85, i8 85>
   %4 = sub <16 x i8> %0, %3
   ; (i & 0x33) + ((i >> 2) & 0x33), per lane
   %5 = and <16 x i8> %4, <i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51>
   %6 = lshr <16 x i8> %4, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
   %7 = and <16 x i8> %6, <i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51>
   %8 = add nuw nsw <16 x i8> %7, %5
   ; (i + (i >> 4)) & 0x0F, per lane
   %9 = lshr <16 x i8> %8, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
   %10 = add nuw nsw <16 x i8> %9, %8
   %11 = and <16 x i8> %10, <i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15>
   ret <16 x i8> %11
 }

 ;vector unsigned int popcount32vec(vector unsigned int i)
 ;{
 ;  i = i - ((i>> 1) & 0x55555555);
 ;  i = (i & 0x33333333) + ((i >> 2) & 0x33333333);
 ;  i = ((i + (i >> 4)) & 0x0F0F0F0F);
 ;  return (i * 0x01010101) >> 24;
 ;}
 ; <4 x i32> vector form of the idiom from the C comment above; every constant
 ; operand is a splat. The assertions expect the whole sequence to be replaced
 ; by a single call to @llvm.ctpop.v4i32 on the argument, returned directly.
 define <4 x i32> @popcount32vec(<4 x i32> %0) {
 ; CHECK-LABEL: @popcount32vec(
 ; CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> [[TMP0:%.*]])
 ; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
 ;
   ; i - ((i >> 1) & 0x55555555), per lane
   %2 = lshr <4 x i32> %0, <i32 1, i32 1, i32 1, i32 1>
   %3 = and <4 x i32> %2, <i32 1431655765, i32 1431655765, i32 1431655765, i32 1431655765>
   %4 = sub <4 x i32> %0, %3
   ; (i & 0x33333333) + ((i >> 2) & 0x33333333), per lane
   %5 = and <4 x i32> %4, <i32 858993459, i32 858993459, i32 858993459, i32 858993459>
   %6 = lshr <4 x i32> %4, <i32 2, i32 2, i32 2, i32 2>
   %7 = and <4 x i32> %6, <i32 858993459, i32 858993459, i32 858993459, i32 858993459>
   %8 = add nuw nsw <4 x i32> %7, %5
   ; (i + (i >> 4)) & 0x0F0F0F0F, per lane
   %9 = lshr <4 x i32> %8, <i32 4, i32 4, i32 4, i32 4>
   %10 = add nuw nsw <4 x i32> %9, %8
   %11 = and <4 x i32> %10, <i32 252645135, i32 252645135, i32 252645135, i32 252645135>
   ; (i * 0x01010101) >> 24, per lane
   %12 = mul <4 x i32> %11, <i32 16843009, i32 16843009, i32 16843009, i32 16843009>
   %13 = lshr <4 x i32> %12, <i32 24, i32 24, i32 24, i32 24>
   ret <4 x i32> %13
 }
	; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
	; RUN: opt < %s -passes=aggressive-instcombine -S | FileCheck %s

	;int popcount8(unsigned char i) {
	; i = i - ((i >> 1) & 0x55);
	; i = (i & 0x33) + ((i >> 2) & 0x33);
	; i = ((i + (i >> 4)) & 0x0F);
	; return (i * 0x01010101);
	;}
	; i8 scalar form of the bit-twiddling population count from the C comment
	; above. The assertions below mirror the input instruction sequence
	; one-for-one, i.e. the pass is expected to leave this function unchanged
	; (no llvm.ctpop call is expected here).
	; The first assertion's captures use the `%.*` wildcard like every other
	; capture in this function, so they do not depend on the length of the
	; printed value names.
	define signext i32 @popcount8(i8 zeroext %0) {
	; CHECK-LABEL: @popcount8(
	; CHECK-NEXT: [[TMP2:%.*]] = lshr i8 [[TMP0:%.*]], 1
	; CHECK-NEXT: [[TMP3:%.*]] = and i8 [[TMP2]], 85
	; CHECK-NEXT: [[TMP4:%.*]] = sub i8 [[TMP0]], [[TMP3]]
	; CHECK-NEXT: [[TMP5:%.*]] = and i8 [[TMP4]], 51
	; CHECK-NEXT: [[TMP6:%.*]] = lshr i8 [[TMP4]], 2
	; CHECK-NEXT: [[TMP7:%.*]] = and i8 [[TMP6]], 51
	; CHECK-NEXT: [[TMP8:%.*]] = add nuw nsw i8 [[TMP7]], [[TMP5]]
	; CHECK-NEXT: [[TMP9:%.*]] = lshr i8 [[TMP8]], 4
	; CHECK-NEXT: [[TMP10:%.*]] = add nuw nsw i8 [[TMP9]], [[TMP8]]
	; CHECK-NEXT: [[TMP11:%.*]] = and i8 [[TMP10]], 15
	; CHECK-NEXT: [[TMP12:%.*]] = zext i8 [[TMP11]] to i32
	; CHECK-NEXT: ret i32 [[TMP12]]
	;
	%2 = lshr i8 %0, 1               ; i >> 1
	%3 = and i8 %2, 85               ; (i >> 1) & 0x55
	%4 = sub i8 %0, %3               ; i - ((i >> 1) & 0x55)
	%5 = and i8 %4, 51               ; i & 0x33
	%6 = lshr i8 %4, 2               ; i >> 2
	%7 = and i8 %6, 51               ; (i >> 2) & 0x33
	%8 = add nuw nsw i8 %7, %5       ; (i & 0x33) + ((i >> 2) & 0x33)
	%9 = lshr i8 %8, 4               ; i >> 4
	%10 = add nuw nsw i8 %9, %8      ; i + (i >> 4)
	%11 = and i8 %10, 15             ; (i + (i >> 4)) & 0x0F
	%12 = zext i8 %11 to i32         ; widen the i8 result to the i32 return type
	ret i32 %12
	}

	;int popcount32(unsigned i) {
	; i = i - ((i >> 1) & 0x55555555);
	; i = (i & 0x33333333) + ((i >> 2) & 0x33333333);
	; i = ((i + (i >> 4)) & 0x0F0F0F0F);
	; return (i * 0x01010101) >> 24;
	;}
	; i32 scalar form of the population-count idiom from the C comment above.
	; The assertions expect the whole sequence to be replaced by a single call to
	; @llvm.ctpop.i32 on the argument, whose result is returned directly.
	; Both captures use the `%.*` wildcard so they do not depend on the length of
	; the printed value names.
	define signext i32 @popcount32(i32 zeroext %0) {
	; CHECK-LABEL: @popcount32(
	; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.ctpop.i32(i32 [[TMP0:%.*]])
	; CHECK-NEXT: ret i32 [[TMP2]]
	;
	%2 = lshr i32 %0, 1              ; i >> 1
	%3 = and i32 %2, 1431655765      ; (i >> 1) & 0x55555555
	%4 = sub i32 %0, %3              ; i - ((i >> 1) & 0x55555555)
	%5 = and i32 %4, 858993459       ; i & 0x33333333
	%6 = lshr i32 %4, 2              ; i >> 2
	%7 = and i32 %6, 858993459       ; (i >> 2) & 0x33333333
	%8 = add nuw nsw i32 %7, %5      ; sum of the two masked values
	%9 = lshr i32 %8, 4              ; i >> 4
	%10 = add nuw nsw i32 %9, %8     ; i + (i >> 4)
	%11 = and i32 %10, 252645135     ; (i + (i >> 4)) & 0x0F0F0F0F
	%12 = mul i32 %11, 16843009      ; i * 0x01010101
	%13 = lshr i32 %12, 24           ; (i * 0x01010101) >> 24
	ret i32 %13
	}

	;int popcount64(unsigned long long i) {
	; i = i - ((i >> 1) & 0x5555555555555555);
	; i = (i & 0x3333333333333333) + ((i >> 2) & 0x3333333333333333);
	; i = ((i + (i >> 4)) & 0x0F0F0F0F0F0F0F0F);
	; return (i * 0x0101010101010101) >> 56;
	;}
	; i64 scalar form of the population-count idiom from the C comment above.
	; The assertions expect the sequence to be replaced by a call to
	; @llvm.ctpop.i64 on the argument; the trunc to the i32 return type remains.
	; All captures use the `%.*` wildcard so they do not depend on the length of
	; the printed value names.
	define signext i32 @popcount64(i64 %0) {
	; CHECK-LABEL: @popcount64(
	; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP0:%.*]])
	; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[TMP2]] to i32
	; CHECK-NEXT: ret i32 [[TMP3]]
	;
	%2 = lshr i64 %0, 1                        ; i >> 1
	%3 = and i64 %2, 6148914691236517205       ; (i >> 1) & 0x5555555555555555
	%4 = sub i64 %0, %3                        ; i - ((i >> 1) & 0x5555555555555555)
	%5 = and i64 %4, 3689348814741910323       ; i & 0x3333333333333333
	%6 = lshr i64 %4, 2                        ; i >> 2
	%7 = and i64 %6, 3689348814741910323       ; (i >> 2) & 0x3333333333333333
	%8 = add nuw nsw i64 %7, %5                ; sum of the two masked values
	%9 = lshr i64 %8, 4                        ; i >> 4
	%10 = add nuw nsw i64 %9, %8               ; i + (i >> 4)
	%11 = and i64 %10, 1085102592571150095     ; (i + (i >> 4)) & 0x0F0F0F0F0F0F0F0F
	%12 = mul i64 %11, 72340172838076673       ; i * 0x0101010101010101
	%13 = lshr i64 %12, 56                     ; (i * 0x0101010101010101) >> 56
	%14 = trunc i64 %13 to i32                 ; narrow to the i32 return type
	ret i32 %14
	}

	;int popcount128(__uint128_t i) {
	; __uint128_t x = 0x5555555555555555;
	; x <<= 64;
	; x |= 0x5555555555555555;
	; __uint128_t y = 0x3333333333333333;
	; y <<= 64;
	; y |= 0x3333333333333333;
	; __uint128_t z = 0x0f0f0f0f0f0f0f0f;
	; z <<= 64;
	; z |= 0x0f0f0f0f0f0f0f0f;
	; __uint128_t a = 0x0101010101010101;
	; a <<= 64;
	; a |= 0x0101010101010101;
	; unsigned mask = 120;
	; i = i - ((i >> 1) & x);
	; i = (i & y) + ((i >> 2) & y);
	; i = ((i + (i >> 4)) & z);
	; return (i * a) >> mask;
	;}
	; i128 scalar form of the population-count idiom from the C comment above
	; (x, y, z, a and mask refer to the variables of that C function).
	; The assertions expect the sequence to be replaced by a call to
	; @llvm.ctpop.i128 on the argument; the trunc to the i32 return type remains.
	; All captures use the `%.*` wildcard so they do not depend on the length of
	; the printed value names.
	define signext i32 @popcount128(i128 %0) {
	; CHECK-LABEL: @popcount128(
	; CHECK-NEXT: [[TMP2:%.*]] = call i128 @llvm.ctpop.i128(i128 [[TMP0:%.*]])
	; CHECK-NEXT: [[TMP3:%.*]] = trunc i128 [[TMP2]] to i32
	; CHECK-NEXT: ret i32 [[TMP3]]
	;
	%2 = lshr i128 %0, 1                                            ; i >> 1
	%3 = and i128 %2, 113427455640312821154458202477256070485       ; (i >> 1) & x
	%4 = sub i128 %0, %3                                            ; i - ((i >> 1) & x)
	%5 = and i128 %4, 68056473384187692692674921486353642291        ; i & y
	%6 = lshr i128 %4, 2                                            ; i >> 2
	%7 = and i128 %6, 68056473384187692692674921486353642291        ; (i >> 2) & y
	%8 = add nuw nsw i128 %7, %5                                    ; (i & y) + ((i >> 2) & y)
	%9 = lshr i128 %8, 4                                            ; i >> 4
	%10 = add nuw nsw i128 %9, %8                                   ; i + (i >> 4)
	%11 = and i128 %10, 20016609818878733144904388672456953615      ; (i + (i >> 4)) & z
	%12 = mul i128 %11, 1334440654591915542993625911497130241       ; i * a
	%13 = lshr i128 %12, 120                                        ; (i * a) >> mask
	%14 = trunc i128 %13 to i32                                     ; narrow to the i32 return type
	ret i32 %14
	}

	;vector unsigned char popcount8vec(vector unsigned char i)
	;{
	; i = i - ((i>> 1) & 0x55);
	; i = (i & 0x33) + ((i >> 2) & 0x33);
	; i = ((i + (i >> 4)) & 0x0F);
	; return (i * 0x01);
	;}
	; <16 x i8> vector form of the idiom from the C comment above; every constant
	; operand is a splat. The assertions below mirror the input instruction
	; sequence one-for-one, i.e. the pass is expected to leave this function
	; unchanged (no llvm.ctpop call is expected here).
	; The first assertion's captures use the `%.*` wildcard like every other
	; capture in this function, so they do not depend on the length of the
	; printed value names.
	define <16 x i8> @popcount8vec(<16 x i8> %0) {
	; CHECK-LABEL: @popcount8vec(
	; CHECK-NEXT: [[TMP2:%.*]] = lshr <16 x i8> [[TMP0:%.*]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
	; CHECK-NEXT: [[TMP3:%.*]] = and <16 x i8> [[TMP2]], <i8 85, i8 85, i8 85, i8 85, i8 85, i8 85, i8 85, i8 85, i8 85, i8 85, i8 85, i8 85, i8 85, i8 85, i8 85, i8 85>
	; CHECK-NEXT: [[TMP4:%.*]] = sub <16 x i8> [[TMP0]], [[TMP3]]
	; CHECK-NEXT: [[TMP5:%.*]] = and <16 x i8> [[TMP4]], <i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51>
	; CHECK-NEXT: [[TMP6:%.*]] = lshr <16 x i8> [[TMP4]], <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
	; CHECK-NEXT: [[TMP7:%.*]] = and <16 x i8> [[TMP6]], <i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51>
	; CHECK-NEXT: [[TMP8:%.*]] = add nuw nsw <16 x i8> [[TMP7]], [[TMP5]]
	; CHECK-NEXT: [[TMP9:%.*]] = lshr <16 x i8> [[TMP8]], <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
	; CHECK-NEXT: [[TMP10:%.*]] = add nuw nsw <16 x i8> [[TMP9]], [[TMP8]]
	; CHECK-NEXT: [[TMP11:%.*]] = and <16 x i8> [[TMP10]], <i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15>
	; CHECK-NEXT: ret <16 x i8> [[TMP11]]
	;
	; i - ((i >> 1) & 0x55), per lane
	%2 = lshr <16 x i8> %0, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
	%3 = and <16 x i8> %2, <i8 85, i8 85, i8 85, i8 85, i8 85, i8 85, i8 85, i8 85, i8 85, i8 85, i8 85, i8 85, i8 85, i8 85, i8 85, i8 85>
	%4 = sub <16 x i8> %0, %3
	; (i & 0x33) + ((i >> 2) & 0x33), per lane
	%5 = and <16 x i8> %4, <i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51>
	%6 = lshr <16 x i8> %4, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
	%7 = and <16 x i8> %6, <i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51>
	%8 = add nuw nsw <16 x i8> %7, %5
	; (i + (i >> 4)) & 0x0F, per lane
	%9 = lshr <16 x i8> %8, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
	%10 = add nuw nsw <16 x i8> %9, %8
	%11 = and <16 x i8> %10, <i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15>
	ret <16 x i8> %11
	}

	;vector unsigned int popcount32vec(vector unsigned int i)
	;{
	; i = i - ((i>> 1) & 0x55555555);
	; i = (i & 0x33333333) + ((i >> 2) & 0x33333333);
	; i = ((i + (i >> 4)) & 0x0F0F0F0F);
	; return (i * 0x01010101) >> 24;
	;}
	; <4 x i32> vector form of the idiom from the C comment above; every constant
	; operand is a splat. The assertions expect the whole sequence to be replaced
	; by a single call to @llvm.ctpop.v4i32 on the argument, returned directly.
	; Both captures use the `%.*` wildcard so they do not depend on the length of
	; the printed value names.
	define <4 x i32> @popcount32vec(<4 x i32> %0) {
	; CHECK-LABEL: @popcount32vec(
	; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> [[TMP0:%.*]])
	; CHECK-NEXT: ret <4 x i32> [[TMP2]]
	;
	; i - ((i >> 1) & 0x55555555), per lane
	%2 = lshr <4 x i32> %0, <i32 1, i32 1, i32 1, i32 1>
	%3 = and <4 x i32> %2, <i32 1431655765, i32 1431655765, i32 1431655765, i32 1431655765>
	%4 = sub <4 x i32> %0, %3
	; (i & 0x33333333) + ((i >> 2) & 0x33333333), per lane
	%5 = and <4 x i32> %4, <i32 858993459, i32 858993459, i32 858993459, i32 858993459>
	%6 = lshr <4 x i32> %4, <i32 2, i32 2, i32 2, i32 2>
	%7 = and <4 x i32> %6, <i32 858993459, i32 858993459, i32 858993459, i32 858993459>
	%8 = add nuw nsw <4 x i32> %7, %5
	; (i + (i >> 4)) & 0x0F0F0F0F, per lane
	%9 = lshr <4 x i32> %8, <i32 4, i32 4, i32 4, i32 4>
	%10 = add nuw nsw <4 x i32> %9, %8
	%11 = and <4 x i32> %10, <i32 252645135, i32 252645135, i32 252645135, i32 252645135>
	; (i * 0x01010101) >> 24, per lane
	%12 = mul <4 x i32> %11, <i32 16843009, i32 16843009, i32 16843009, i32 16843009>
	%13 = lshr <4 x i32> %12, <i32 24, i32 24, i32 24, i32 24>
	ret <4 x i32> %13
	}