|  | ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py | 
|  | ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE2 | 
|  | ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 -early-live-intervals | FileCheck %s --check-prefixes=CHECK,SSE2 | 
|  | ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+xop | FileCheck %s --check-prefixes=CHECK,XOP | 
|  | ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX2 | 
|  | ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512 | 
|  |  | 
|  | ; fold (rot (rot x, c1), c2) -> rot x, c1+c2 | 
define <4 x i32> @combine_vec_rot_rot(<4 x i32> %x) {
; SSE2-LABEL: combine_vec_rot_rot:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,3,2,3]
; SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; XOP-LABEL: combine_vec_rot_rot:
; XOP:       # %bb.0:
; XOP-NEXT:    vprotd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; XOP-NEXT:    retq
;
; AVX2-LABEL: combine_vec_rot_rot:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; AVX2-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: combine_vec_rot_rot:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vprolvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512-NEXT:    retq
; Two back-to-back per-element rotates, each spelled as a matching lshr/shl/or
; pair (shift amounts sum to 32 per lane). The combine should merge them into
; one rotate by the summed amounts; XOP/AVX512 show a single variable-rotate
; instruction, AVX2 a single shift-pair, SSE2 the pmuludq shift-by-constant
; lowering.
%1 = lshr <4 x i32> %x, <i32 1, i32 2, i32 3, i32 4>
%2 = shl <4 x i32> %x, <i32 31, i32 30, i32 29, i32 28>
%3 = or <4 x i32> %1, %2
%4 = lshr <4 x i32> %3, <i32 12, i32 13, i32 14, i32 15>
%5 = shl <4 x i32> %3, <i32 20, i32 19, i32 18, i32 17>
%6 = or <4 x i32> %4, %5
ret <4 x i32> %6
}
|  |  | 
define <4 x i32> @combine_vec_rot_rot_splat(<4 x i32> %x) {
; SSE2-LABEL: combine_vec_rot_rot_splat:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrld $25, %xmm1
; SSE2-NEXT:    pslld $7, %xmm0
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; XOP-LABEL: combine_vec_rot_rot_splat:
; XOP:       # %bb.0:
; XOP-NEXT:    vprotd $7, %xmm0, %xmm0
; XOP-NEXT:    retq
;
; AVX2-LABEL: combine_vec_rot_rot_splat:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrld $25, %xmm0, %xmm1
; AVX2-NEXT:    vpslld $7, %xmm0, %xmm0
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: combine_vec_rot_rot_splat:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vprold $7, %xmm0, %xmm0
; AVX512-NEXT:    retq
; Splat-constant case: rotl by 29 followed by rotl by 10 merges to
; rotl by (29 + 10) mod 32 = 7, visible as the single $7 rotate/shift-pair
; in every prefix above.
%1 = lshr <4 x i32> %x, <i32 3, i32 3, i32 3, i32 3>
%2 = shl <4 x i32> %x, <i32 29, i32 29, i32 29, i32 29>
%3 = or <4 x i32> %1, %2
%4 = lshr <4 x i32> %3, <i32 22, i32 22, i32 22, i32 22>
%5 = shl <4 x i32> %3, <i32 10, i32 10, i32 10, i32 10>
%6 = or <4 x i32> %4, %5
ret <4 x i32> %6
}
|  |  | 
define <4 x i32> @combine_vec_rot_rot_splat_zero(<4 x i32> %x) {
; CHECK-LABEL: combine_vec_rot_rot_splat_zero:
; CHECK:       # %bb.0:
; CHECK-NEXT:    retq
; Degenerate case: rotl by 31 then rotl by 1 sums to 32 ≡ 0 (mod 32), an
; identity rotate, so the whole body folds away to a bare return.
%1 = lshr <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
%2 = shl <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31>
%3 = or <4 x i32> %1, %2
%4 = lshr <4 x i32> %3, <i32 31, i32 31, i32 31, i32 31>
%5 = shl <4 x i32> %3, <i32 1, i32 1, i32 1, i32 1>
%6 = or <4 x i32> %4, %5
ret <4 x i32> %6
}
|  |  | 
|  | ; TODO - fold (select (icmp eq c, 0), x, (rot x, c)) -> rot x, c | 
define i32 @combine_rot_select_zero(i32, i32) {
; CHECK-LABEL: combine_rot_select_zero:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %esi, %ecx
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    roll %cl, %eax
; CHECK-NEXT:    testl %esi, %esi
; CHECK-NEXT:    cmovel %edi, %eax
; CHECK-NEXT:    retq
; Scalar select(c == 0, x, rotl(x, c)). Since rotl(x, 0) == x the select is
; redundant, but the fold is not implemented yet (see TODO above): the
; checks still expect the testl/cmovel guarding the roll.
%3 = and i32 %1, 31
%4 = shl i32 %0, %3
%5 = sub i32 0, %1
%6 = and i32 %5, 31
%7 = lshr i32 %0, %6
%8 = or i32 %4, %7
%9 = icmp eq i32 %1, 0
%10 = select i1 %9, i32 %0, i32 %8
ret i32 %10
}
|  |  | 
define <4 x i32> @combine_vec_rot_select_zero(<4 x i32>, <4 x i32>) {
; SSE2-LABEL: combine_vec_rot_select_zero:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm2
; SSE2-NEXT:    pslld $23, %xmm1
; SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT:    cvttps2dq %xmm1, %xmm1
; SSE2-NEXT:    movdqa %xmm0, %xmm3
; SSE2-NEXT:    pmuludq %xmm1, %xmm3
; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[1,3,2,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT:    pmuludq %xmm5, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm1[1,3,2,3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1]
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
; SSE2-NEXT:    por %xmm4, %xmm3
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    pandn %xmm3, %xmm2
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; XOP-LABEL: combine_vec_rot_select_zero:
; XOP:       # %bb.0:
; XOP-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; XOP-NEXT:    vprotd %xmm1, %xmm0, %xmm3
; XOP-NEXT:    vpcomeqd %xmm2, %xmm1, %xmm1
; XOP-NEXT:    vblendvps %xmm1, %xmm0, %xmm3, %xmm0
; XOP-NEXT:    retq
;
; AVX2-LABEL: combine_vec_rot_select_zero:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm3 = [31,31,31,31]
; AVX2-NEXT:    vpand %xmm3, %xmm1, %xmm3
; AVX2-NEXT:    vpsllvd %xmm3, %xmm0, %xmm4
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm5 = [32,32,32,32]
; AVX2-NEXT:    vpsubd %xmm3, %xmm5, %xmm3
; AVX2-NEXT:    vpsrlvd %xmm3, %xmm0, %xmm3
; AVX2-NEXT:    vpor %xmm3, %xmm4, %xmm3
; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vblendvps %xmm1, %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: combine_vec_rot_select_zero:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vptestmd %xmm1, %xmm1, %k1
; AVX512-NEXT:    vprolvd %xmm1, %xmm0, %xmm0 {%k1}
; AVX512-NEXT:    retq
; Vector form of the select(c == 0, x, rotl(x, c)) pattern. Pre-AVX512
; targets emit an explicit compare + blend around the rotate; AVX512 lowers
; the blend as a merge-masked vprolvd keyed off a vptestmd mask.
%3 = and <4 x i32> %1, <i32 31, i32 31, i32 31, i32 31>
%4 = shl <4 x i32> %0, %3
%5 = sub <4 x i32> zeroinitializer, %1
%6 = and <4 x i32> %5, <i32 31, i32 31, i32 31, i32 31>
%7 = lshr <4 x i32> %0, %6
%8 = or <4 x i32> %4, %7
%9 = icmp eq <4 x i32> %1, zeroinitializer
%10 = select <4 x i1> %9, <4 x i32> %0, <4 x i32> %8
ret <4 x i32> %10
}
|  |  | 
define <4 x i32> @rotate_demanded_bits(<4 x i32>, <4 x i32>) {
; SSE2-LABEL: rotate_demanded_bits:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pslld $23, %xmm1
; SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT:    cvttps2dq %xmm1, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE2-NEXT:    pmuludq %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,3,2,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT:    pmuludq %xmm2, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT:    por %xmm3, %xmm0
; SSE2-NEXT:    retq
;
; XOP-LABEL: rotate_demanded_bits:
; XOP:       # %bb.0:
; XOP-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; XOP-NEXT:    vprotd %xmm1, %xmm0, %xmm0
; XOP-NEXT:    retq
;
; AVX2-LABEL: rotate_demanded_bits:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [30,30,30,30]
; AVX2-NEXT:    vpand %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vpsllvd %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm3 = [32,32,32,32]
; AVX2-NEXT:    vpsubd %xmm1, %xmm3, %xmm1
; AVX2-NEXT:    vpsrlvd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpor %xmm0, %xmm2, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: rotate_demanded_bits:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
; AVX512-NEXT:    vprolvd %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
; Variable rotate whose amount is masked with 30 (instead of the natural 31)
; on both the shl and lshr sides. The pattern must still be recognized as a
; rotate: XOP/AVX512 keep the mask and emit a single variable rotate.
%3 = and <4 x i32> %1, <i32 30, i32 30, i32 30, i32 30>
%4 = shl <4 x i32> %0, %3
%5 = sub nsw <4 x i32> zeroinitializer, %3
%6 = and <4 x i32> %5, <i32 30, i32 30, i32 30, i32 30>
%7 = lshr <4 x i32> %0, %6
%8 = or <4 x i32> %7, %4
ret <4 x i32> %8
}
|  |  | 
define <4 x i32> @rotate_demanded_bits_2(<4 x i32>, <4 x i32>) {
; SSE2-LABEL: rotate_demanded_bits_2:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pslld $23, %xmm1
; SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT:    cvttps2dq %xmm1, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE2-NEXT:    pmuludq %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,3,2,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT:    pmuludq %xmm2, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT:    por %xmm3, %xmm0
; SSE2-NEXT:    retq
;
; XOP-LABEL: rotate_demanded_bits_2:
; XOP:       # %bb.0:
; XOP-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; XOP-NEXT:    vprotd %xmm1, %xmm0, %xmm0
; XOP-NEXT:    retq
;
; AVX2-LABEL: rotate_demanded_bits_2:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [23,23,23,23]
; AVX2-NEXT:    vpand %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vpsllvd %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm3 = [32,32,32,32]
; AVX2-NEXT:    vpsubd %xmm1, %xmm3, %xmm1
; AVX2-NEXT:    vpsrlvd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpor %xmm0, %xmm2, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: rotate_demanded_bits_2:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
; AVX512-NEXT:    vprolvd %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
; Asymmetric masks: the shl amount is masked with 23 while the lshr side
; uses the full 31 mask. Both stay within [0,31], so this is still a valid
; rotate pair and lowers to a single masked variable rotate on XOP/AVX512.
%3 = and <4 x i32> %1, <i32 23, i32 23, i32 23, i32 23>
%4 = shl <4 x i32> %0, %3
%5 = sub nsw <4 x i32> zeroinitializer, %3
%6 = and <4 x i32> %5, <i32 31, i32 31, i32 31, i32 31>
%7 = lshr <4 x i32> %0, %6
%8 = or <4 x i32> %7, %4
ret <4 x i32> %8
}
|  |  | 
define <4 x i32> @rotate_demanded_bits_3(<4 x i32>, <4 x i32>) {
; SSE2-LABEL: rotate_demanded_bits_3:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pslld $24, %xmm1
; SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT:    cvttps2dq %xmm1, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE2-NEXT:    pmuludq %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,3,2,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT:    pmuludq %xmm2, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT:    por %xmm3, %xmm0
; SSE2-NEXT:    retq
;
; XOP-LABEL: rotate_demanded_bits_3:
; XOP:       # %bb.0:
; XOP-NEXT:    vpaddd %xmm1, %xmm1, %xmm1
; XOP-NEXT:    vprotd %xmm1, %xmm0, %xmm0
; XOP-NEXT:    retq
;
; AVX2-LABEL: rotate_demanded_bits_3:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpaddd %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
; AVX2-NEXT:    vpand %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vpsllvd %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm3 = [32,32,32,32]
; AVX2-NEXT:    vpsubd %xmm1, %xmm3, %xmm1
; AVX2-NEXT:    vpsrlvd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpor %xmm0, %xmm2, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: rotate_demanded_bits_3:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpaddd %xmm1, %xmm1, %xmm1
; AVX512-NEXT:    vprolvd %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
; The rotate amount is first doubled (shl by 1) and then masked with 30; the
; lshr side negates the *pre-masked* doubled amount. Codegen keeps the
; doubling (vpaddd x,x) and can drop the explicit mask on XOP/AVX512 because
; the hardware rotate only reads the low 5 amount bits.
%3 = shl <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
%4 = and <4 x i32> %3, <i32 30, i32 30, i32 30, i32 30>
%5 = shl <4 x i32> %0, %4
%6 = sub <4 x i32> zeroinitializer, %3
%7 = and <4 x i32> %6, <i32 30, i32 30, i32 30, i32 30>
%8 = lshr <4 x i32> %0, %7
%9 = or <4 x i32> %5, %8
ret <4 x i32> %9
}
|  |  | 
define <4 x i32> @rotl_binop_shuffle(<4 x i32>, <4 x i32>) {
; SSE2-LABEL: rotl_binop_shuffle:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pslld $23, %xmm1
; SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT:    cvttps2dq %xmm1, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE2-NEXT:    pmuludq %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,3,2,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT:    pmuludq %xmm2, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT:    por %xmm3, %xmm0
; SSE2-NEXT:    retq
;
; XOP-LABEL: rotl_binop_shuffle:
; XOP:       # %bb.0:
; XOP-NEXT:    vprotd %xmm1, %xmm0, %xmm0
; XOP-NEXT:    retq
;
; AVX2-LABEL: rotl_binop_shuffle:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
; AVX2-NEXT:    vpand %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vpsllvd %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm3 = [32,32,32,32]
; AVX2-NEXT:    vpsubd %xmm1, %xmm3, %xmm1
; AVX2-NEXT:    vpsrlvd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpor %xmm0, %xmm2, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: rotl_binop_shuffle:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vprolvd %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
; Both operands and the result are reverse-shuffled with the same lane
; permutation around a per-lane rotate (fshl x,x,amt). The shuffles commute
; through the lanewise op and cancel, so no pshufd/vpermilps appears in the
; expected output — just the rotate itself.
%3 = shufflevector <4 x i32> %0, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
%4 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
%5 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %3, <4 x i32> %3, <4 x i32> %4)
%6 = shufflevector <4 x i32> %5, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
ret <4 x i32> %6
}
|  |  | 
define <4 x i32> @rotr_binop_shuffle(<4 x i32>, <4 x i32>) {
; SSE2-LABEL: rotr_binop_shuffle:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[2,2,3,3]
; SSE2-NEXT:    psllq %xmm1, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-NEXT:    psllq %xmm1, %xmm0
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,3],xmm2[1,3]
; SSE2-NEXT:    retq
;
; XOP-LABEL: rotr_binop_shuffle:
; XOP:       # %bb.0:
; XOP-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; XOP-NEXT:    vprotd %xmm1, %xmm0, %xmm0
; XOP-NEXT:    retq
;
; AVX2-LABEL: rotr_binop_shuffle:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX2-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[2,2,3,3]
; AVX2-NEXT:    vpsllq %xmm1, %xmm2, %xmm2
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX2-NEXT:    vpsllq %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm2[1,3]
; AVX2-NEXT:    retq
;
; AVX512-LABEL: rotr_binop_shuffle:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpbroadcastd %xmm1, %xmm1
; AVX512-NEXT:    vprolvd %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
; Here the data is reverse-shuffled but the rotate amount is a splat of
; lane 0. With a uniform amount the data shuffles still cancel; only the
; splat of the amount register survives (vpshufd/vpbroadcastd) before the
; single rotate.
%3 = shufflevector <4 x i32> %0, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
%4 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> zeroinitializer
%5 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %3, <4 x i32> %3, <4 x i32> %4)
%6 = shufflevector <4 x i32> %5, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
ret <4 x i32> %6
}
|  |  | 
|  | ; OSS Fuzz: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=9935 | 
define i32 @fuzz9935() {
; CHECK-LABEL: fuzz9935:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl $-1, %eax
; CHECK-NEXT:    retq
; Fuzzer reproducer (crash regression, not a combine test): %1 truncates to
; all-ones (-1), and the entire expression constant-folds to -1. The point
; is that compilation succeeds and emits the folded constant.
%1 = trunc i40 549755813887 to i32
%2 = mul i32 %1, %1
%3 = lshr i32 %2, %1
%4 = or i32 %3, %2
ret i32 %4
}
|  |  | 
|  | ; Ensure we normalize the inner rotation before adding the results. | 
define i5 @rotl_merge_i5(i5 %x) {
; CHECK-LABEL: rotl_merge_i5:
; CHECK:       # %bb.0:
; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
; CHECK-NEXT:    leal (,%rdi,4), %ecx
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    andb $24, %al
; CHECK-NEXT:    shrb $3, %al
; CHECK-NEXT:    orb %cl, %al
; CHECK-NEXT:    retq
; Non-power-of-two width: fshl.i5 amounts are taken modulo 5, so the inner
; rotate by -1 must be normalized before merging with the outer rotate by 1.
; The expected code computes an i5 rotate-left by 2: (x << 2) | ((x & 0x18) >> 3).
%r1 = call i5 @llvm.fshl.i5(i5 %x, i5 %x, i5 -1)
%r2 = call i5 @llvm.fshl.i5(i5 %r1, i5 %r1, i5 1)
ret i5 %r2
}
|  | declare i5 @llvm.fshl.i5(i5, i5, i5) | 
|  |  | 
|  | declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) | 
|  | declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) |