| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 |
| ; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s |
| |
; pr174871: codegen test for a masked 16-lane loop whose body is a chain of
; range-tested cases with early exits between them.
;   %a      - <16 x i32> per-lane value that selects the case and feeds the sum.
;   %__mask - <16 x i1> per-lane mask; it is ANDed into every "are we done" test.
;   returns - <16 x i32> accumulator (all zero when no lane enters the loop).
; The assertion lines directly below the define are autogenerated (see the note
; on the first line of this file); regenerate them with that script instead of
; editing them by hand.
; NOTE(review): the function name presumably refers to an upstream issue number - confirm.
| define <16 x i32> @pr174871(<16 x i32> %a, <16 x i1> %__mask) local_unnamed_addr { |
| ; CHECK-LABEL: pr174871: |
| ; CHECK: # %bb.0: # %allocas |
| ; CHECK-NEXT: vpsllw $7, %xmm1, %xmm1 |
| ; CHECK-NEXT: vpmovb2m %xmm1, %k0 |
| ; CHECK-NEXT: kmovd %k0, %eax |
| ; CHECK-NEXT: andl $65534, %eax # imm = 0xFFFE |
| ; CHECK-NEXT: je .LBB0_1 |
| ; CHECK-NEXT: # %bb.2: # %for_loop.lr.ph |
| ; CHECK-NEXT: vpternlogd {{.*#+}} zmm2 = -1 |
| ; CHECK-NEXT: vpaddd %zmm2, %zmm0, %zmm3 |
| ; CHECK-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm4 |
| ; CHECK-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm5 |
| ; CHECK-NEXT: vpsubd %zmm2, %zmm0, %zmm6 |
| ; CHECK-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm7 |
| ; CHECK-NEXT: vpaddd %zmm0, %zmm0, %zmm8 |
| ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 |
| ; CHECK-NEXT: vpbroadcastd {{.*#+}} zmm9 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4] |
| ; CHECK-NEXT: movw $-2, %cx |
| ; CHECK-NEXT: kmovd %ecx, %k1 |
| ; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm10 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] |
| ; CHECK-NEXT: vpbroadcastd {{.*#+}} zmm11 = [4294967284,4294967284,4294967284,4294967284,4294967284,4294967284,4294967284,4294967284,4294967284,4294967284,4294967284,4294967284,4294967284,4294967284,4294967284,4294967284] |
| ; CHECK-NEXT: vpxor %xmm12, %xmm12, %xmm12 |
| ; CHECK-NEXT: jmp .LBB0_3 |
| ; CHECK-NEXT: .p2align 4 |
| ; CHECK-NEXT: .LBB0_4: # %switch_done |
| ; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1 |
| ; CHECK-NEXT: vpsubd %zmm2, %zmm12, %zmm12 |
| ; CHECK-NEXT: vpcmpltud %zmm10, %zmm12, %k1 {%k1} |
| ; CHECK-NEXT: kandw %k1, %k0, %k2 |
| ; CHECK-NEXT: kmovd %k2, %eax |
| ; CHECK-NEXT: ktestw %k1, %k0 |
| ; CHECK-NEXT: je .LBB0_5 |
| ; CHECK-NEXT: .LBB0_3: # %for_loop |
| ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: vpcmpltud %zmm9, %zmm3, %k2 {%k1} |
| ; CHECK-NEXT: vpaddd %zmm3, %zmm1, %zmm1 {%k2} |
| ; CHECK-NEXT: kandw %k2, %k0, %k3 |
| ; CHECK-NEXT: kmovd %k3, %ecx |
| ; CHECK-NEXT: cmpw %cx, %ax |
| ; CHECK-NEXT: je .LBB0_4 |
| ; CHECK-NEXT: # %bb.6: # %not_all_continued_or_breaked |
| ; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1 |
| ; CHECK-NEXT: vpcmpltud %zmm11, %zmm4, %k3 {%k1} |
| ; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm1 {%k3} |
| ; CHECK-NEXT: korw %k3, %k2, %k2 |
| ; CHECK-NEXT: kandw %k2, %k0, %k3 |
| ; CHECK-NEXT: kmovd %k3, %ecx |
| ; CHECK-NEXT: cmpw %cx, %ax |
| ; CHECK-NEXT: je .LBB0_4 |
| ; CHECK-NEXT: # %bb.7: # %not_all_continued_or_breaked95 |
| ; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1 |
| ; CHECK-NEXT: vpcmpltud %zmm9, %zmm5, %k3 {%k1} |
| ; CHECK-NEXT: vpaddd %zmm6, %zmm1, %zmm1 {%k3} |
| ; CHECK-NEXT: korw %k2, %k3, %k2 |
| ; CHECK-NEXT: kandw %k2, %k0, %k2 |
| ; CHECK-NEXT: kmovd %k2, %ecx |
| ; CHECK-NEXT: cmpw %cx, %ax |
| ; CHECK-NEXT: je .LBB0_4 |
| ; CHECK-NEXT: # %bb.8: # %not_all_continued_or_breaked135 |
| ; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1 |
| ; CHECK-NEXT: vpcmpltud %zmm9, %zmm7, %k2 {%k1} |
| ; CHECK-NEXT: vpaddd %zmm8, %zmm1, %zmm1 {%k2} |
| ; CHECK-NEXT: jmp .LBB0_4 |
| ; CHECK-NEXT: .LBB0_5: # %for_exit |
| ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 |
| ; CHECK-NEXT: retq |
| ; CHECK-NEXT: .LBB0_1: |
| ; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0 |
| ; CHECK-NEXT: retq |
; Entry block: clear lane 0 of %__mask, pack the 16 mask bits into an i16, and
; go straight to for_exit (which then yields zeroinitializer) when no lane is left.
| allocas: |
| %"internal_mask&function_mask7208" = and <16 x i1> %__mask, <i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true> |
| %mask_i16.i.i209 = bitcast <16 x i1> %"internal_mask&function_mask7208" to i16 |
| %res.i.not210 = icmp eq i16 %mask_i16.i.i209, 0 |
| br i1 %res.i.not210, label %for_exit, label %for_loop.lr.ph |
| |
; Loop preheader: everything here depends only on %a, so it is computed once.
; Each range test is an add followed by an unsigned compare:
;   (%a - 1)  u< 4    true for %a in [1,4]
;   (%a - 13) u< -12  true for every %a outside [1,12] (the fall-back case)
;   (%a - 5)  u< 4    true for %a in [5,8]
;   (%a - 9)  u< 4    true for %a in [9,12]
; %a + 1 and %a << 1 are the addends used by the [5,8] and [9,12] cases.
| for_loop.lr.ph: |
| %0 = add <16 x i32> %a, splat (i32 -1) |
| %"entry_mask&case_match32185" = icmp ult <16 x i32> %0, splat (i32 4) |
| %1 = add <16 x i32> %a, splat (i32 -13) |
| %2 = icmp ult <16 x i32> %1, splat (i32 -12) |
| %3 = add <16 x i32> %a, splat (i32 -5) |
| %"entry_mask&case_match115188" = icmp ult <16 x i32> %3, splat (i32 4) |
| %add_a_load120_ = add nsw <16 x i32> %a, splat (i32 1) |
| %4 = add <16 x i32> %a, splat (i32 -9) |
| %"entry_mask&case_match155191" = icmp ult <16 x i32> %4, splat (i32 4) |
| %mul_a_load160_ = shl nsw <16 x i32> %a, splat (i32 1) |
| br label %for_loop |
| |
; Loop header. Loop-carried state:
;   %mask_i16.i.i214  - i16 form of (%__mask AND loop mask) for this iteration
;   %"oldMask&test213" - per-lane loop mask (lane 0 starts out false)
;   %i.0212           - per-lane iteration counter, starts at zero
;   %res.0211         - per-lane accumulator, starts at zero
; First case: lanes still in the loop with %a in [1,4] add (%a - 1).
; If those lanes (ANDed with %__mask) already equal the active mask, the
; remaining cases are skipped by branching to switch_done.
| for_loop: |
| %mask_i16.i.i214 = phi i16 [ %mask_i16.i.i209, %for_loop.lr.ph ], [ %mask_i16.i.i, %switch_done ] |
| %"oldMask&test213" = phi <16 x i1> [ <i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, %for_loop.lr.ph ], [ %"oldMask&test", %switch_done ] |
| %i.0212 = phi <16 x i32> [ zeroinitializer, %for_loop.lr.ph ], [ %i_load170_plus1, %switch_done ] |
| %res.0211 = phi <16 x i32> [ zeroinitializer, %for_loop.lr.ph ], [ %res.1, %switch_done ] |
| %"mask|case_match34" = and <16 x i1> %"entry_mask&case_match32185", %"oldMask&test213" |
| %add_res_load_sub_a_load37_ = select <16 x i1> %"mask|case_match34", <16 x i32> %0, <16 x i32> zeroinitializer |
| %5 = add nsw <16 x i32> %add_res_load_sub_a_load37_, %res.0211 |
| %"finished&func" = and <16 x i1> %__mask, %"mask|case_match34" |
| %mask_i16.i.i192 = bitcast <16 x i1> %"finished&func" to i16 |
| %"equal_finished&func_internal_mask&function_mask13" = icmp eq i16 %mask_i16.i.i214, %mask_i16.i.i192 |
| br i1 %"equal_finished&func_internal_mask&function_mask13", label %switch_done, label %not_all_continued_or_breaked |
| |
; Loop latch: merge the accumulator from whichever case block branched here,
; bump the counter, and keep a lane in the loop only while (counter + 1) is
; unsigned-less-than that lane's constant (0..15, one per lane). The loop exits
; once no lane is both in the loop mask and set in %__mask.
| switch_done: |
| %res.1 = phi <16 x i32> [ %5, %for_loop ], [ %6, %not_all_continued_or_breaked ], [ %7, %not_all_continued_or_breaked95 ], [ %8, %not_all_continued_or_breaked135 ] |
| %i_load170_plus1 = add nuw nsw <16 x i32> %i.0212, splat (i32 1) |
| %less_i_load_ = icmp samesign ult <16 x i32> %i_load170_plus1, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> |
| %"oldMask&test" = and <16 x i1> %"oldMask&test213", %less_i_load_ |
| %"internal_mask&function_mask7" = and <16 x i1> %__mask, %"oldMask&test" |
| %mask_i16.i.i = bitcast <16 x i1> %"internal_mask&function_mask7" to i16 |
| %res.i.not = icmp eq i16 %mask_i16.i.i, 0 |
| br i1 %res.i.not, label %for_exit, label %for_loop |
| |
; Function exit: zero when the loop was never entered, otherwise the final accumulator.
| for_exit: |
| %res.0.lcssa = phi <16 x i32> [ zeroinitializer, %allocas ], [ %res.1, %switch_done ] |
| ret <16 x i32> %res.0.lcssa |
| |
; Fall-back case: lanes still in the loop with %a outside [1,12] add %a.
; The handled-lane mask is widened with these lanes and the early-exit test
; against the active mask is repeated.
| not_all_continued_or_breaked: |
| %"default&~case_match76" = and <16 x i1> %2, %"oldMask&test213" |
| %add_res_load82_a_load80 = select <16 x i1> %"default&~case_match76", <16 x i32> %a, <16 x i32> zeroinitializer |
| %6 = add nsw <16 x i32> %5, %add_res_load82_a_load80 |
| %"mask|break_mask86" = or <16 x i1> %"mask|case_match34", %"default&~case_match76" |
| %"finished&func92" = and <16 x i1> %__mask, %"mask|break_mask86" |
| %mask_i16.i.i196 = bitcast <16 x i1> %"finished&func92" to i16 |
| %"equal_finished&func92_internal_mask&function_mask13" = icmp eq i16 %mask_i16.i.i214, %mask_i16.i.i196 |
| br i1 %"equal_finished&func92_internal_mask&function_mask13", label %switch_done, label %not_all_continued_or_breaked95 |
| |
; Case %a in [5,8]: those lanes add (%a + 1); widen the handled-lane mask and
; repeat the early-exit test.
| not_all_continued_or_breaked95: |
| %"mask|case_match117" = and <16 x i1> %"entry_mask&case_match115188", %"oldMask&test213" |
| %add_res_load122_add_a_load120_ = select <16 x i1> %"mask|case_match117", <16 x i32> %add_a_load120_, <16 x i32> zeroinitializer |
| %7 = add nsw <16 x i32> %6, %add_res_load122_add_a_load120_ |
| %"mask|break_mask126" = or <16 x i1> %"mask|case_match117", %"mask|break_mask86" |
| %"finished&func132" = and <16 x i1> %__mask, %"mask|break_mask126" |
| %mask_i16.i.i198 = bitcast <16 x i1> %"finished&func132" to i16 |
| %"equal_finished&func132_internal_mask&function_mask13" = icmp eq i16 %mask_i16.i.i214, %mask_i16.i.i198 |
| br i1 %"equal_finished&func132_internal_mask&function_mask13", label %switch_done, label %not_all_continued_or_breaked135 |
| |
; Last case, %a in [9,12]: those lanes add (%a << 1). No further test is
; needed; control always continues to switch_done.
| not_all_continued_or_breaked135: |
| %"mask|case_match157" = and <16 x i1> %"entry_mask&case_match155191", %"oldMask&test213" |
| %add_res_load162_mul_a_load160_ = select <16 x i1> %"mask|case_match157", <16 x i32> %mul_a_load160_, <16 x i32> zeroinitializer |
| %8 = add nsw <16 x i32> %7, %add_res_load162_mul_a_load160_ |
| br label %switch_done |
| } |
| |