blob: 9d671a9a1b8d298cdc7963ad71fb80b01ee902cf [file]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s
; pr174871: a masked, 16-lane vector loop over %a, controlled by the lane mask %__mask.
; (The name suggests a reduced reproducer for issue/PR 174871 -- TODO confirm against the tracker.)
;
; Result: zeroinitializer when %__mask has no bit set in lanes 1..15; otherwise the
; per-lane accumulator %res.1 produced by the loop below.
;
; The assembly assertions that follow are autogenerated (see the note at the top of the
; file); regenerate them with the update script instead of editing them by hand.
define <16 x i32> @pr174871(<16 x i32> %a, <16 x i1> %__mask) local_unnamed_addr {
; CHECK-LABEL: pr174871:
; CHECK: # %bb.0: # %allocas
; CHECK-NEXT: vpsllw $7, %xmm1, %xmm1
; CHECK-NEXT: vpmovb2m %xmm1, %k0
; CHECK-NEXT: kmovd %k0, %eax
; CHECK-NEXT: andl $65534, %eax # imm = 0xFFFE
; CHECK-NEXT: je .LBB0_1
; CHECK-NEXT: # %bb.2: # %for_loop.lr.ph
; CHECK-NEXT: vpternlogd {{.*#+}} zmm2 = -1
; CHECK-NEXT: vpaddd %zmm2, %zmm0, %zmm3
; CHECK-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm4
; CHECK-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm5
; CHECK-NEXT: vpsubd %zmm2, %zmm0, %zmm6
; CHECK-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm7
; CHECK-NEXT: vpaddd %zmm0, %zmm0, %zmm8
; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vpbroadcastd {{.*#+}} zmm9 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
; CHECK-NEXT: movw $-2, %cx
; CHECK-NEXT: kmovd %ecx, %k1
; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm10 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; CHECK-NEXT: vpbroadcastd {{.*#+}} zmm11 = [4294967284,4294967284,4294967284,4294967284,4294967284,4294967284,4294967284,4294967284,4294967284,4294967284,4294967284,4294967284,4294967284,4294967284,4294967284,4294967284]
; CHECK-NEXT: vpxor %xmm12, %xmm12, %xmm12
; CHECK-NEXT: jmp .LBB0_3
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB0_4: # %switch_done
; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1
; CHECK-NEXT: vpsubd %zmm2, %zmm12, %zmm12
; CHECK-NEXT: vpcmpltud %zmm10, %zmm12, %k1 {%k1}
; CHECK-NEXT: kandw %k1, %k0, %k2
; CHECK-NEXT: kmovd %k2, %eax
; CHECK-NEXT: ktestw %k1, %k0
; CHECK-NEXT: je .LBB0_5
; CHECK-NEXT: .LBB0_3: # %for_loop
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vpcmpltud %zmm9, %zmm3, %k2 {%k1}
; CHECK-NEXT: vpaddd %zmm3, %zmm1, %zmm1 {%k2}
; CHECK-NEXT: kandw %k2, %k0, %k3
; CHECK-NEXT: kmovd %k3, %ecx
; CHECK-NEXT: cmpw %cx, %ax
; CHECK-NEXT: je .LBB0_4
; CHECK-NEXT: # %bb.6: # %not_all_continued_or_breaked
; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1
; CHECK-NEXT: vpcmpltud %zmm11, %zmm4, %k3 {%k1}
; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm1 {%k3}
; CHECK-NEXT: korw %k3, %k2, %k2
; CHECK-NEXT: kandw %k2, %k0, %k3
; CHECK-NEXT: kmovd %k3, %ecx
; CHECK-NEXT: cmpw %cx, %ax
; CHECK-NEXT: je .LBB0_4
; CHECK-NEXT: # %bb.7: # %not_all_continued_or_breaked95
; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1
; CHECK-NEXT: vpcmpltud %zmm9, %zmm5, %k3 {%k1}
; CHECK-NEXT: vpaddd %zmm6, %zmm1, %zmm1 {%k3}
; CHECK-NEXT: korw %k2, %k3, %k2
; CHECK-NEXT: kandw %k2, %k0, %k2
; CHECK-NEXT: kmovd %k2, %ecx
; CHECK-NEXT: cmpw %cx, %ax
; CHECK-NEXT: je .LBB0_4
; CHECK-NEXT: # %bb.8: # %not_all_continued_or_breaked135
; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1
; CHECK-NEXT: vpcmpltud %zmm9, %zmm7, %k2 {%k1}
; CHECK-NEXT: vpaddd %zmm8, %zmm1, %zmm1 {%k2}
; CHECK-NEXT: jmp .LBB0_4
; CHECK-NEXT: .LBB0_5: # %for_exit
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
; CHECK-NEXT: .LBB0_1:
; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
; CHECK-NEXT: retq
; Entry: clear lane 0 of the incoming mask and view the result as an i16 bitmask.
; If no lane in 1..15 is set, bypass the loop; for_exit then yields all zeros.
allocas:
%"internal_mask&function_mask7208" = and <16 x i1> %__mask, <i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
%mask_i16.i.i209 = bitcast <16 x i1> %"internal_mask&function_mask7208" to i16
%res.i.not210 = icmp eq i16 %mask_i16.i.i209, 0
br i1 %res.i.not210, label %for_exit, label %for_loop.lr.ph
; Preheader: loop-invariant per-lane predicates and addends, computed once from %a.
; Each "add negative constant, then unsigned compare" pair is a range test:
;   %"entry_mask&case_match32185"  holds exactly when a is in 1..4   (addend %0 = a - 1)
;   %2                             holds exactly when a is outside 1..12
;   %"entry_mask&case_match115188" holds exactly when a is in 5..8   (addend a + 1)
;   %"entry_mask&case_match155191" holds exactly when a is in 9..12  (addend a << 1)
for_loop.lr.ph:
%0 = add <16 x i32> %a, splat (i32 -1)
%"entry_mask&case_match32185" = icmp ult <16 x i32> %0, splat (i32 4)
%1 = add <16 x i32> %a, splat (i32 -13)
%2 = icmp ult <16 x i32> %1, splat (i32 -12)
%3 = add <16 x i32> %a, splat (i32 -5)
%"entry_mask&case_match115188" = icmp ult <16 x i32> %3, splat (i32 4)
%add_a_load120_ = add nsw <16 x i32> %a, splat (i32 1)
%4 = add <16 x i32> %a, splat (i32 -9)
%"entry_mask&case_match155191" = icmp ult <16 x i32> %4, splat (i32 4)
%mul_a_load160_ = shl nsw <16 x i32> %a, splat (i32 1)
br label %for_loop
; Loop header. Carried state: the i16 form of the live mask, the per-lane loop mask
; (lane 0 starts disabled), the per-lane counter and the per-lane accumulator.
; First arm: lanes still in the loop with a in 1..4 add (a - 1) to the accumulator.
; When those lanes, restricted to %__mask, already equal the carried i16 mask, the
; remaining arms are skipped by branching straight to switch_done.
for_loop:
%mask_i16.i.i214 = phi i16 [ %mask_i16.i.i209, %for_loop.lr.ph ], [ %mask_i16.i.i, %switch_done ]
%"oldMask&test213" = phi <16 x i1> [ <i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, %for_loop.lr.ph ], [ %"oldMask&test", %switch_done ]
%i.0212 = phi <16 x i32> [ zeroinitializer, %for_loop.lr.ph ], [ %i_load170_plus1, %switch_done ]
%res.0211 = phi <16 x i32> [ zeroinitializer, %for_loop.lr.ph ], [ %res.1, %switch_done ]
%"mask|case_match34" = and <16 x i1> %"entry_mask&case_match32185", %"oldMask&test213"
%add_res_load_sub_a_load37_ = select <16 x i1> %"mask|case_match34", <16 x i32> %0, <16 x i32> zeroinitializer
%5 = add nsw <16 x i32> %add_res_load_sub_a_load37_, %res.0211
%"finished&func" = and <16 x i1> %__mask, %"mask|case_match34"
%mask_i16.i.i192 = bitcast <16 x i1> %"finished&func" to i16
%"equal_finished&func_internal_mask&function_mask13" = icmp eq i16 %mask_i16.i.i214, %mask_i16.i.i192
br i1 %"equal_finished&func_internal_mask&function_mask13", label %switch_done, label %not_all_continued_or_breaked
; Loop latch: select the accumulator from whichever arm branched here, increment the
; counter, and keep a lane in the loop mask only while the incremented counter is
; below that lane's index (0..15). Leave the loop once no lane of %__mask remains.
switch_done:
%res.1 = phi <16 x i32> [ %5, %for_loop ], [ %6, %not_all_continued_or_breaked ], [ %7, %not_all_continued_or_breaked95 ], [ %8, %not_all_continued_or_breaked135 ]
%i_load170_plus1 = add nuw nsw <16 x i32> %i.0212, splat (i32 1)
%less_i_load_ = icmp samesign ult <16 x i32> %i_load170_plus1, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%"oldMask&test" = and <16 x i1> %"oldMask&test213", %less_i_load_
%"internal_mask&function_mask7" = and <16 x i1> %__mask, %"oldMask&test"
%mask_i16.i.i = bitcast <16 x i1> %"internal_mask&function_mask7" to i16
%res.i.not = icmp eq i16 %mask_i16.i.i, 0
br i1 %res.i.not, label %for_exit, label %for_loop
; Exit: all zeros when the loop was bypassed from allocas, else the final accumulator.
for_exit:
%res.0.lcssa = phi <16 x i32> [ zeroinitializer, %allocas ], [ %res.1, %switch_done ]
ret <16 x i32> %res.0.lcssa
; Second arm: lanes still in the loop with a outside 1..12 add a to the accumulator.
; The handled-lane set is widened with `or` and compared against the carried mask again.
not_all_continued_or_breaked:
%"default&~case_match76" = and <16 x i1> %2, %"oldMask&test213"
%add_res_load82_a_load80 = select <16 x i1> %"default&~case_match76", <16 x i32> %a, <16 x i32> zeroinitializer
%6 = add nsw <16 x i32> %5, %add_res_load82_a_load80
%"mask|break_mask86" = or <16 x i1> %"mask|case_match34", %"default&~case_match76"
%"finished&func92" = and <16 x i1> %__mask, %"mask|break_mask86"
%mask_i16.i.i196 = bitcast <16 x i1> %"finished&func92" to i16
%"equal_finished&func92_internal_mask&function_mask13" = icmp eq i16 %mask_i16.i.i214, %mask_i16.i.i196
br i1 %"equal_finished&func92_internal_mask&function_mask13", label %switch_done, label %not_all_continued_or_breaked95
; Third arm: lanes still in the loop with a in 5..8 add (a + 1); same widen-and-compare
; early-out as the previous arm.
not_all_continued_or_breaked95:
%"mask|case_match117" = and <16 x i1> %"entry_mask&case_match115188", %"oldMask&test213"
%add_res_load122_add_a_load120_ = select <16 x i1> %"mask|case_match117", <16 x i32> %add_a_load120_, <16 x i32> zeroinitializer
%7 = add nsw <16 x i32> %6, %add_res_load122_add_a_load120_
%"mask|break_mask126" = or <16 x i1> %"mask|case_match117", %"mask|break_mask86"
%"finished&func132" = and <16 x i1> %__mask, %"mask|break_mask126"
%mask_i16.i.i198 = bitcast <16 x i1> %"finished&func132" to i16
%"equal_finished&func132_internal_mask&function_mask13" = icmp eq i16 %mask_i16.i.i214, %mask_i16.i.i198
br i1 %"equal_finished&func132_internal_mask&function_mask13", label %switch_done, label %not_all_continued_or_breaked135
; Last arm: lanes still in the loop with a in 9..12 add (a << 1), then always continue
; to the latch (no further mask comparison is needed here).
not_all_continued_or_breaked135:
%"mask|case_match157" = and <16 x i1> %"entry_mask&case_match155191", %"oldMask&test213"
%add_res_load162_mul_a_load160_ = select <16 x i1> %"mask|case_match157", <16 x i32> %mul_a_load160_, <16 x i32> zeroinitializer
%8 = add nsw <16 x i32> %7, %add_res_load162_mul_a_load160_
br label %switch_done
}