; blob: 43aede5a795ee384e9f9da585da89f5d4eee20dd [file] [edit]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64 | FileCheck %s --check-prefixes=CHECK,SSE,SSE2
; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE42
; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,AVX,AVX2
; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,AVX,AVX512
; hot_end_mask: read-modify-write of a 16-byte-aligned i128 at %token that
; replaces bits [32, 64) with the 32-bit value %end and leaves every other bit
; as loaded. The merged i128 is stored back to %token and also returned.
; The expected assembly below is autogenerated; regenerate it with
; utils/update_llc_test_checks.py rather than editing it by hand.
define i128 @hot_end_mask(ptr %token, i32 %end) {
; SSE2-LABEL: hot_end_mask:
; SSE2: # %bb.0:
; SSE2-NEXT: # kill: def $esi killed $esi def $rsi
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = mem[2,3,2,3]
; SSE2-NEXT: movl (%rdi), %eax
; SSE2-NEXT: movq %xmm0, %rdx
; SSE2-NEXT: shlq $32, %rsi
; SSE2-NEXT: orq %rsi, %rax
; SSE2-NEXT: movq %xmm0, 8(%rdi)
; SSE2-NEXT: movq %rax, (%rdi)
; SSE2-NEXT: retq
;
; SSE42-LABEL: hot_end_mask:
; SSE42: # %bb.0:
; SSE42-NEXT: # kill: def $esi killed $esi def $rsi
; SSE42-NEXT: movdqa (%rdi), %xmm0
; SSE42-NEXT: pextrq $1, %xmm0, %rdx
; SSE42-NEXT: movd %xmm0, %eax
; SSE42-NEXT: shlq $32, %rsi
; SSE42-NEXT: pextrq $1, %xmm0, 8(%rdi)
; SSE42-NEXT: orq %rsi, %rax
; SSE42-NEXT: movq %rax, (%rdi)
; SSE42-NEXT: retq
;
; AVX2-LABEL: hot_end_mask:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqa (%rdi), %xmm0
; AVX2-NEXT: vpextrq $1, %xmm0, %rdx
; AVX2-NEXT: # kill: def $esi killed $esi def $rsi
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: shlq $32, %rsi
; AVX2-NEXT: orq %rsi, %rax
; AVX2-NEXT: vpextrq $1, %xmm0, 8(%rdi)
; AVX2-NEXT: movq %rax, (%rdi)
; AVX2-NEXT: retq
;
; AVX512-LABEL: hot_end_mask:
; AVX512: # %bb.0:
; AVX512-NEXT: # kill: def $esi killed $esi def $rsi
; AVX512-NEXT: vmovdqa (%rdi), %xmm0
; AVX512-NEXT: vpextrq $1, %xmm0, %rdx
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: shlq $32, %rsi
; AVX512-NEXT: orq %rsi, %rax
; AVX512-NEXT: vpextrq $1, %xmm0, 8(%rdi)
; AVX512-NEXT: movq %rax, (%rdi)
; AVX512-NEXT: retq
; Load the full 128-bit value.
%load = load i128, ptr %token, align 16
; -18446744069414584321 is 0xFFFFFFFFFFFFFFFF_00000000FFFFFFFF as an i128, i.e.
; the inverse of 0xFFFFFFFF00000000: it clears bits [32, 64) and keeps the rest.
%mask = and i128 %load, -18446744069414584321
; Widen %end and move it into the cleared field (bits [32, 64)).
%zext = zext i32 %end to i128
%shl = shl nuw nsw i128 %zext, 32
; The two operands cannot share set bits (the field was just cleared), which is
; what the 'disjoint' flag asserts.
%blend = or disjoint i128 %mask, %shl
; Write the merged value back and return it.
store i128 %blend, ptr %token, align 16
ret i128 %blend
}
; hot_start_mask: read-modify-write of a 16-byte-aligned i128 at %token that
; replaces bits [0, 32) with the 32-bit value %start and leaves every other bit
; as loaded. The merged i128 is stored back to %token and also returned.
; The expected assembly below is autogenerated; regenerate it with
; utils/update_llc_test_checks.py rather than editing it by hand.
define i128 @hot_start_mask(ptr %token, i32 %start) {
; SSE2-LABEL: hot_start_mask:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa (%rdi), %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT: movq %xmm1, %rdx
; SSE2-NEXT: movq %xmm0, %rcx
; SSE2-NEXT: movl %esi, %eax
; SSE2-NEXT: orq %rcx, %rax
; SSE2-NEXT: movq %xmm1, 8(%rdi)
; SSE2-NEXT: movq %rax, (%rdi)
; SSE2-NEXT: retq
;
; SSE42-LABEL: hot_start_mask:
; SSE42: # %bb.0:
; SSE42-NEXT: movdqa (%rdi), %xmm0
; SSE42-NEXT: pxor %xmm1, %xmm1
; SSE42-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3,4,5,6,7]
; SSE42-NEXT: movq %xmm1, %rcx
; SSE42-NEXT: pextrq $1, %xmm0, %rdx
; SSE42-NEXT: movl %esi, %eax
; SSE42-NEXT: pextrq $1, %xmm0, 8(%rdi)
; SSE42-NEXT: orq %rcx, %rax
; SSE42-NEXT: movq %rax, (%rdi)
; SSE42-NEXT: retq
;
; AVX-LABEL: hot_start_mask:
; AVX: # %bb.0:
; AVX-NEXT: vmovdqa (%rdi), %xmm0
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm0[1,2,3]
; AVX-NEXT: vmovq %xmm1, %rcx
; AVX-NEXT: vpextrq $1, %xmm0, %rdx
; AVX-NEXT: movl %esi, %eax
; AVX-NEXT: orq %rcx, %rax
; AVX-NEXT: vpextrq $1, %xmm0, 8(%rdi)
; AVX-NEXT: movq %rax, (%rdi)
; AVX-NEXT: retq
; Load the full 128-bit value.
%load = load i128, ptr %token, align 16
; -4294967296 is the inverse of 0xFFFFFFFF as an i128: it clears bits [0, 32)
; and keeps the rest.
%mask = and i128 %load, -4294967296
; Widen %start; no shift is needed because the field sits at bit 0.
%zext = zext i32 %start to i128
; The two operands cannot share set bits (the field was just cleared), which is
; what the 'disjoint' flag asserts.
%blend = or disjoint i128 %mask, %zext
; Write the merged value back and return it.
store i128 %blend, ptr %token, align 16
ret i128 %blend
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; CHECK: {{.*}}
; SSE: {{.*}}