[X86] Add optsize SHLD/SHRD tests
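
New coverage: var_shift_i32_optsize in fshl.ll and fshr.ll, checking
variable-amount i32 funnel shifts under the optsize attribute on targets
with both fast and slow SHLD/SHRD.

For reference (a sketch of the intrinsic's semantics, not part of the
generated checks; the value names are illustrative), fshl.i32 on %x, %y,
%z is equivalent to

  %amt = and i32 %z, 31
  %inv = sub i32 32, %amt
  %hi  = shl i32 %x, %amt
  %lo  = lshr i32 %y, %inv
  %res = or i32 %hi, %lo

when %amt is nonzero, and plain %x when %amt is zero (fshr is the mirror
image, returning %y for a zero amount). x86's shldl %cl, %src, %dst
computes (%dst << %cl) | (%src >> (32 - %cl)), so a single SHLD/SHRD
covers the nonzero-amount case; the zero-amount select shows up as the
je/cmovel in the SLOW check lines below.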

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@349284 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/test/CodeGen/X86/fshl.ll b/test/CodeGen/X86/fshl.ll
index b161763..1a55f84 100644
--- a/test/CodeGen/X86/fshl.ll
+++ b/test/CodeGen/X86/fshl.ll
@@ -178,6 +178,51 @@
   ret i32 %tmp
 }
 
+define i32 @var_shift_i32_optsize(i32 %x, i32 %y, i32 %z) nounwind optsize {
+; X86-FAST-LABEL: var_shift_i32_optsize:
+; X86-FAST:       # %bb.0:
+; X86-FAST-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-FAST-NEXT:    shldl %cl, %edx, %eax
+; X86-FAST-NEXT:    retl
+;
+; X86-SLOW-LABEL: var_shift_i32_optsize:
+; X86-SLOW:       # %bb.0:
+; X86-SLOW-NEXT:    pushl %esi
+; X86-SLOW-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SLOW-NEXT:    movl %eax, %edx
+; X86-SLOW-NEXT:    shldl %cl, %esi, %edx
+; X86-SLOW-NEXT:    andb $31, %cl
+; X86-SLOW-NEXT:    je .LBB3_2
+; X86-SLOW-NEXT:  # %bb.1:
+; X86-SLOW-NEXT:    movl %edx, %eax
+; X86-SLOW-NEXT:  .LBB3_2:
+; X86-SLOW-NEXT:    popl %esi
+; X86-SLOW-NEXT:    retl
+;
+; X64-FAST-LABEL: var_shift_i32_optsize:
+; X64-FAST:       # %bb.0:
+; X64-FAST-NEXT:    movl %edx, %ecx
+; X64-FAST-NEXT:    movl %edi, %eax
+; X64-FAST-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-FAST-NEXT:    shldl %cl, %esi, %eax
+; X64-FAST-NEXT:    retq
+;
+; X64-SLOW-LABEL: var_shift_i32_optsize:
+; X64-SLOW:       # %bb.0:
+; X64-SLOW-NEXT:    movl %edx, %ecx
+; X64-SLOW-NEXT:    movl %edi, %eax
+; X64-SLOW-NEXT:    shldl %cl, %esi, %eax
+; X64-SLOW-NEXT:    andb $31, %cl
+; X64-SLOW-NEXT:    cmovel %edi, %eax
+; X64-SLOW-NEXT:    retq
+  %tmp = tail call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %z)
+  ret i32 %tmp
+}
+
 define i64 @var_shift_i64(i64 %x, i64 %y, i64 %z) nounwind {
 ; X86-FAST-LABEL: var_shift_i64:
 ; X86-FAST:       # %bb.0:
@@ -198,36 +243,36 @@
 ; X86-FAST-NEXT:    shll %cl, %edi
 ; X86-FAST-NEXT:    shldl %cl, %eax, %ebp
 ; X86-FAST-NEXT:    testb $32, %bl
-; X86-FAST-NEXT:    je .LBB3_2
+; X86-FAST-NEXT:    je .LBB4_2
 ; X86-FAST-NEXT:  # %bb.1:
 ; X86-FAST-NEXT:    movl %edi, %ebp
 ; X86-FAST-NEXT:    xorl %edi, %edi
-; X86-FAST-NEXT:  .LBB3_2:
+; X86-FAST-NEXT:  .LBB4_2:
 ; X86-FAST-NEXT:    movb $64, %cl
 ; X86-FAST-NEXT:    subb %bl, %cl
 ; X86-FAST-NEXT:    movl %edx, %esi
 ; X86-FAST-NEXT:    shrl %cl, %esi
 ; X86-FAST-NEXT:    shrdl %cl, %edx, (%esp) # 4-byte Folded Spill
 ; X86-FAST-NEXT:    testb $32, %cl
-; X86-FAST-NEXT:    jne .LBB3_3
+; X86-FAST-NEXT:    jne .LBB4_3
 ; X86-FAST-NEXT:  # %bb.4:
 ; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-FAST-NEXT:    movl (%esp), %ecx # 4-byte Reload
 ; X86-FAST-NEXT:    testl %ebx, %ebx
-; X86-FAST-NEXT:    jne .LBB3_6
-; X86-FAST-NEXT:    jmp .LBB3_7
-; X86-FAST-NEXT:  .LBB3_3:
+; X86-FAST-NEXT:    jne .LBB4_6
+; X86-FAST-NEXT:    jmp .LBB4_7
+; X86-FAST-NEXT:  .LBB4_3:
 ; X86-FAST-NEXT:    movl %esi, %ecx
 ; X86-FAST-NEXT:    xorl %esi, %esi
 ; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-FAST-NEXT:    testl %ebx, %ebx
-; X86-FAST-NEXT:    je .LBB3_7
-; X86-FAST-NEXT:  .LBB3_6:
+; X86-FAST-NEXT:    je .LBB4_7
+; X86-FAST-NEXT:  .LBB4_6:
 ; X86-FAST-NEXT:    orl %esi, %ebp
 ; X86-FAST-NEXT:    orl %ecx, %edi
 ; X86-FAST-NEXT:    movl %edi, %eax
 ; X86-FAST-NEXT:    movl %ebp, %edx
-; X86-FAST-NEXT:  .LBB3_7:
+; X86-FAST-NEXT:  .LBB4_7:
 ; X86-FAST-NEXT:    addl $4, %esp
 ; X86-FAST-NEXT:    popl %esi
 ; X86-FAST-NEXT:    popl %edi
@@ -261,11 +306,11 @@
 ; X86-SLOW-NEXT:    testb %dl, %dl
 ; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-SLOW-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT:    je .LBB3_2
+; X86-SLOW-NEXT:    je .LBB4_2
 ; X86-SLOW-NEXT:  # %bb.1:
 ; X86-SLOW-NEXT:    orl %eax, %ebp
 ; X86-SLOW-NEXT:    movl %ebp, (%esp) # 4-byte Spill
-; X86-SLOW-NEXT:  .LBB3_2:
+; X86-SLOW-NEXT:  .LBB4_2:
 ; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %ebp
 ; X86-SLOW-NEXT:    movl %ebp, %eax
 ; X86-SLOW-NEXT:    movl %ebx, %ecx
@@ -276,41 +321,41 @@
 ; X86-SLOW-NEXT:    negb %cl
 ; X86-SLOW-NEXT:    shrl %cl, %edi
 ; X86-SLOW-NEXT:    testb %ch, %ch
-; X86-SLOW-NEXT:    je .LBB3_4
+; X86-SLOW-NEXT:    je .LBB4_4
 ; X86-SLOW-NEXT:  # %bb.3:
 ; X86-SLOW-NEXT:    orl %edi, %eax
 ; X86-SLOW-NEXT:    movl %eax, %ebp
-; X86-SLOW-NEXT:  .LBB3_4:
+; X86-SLOW-NEXT:  .LBB4_4:
 ; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-SLOW-NEXT:    movl %eax, %edi
 ; X86-SLOW-NEXT:    movl %ebx, %ecx
 ; X86-SLOW-NEXT:    shll %cl, %edi
 ; X86-SLOW-NEXT:    testb $32, %bl
-; X86-SLOW-NEXT:    je .LBB3_6
+; X86-SLOW-NEXT:    je .LBB4_6
 ; X86-SLOW-NEXT:  # %bb.5:
 ; X86-SLOW-NEXT:    movl %edi, %ebp
 ; X86-SLOW-NEXT:    xorl %edi, %edi
-; X86-SLOW-NEXT:  .LBB3_6:
+; X86-SLOW-NEXT:  .LBB4_6:
 ; X86-SLOW-NEXT:    movb %dh, %cl
 ; X86-SLOW-NEXT:    shrl %cl, %esi
 ; X86-SLOW-NEXT:    testb $32, %dh
-; X86-SLOW-NEXT:    jne .LBB3_7
+; X86-SLOW-NEXT:    jne .LBB4_7
 ; X86-SLOW-NEXT:  # %bb.8:
 ; X86-SLOW-NEXT:    movl (%esp), %ecx # 4-byte Reload
 ; X86-SLOW-NEXT:    testl %ebx, %ebx
-; X86-SLOW-NEXT:    jne .LBB3_10
-; X86-SLOW-NEXT:    jmp .LBB3_11
-; X86-SLOW-NEXT:  .LBB3_7:
+; X86-SLOW-NEXT:    jne .LBB4_10
+; X86-SLOW-NEXT:    jmp .LBB4_11
+; X86-SLOW-NEXT:  .LBB4_7:
 ; X86-SLOW-NEXT:    movl %esi, %ecx
 ; X86-SLOW-NEXT:    xorl %esi, %esi
 ; X86-SLOW-NEXT:    testl %ebx, %ebx
-; X86-SLOW-NEXT:    je .LBB3_11
-; X86-SLOW-NEXT:  .LBB3_10:
+; X86-SLOW-NEXT:    je .LBB4_11
+; X86-SLOW-NEXT:  .LBB4_10:
 ; X86-SLOW-NEXT:    orl %esi, %ebp
 ; X86-SLOW-NEXT:    orl %ecx, %edi
 ; X86-SLOW-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-SLOW-NEXT:    movl %edi, %eax
-; X86-SLOW-NEXT:  .LBB3_11:
+; X86-SLOW-NEXT:  .LBB4_11:
 ; X86-SLOW-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
 ; X86-SLOW-NEXT:    addl $8, %esp
 ; X86-SLOW-NEXT:    popl %esi
diff --git a/test/CodeGen/X86/fshr.ll b/test/CodeGen/X86/fshr.ll
index 5bd31dd..09b8da5 100644
--- a/test/CodeGen/X86/fshr.ll
+++ b/test/CodeGen/X86/fshr.ll
@@ -177,6 +177,51 @@
   ret i32 %tmp
 }
 
+define i32 @var_shift_i32_optsize(i32 %x, i32 %y, i32 %z) nounwind optsize {
+; X86-FAST-LABEL: var_shift_i32_optsize:
+; X86-FAST:       # %bb.0:
+; X86-FAST-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-FAST-NEXT:    shrdl %cl, %edx, %eax
+; X86-FAST-NEXT:    retl
+;
+; X86-SLOW-LABEL: var_shift_i32_optsize:
+; X86-SLOW:       # %bb.0:
+; X86-SLOW-NEXT:    pushl %esi
+; X86-SLOW-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SLOW-NEXT:    movl %eax, %edx
+; X86-SLOW-NEXT:    shrdl %cl, %esi, %edx
+; X86-SLOW-NEXT:    andb $31, %cl
+; X86-SLOW-NEXT:    je .LBB3_2
+; X86-SLOW-NEXT:  # %bb.1:
+; X86-SLOW-NEXT:    movl %edx, %eax
+; X86-SLOW-NEXT:  .LBB3_2:
+; X86-SLOW-NEXT:    popl %esi
+; X86-SLOW-NEXT:    retl
+;
+; X64-FAST-LABEL: var_shift_i32_optsize:
+; X64-FAST:       # %bb.0:
+; X64-FAST-NEXT:    movl %edx, %ecx
+; X64-FAST-NEXT:    movl %esi, %eax
+; X64-FAST-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-FAST-NEXT:    shrdl %cl, %edi, %eax
+; X64-FAST-NEXT:    retq
+;
+; X64-SLOW-LABEL: var_shift_i32_optsize:
+; X64-SLOW:       # %bb.0:
+; X64-SLOW-NEXT:    movl %edx, %ecx
+; X64-SLOW-NEXT:    movl %esi, %eax
+; X64-SLOW-NEXT:    shrdl %cl, %edi, %eax
+; X64-SLOW-NEXT:    andb $31, %cl
+; X64-SLOW-NEXT:    cmovel %esi, %eax
+; X64-SLOW-NEXT:    retq
+  %tmp = tail call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %z)
+  ret i32 %tmp
+}
+
 define i64 @var_shift_i64(i64 %x, i64 %y, i64 %z) nounwind {
 ; X86-FAST-LABEL: var_shift_i64:
 ; X86-FAST:       # %bb.0:
@@ -198,30 +243,30 @@
 ; X86-FAST-NEXT:    shll %cl, %edi
 ; X86-FAST-NEXT:    shldl %cl, %eax, %esi
 ; X86-FAST-NEXT:    testb $32, %cl
-; X86-FAST-NEXT:    je .LBB3_2
+; X86-FAST-NEXT:    je .LBB4_2
 ; X86-FAST-NEXT:  # %bb.1:
 ; X86-FAST-NEXT:    movl %edi, %esi
 ; X86-FAST-NEXT:    xorl %edi, %edi
-; X86-FAST-NEXT:  .LBB3_2:
+; X86-FAST-NEXT:  .LBB4_2:
 ; X86-FAST-NEXT:    movl %edx, %ebp
 ; X86-FAST-NEXT:    movl %ebx, %ecx
 ; X86-FAST-NEXT:    shrl %cl, %ebp
 ; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-FAST-NEXT:    shrdl %cl, %edx, %eax
 ; X86-FAST-NEXT:    testb $32, %bl
-; X86-FAST-NEXT:    je .LBB3_4
+; X86-FAST-NEXT:    je .LBB4_4
 ; X86-FAST-NEXT:  # %bb.3:
 ; X86-FAST-NEXT:    movl %ebp, %eax
 ; X86-FAST-NEXT:    xorl %ebp, %ebp
-; X86-FAST-NEXT:  .LBB3_4:
+; X86-FAST-NEXT:  .LBB4_4:
 ; X86-FAST-NEXT:    testl %ebx, %ebx
-; X86-FAST-NEXT:    je .LBB3_6
+; X86-FAST-NEXT:    je .LBB4_6
 ; X86-FAST-NEXT:  # %bb.5:
 ; X86-FAST-NEXT:    orl %ebp, %esi
 ; X86-FAST-NEXT:    orl %eax, %edi
 ; X86-FAST-NEXT:    movl %edi, (%esp) # 4-byte Spill
 ; X86-FAST-NEXT:    movl %esi, %edx
-; X86-FAST-NEXT:  .LBB3_6:
+; X86-FAST-NEXT:  .LBB4_6:
 ; X86-FAST-NEXT:    movl (%esp), %eax # 4-byte Reload
 ; X86-FAST-NEXT:    addl $4, %esp
 ; X86-FAST-NEXT:    popl %esi
@@ -256,11 +301,11 @@
 ; X86-SLOW-NEXT:    shrl %cl, %edi
 ; X86-SLOW-NEXT:    testb %ch, %ch
 ; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %ebp
-; X86-SLOW-NEXT:    je .LBB3_2
+; X86-SLOW-NEXT:    je .LBB4_2
 ; X86-SLOW-NEXT:  # %bb.1:
 ; X86-SLOW-NEXT:    orl %edi, %edx
 ; X86-SLOW-NEXT:    movl %edx, (%esp) # 4-byte Spill
-; X86-SLOW-NEXT:  .LBB3_2:
+; X86-SLOW-NEXT:  .LBB4_2:
 ; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-SLOW-NEXT:    movl %ecx, %edx
 ; X86-SLOW-NEXT:    movl %ebx, %ecx
@@ -273,41 +318,41 @@
 ; X86-SLOW-NEXT:    shll %cl, %edi
 ; X86-SLOW-NEXT:    testb %ah, %ah
 ; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %ebp
-; X86-SLOW-NEXT:    je .LBB3_4
+; X86-SLOW-NEXT:    je .LBB4_4
 ; X86-SLOW-NEXT:  # %bb.3:
 ; X86-SLOW-NEXT:    orl %edx, %edi
 ; X86-SLOW-NEXT:    movl %edi, %ebp
-; X86-SLOW-NEXT:  .LBB3_4:
+; X86-SLOW-NEXT:  .LBB4_4:
 ; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; X86-SLOW-NEXT:    movl %ebx, %ecx
 ; X86-SLOW-NEXT:    shrl %cl, %edi
 ; X86-SLOW-NEXT:    testb $32, %bl
-; X86-SLOW-NEXT:    je .LBB3_6
+; X86-SLOW-NEXT:    je .LBB4_6
 ; X86-SLOW-NEXT:  # %bb.5:
 ; X86-SLOW-NEXT:    movl %edi, %ebp
 ; X86-SLOW-NEXT:    xorl %edi, %edi
-; X86-SLOW-NEXT:  .LBB3_6:
+; X86-SLOW-NEXT:  .LBB4_6:
 ; X86-SLOW-NEXT:    movl %eax, %ecx
 ; X86-SLOW-NEXT:    shll %cl, %esi
 ; X86-SLOW-NEXT:    testb $32, %al
 ; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-SLOW-NEXT:    jne .LBB3_7
+; X86-SLOW-NEXT:    jne .LBB4_7
 ; X86-SLOW-NEXT:  # %bb.8:
 ; X86-SLOW-NEXT:    movl (%esp), %eax # 4-byte Reload
 ; X86-SLOW-NEXT:    testl %ebx, %ebx
-; X86-SLOW-NEXT:    jne .LBB3_10
-; X86-SLOW-NEXT:    jmp .LBB3_11
-; X86-SLOW-NEXT:  .LBB3_7:
+; X86-SLOW-NEXT:    jne .LBB4_10
+; X86-SLOW-NEXT:    jmp .LBB4_11
+; X86-SLOW-NEXT:  .LBB4_7:
 ; X86-SLOW-NEXT:    movl %esi, %eax
 ; X86-SLOW-NEXT:    xorl %esi, %esi
 ; X86-SLOW-NEXT:    testl %ebx, %ebx
-; X86-SLOW-NEXT:    je .LBB3_11
-; X86-SLOW-NEXT:  .LBB3_10:
+; X86-SLOW-NEXT:    je .LBB4_11
+; X86-SLOW-NEXT:  .LBB4_10:
 ; X86-SLOW-NEXT:    orl %ebp, %esi
 ; X86-SLOW-NEXT:    orl %edi, %eax
 ; X86-SLOW-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-SLOW-NEXT:    movl %eax, %edx
-; X86-SLOW-NEXT:  .LBB3_11:
+; X86-SLOW-NEXT:  .LBB4_11:
 ; X86-SLOW-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
 ; X86-SLOW-NEXT:    addl $8, %esp
 ; X86-SLOW-NEXT:    popl %esi