[x86] Fix Intel OpenCL builtin CalleeSavedRegs for AVX512 targets (KNL and SKX)

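Summary: Remove RDI from CSR_64_Intel_OCL_BI_AVX512 to align with the AVX512 builtins implementations, some of which don't preserve rdi. The test_prolog_epilog checks for KNL and SKX are updated accordingly: rdi is no longer pushed/popped and the stack adjustment changes from 1064 to 1072 bytes.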

Reviewers: yubing, tianqing, craig.topper

Reviewed By: craig.topper

Subscribers: yaxunl, Anastasia, hiraditya

Differential Revision: https://reviews.llvm.org/D77032
diff --git a/llvm/lib/Target/X86/X86CallingConv.td b/llvm/lib/Target/X86/X86CallingConv.td
index db1aef2..3ec947d 100644
--- a/llvm/lib/Target/X86/X86CallingConv.td
+++ b/llvm/lib/Target/X86/X86CallingConv.td
@@ -1145,7 +1145,7 @@
 def CSR_64_Intel_OCL_BI_AVX    : CalleeSavedRegs<(add CSR_64,
                                                   (sequence "YMM%u", 8, 15))>;
 
-def CSR_64_Intel_OCL_BI_AVX512 : CalleeSavedRegs<(add RBX, RDI, RSI, R14, R15,
+def CSR_64_Intel_OCL_BI_AVX512 : CalleeSavedRegs<(add RBX, RSI, R14, R15,
                                                   (sequence "ZMM%u", 16, 31),
                                                   K4, K5, K6, K7)>;
 
diff --git a/llvm/test/CodeGen/X86/avx512-intel-ocl.ll b/llvm/test/CodeGen/X86/avx512-intel-ocl.ll
index 751d610..232b598 100644
--- a/llvm/test/CodeGen/X86/avx512-intel-ocl.ll
+++ b/llvm/test/CodeGen/X86/avx512-intel-ocl.ll
@@ -294,8 +294,7 @@
 ; X64-KNL-LABEL: test_prolog_epilog:
 ; X64-KNL:       ## %bb.0:
 ; X64-KNL-NEXT:    pushq %rsi
-; X64-KNL-NEXT:    pushq %rdi
-; X64-KNL-NEXT:    subq $1064, %rsp ## imm = 0x428
+; X64-KNL-NEXT:    subq $1072, %rsp ## imm = 0x430
 ; X64-KNL-NEXT:    kmovw %k7, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
 ; X64-KNL-NEXT:    kmovw %k6, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
 ; X64-KNL-NEXT:    kmovw %k5, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
@@ -337,16 +336,14 @@
 ; X64-KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload
 ; X64-KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload
 ; X64-KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload
-; X64-KNL-NEXT:    addq $1064, %rsp ## imm = 0x428
-; X64-KNL-NEXT:    popq %rdi
+; X64-KNL-NEXT:    addq $1072, %rsp ## imm = 0x430
 ; X64-KNL-NEXT:    popq %rsi
 ; X64-KNL-NEXT:    retq
 ;
 ; X64-SKX-LABEL: test_prolog_epilog:
 ; X64-SKX:       ## %bb.0:
 ; X64-SKX-NEXT:    pushq %rsi
-; X64-SKX-NEXT:    pushq %rdi
-; X64-SKX-NEXT:    subq $1064, %rsp ## imm = 0x428
+; X64-SKX-NEXT:    subq $1072, %rsp ## imm = 0x430
 ; X64-SKX-NEXT:    kmovq %k7, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
 ; X64-SKX-NEXT:    kmovq %k6, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
 ; X64-SKX-NEXT:    kmovq %k5, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
@@ -388,8 +385,7 @@
 ; X64-SKX-NEXT:    kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 8-byte Reload
 ; X64-SKX-NEXT:    kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 8-byte Reload
 ; X64-SKX-NEXT:    kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 8-byte Reload
-; X64-SKX-NEXT:    addq $1064, %rsp ## imm = 0x428
-; X64-SKX-NEXT:    popq %rdi
+; X64-SKX-NEXT:    addq $1072, %rsp ## imm = 0x430
 ; X64-SKX-NEXT:    popq %rsi
 ; X64-SKX-NEXT:    retq
    %c = call <16 x float> @func_float16(<16 x float> %a, <16 x float> %b)