blob: 49f0e9a6b1455453f1238d083948a743c5ea819d [file] [log] [blame]
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefix=X86-X87
3; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86-SSE
4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X64
5
6; Ideally powi(x, 15) would compile to 5 multiplies; the checks for pow_wrapper below currently expect 6.
7
; pow_wrapper: llvm.powi.f64.i32 with a constant exponent of 15 and no
; size-related function attributes. In all three RUN configurations the
; checks expect the power to be expanded inline into six multiplies
; (fmul/fmulp on x87, mulsd with SSE2) and no call to __powidf2.
; In the i686 SSE2 run the product is additionally stored to an 8-byte aligned
; stack slot and reloaded with fldl before returning.
8define double @pow_wrapper(double %a) nounwind readonly ssp noredzone {
9; X86-X87-LABEL: pow_wrapper:
10; X86-X87: # %bb.0:
11; X86-X87-NEXT: fldl {{[0-9]+}}(%esp)
12; X86-X87-NEXT: fld %st(0)
13; X86-X87-NEXT: fmul %st(1), %st
14; X86-X87-NEXT: fmul %st, %st(1)
15; X86-X87-NEXT: fmul %st, %st(0)
16; X86-X87-NEXT: fmul %st, %st(1)
17; X86-X87-NEXT: fmul %st, %st(0)
18; X86-X87-NEXT: fmulp %st, %st(1)
19; X86-X87-NEXT: retl
20;
21; X86-SSE-LABEL: pow_wrapper:
22; X86-SSE: # %bb.0:
23; X86-SSE-NEXT: pushl %ebp
24; X86-SSE-NEXT: movl %esp, %ebp
25; X86-SSE-NEXT: andl $-8, %esp
26; X86-SSE-NEXT: subl $8, %esp
27; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
28; X86-SSE-NEXT: movapd %xmm0, %xmm1
29; X86-SSE-NEXT: mulsd %xmm0, %xmm1
30; X86-SSE-NEXT: mulsd %xmm1, %xmm0
31; X86-SSE-NEXT: mulsd %xmm1, %xmm1
32; X86-SSE-NEXT: mulsd %xmm1, %xmm0
33; X86-SSE-NEXT: mulsd %xmm1, %xmm1
34; X86-SSE-NEXT: mulsd %xmm0, %xmm1
35; X86-SSE-NEXT: movsd %xmm1, (%esp)
36; X86-SSE-NEXT: fldl (%esp)
37; X86-SSE-NEXT: movl %ebp, %esp
38; X86-SSE-NEXT: popl %ebp
39; X86-SSE-NEXT: retl
40;
41; X64-LABEL: pow_wrapper:
42; X64: # %bb.0:
43; X64-NEXT: movapd %xmm0, %xmm1
44; X64-NEXT: mulsd %xmm0, %xmm1
45; X64-NEXT: mulsd %xmm1, %xmm0
46; X64-NEXT: mulsd %xmm1, %xmm1
47; X64-NEXT: mulsd %xmm1, %xmm0
48; X64-NEXT: mulsd %xmm1, %xmm1
49; X64-NEXT: mulsd %xmm1, %xmm0
50; X64-NEXT: retq
51 %ret = tail call double @llvm.powi.f64.i32(double %a, i32 15) nounwind ; <double> [#uses=1]
52 ret double %ret
53}
54
; pow_wrapper_optsize: the same powi(%a, 15) call as pow_wrapper, but the
; function carries the optsize attribute. The checks expect no inline multiply
; sequence here: both i686 runs pass the double and the constant 15 on the
; stack and call __powidf2, while the x86-64 run loads 15 into %edi and
; tail-jumps to __powidf2@PLT.
55define double @pow_wrapper_optsize(double %a) nounwind optsize {
56; X86-X87-LABEL: pow_wrapper_optsize:
57; X86-X87: # %bb.0:
58; X86-X87-NEXT: subl $12, %esp
59; X86-X87-NEXT: fldl {{[0-9]+}}(%esp)
60; X86-X87-NEXT: fstpl (%esp)
61; X86-X87-NEXT: movl $15, {{[0-9]+}}(%esp)
62; X86-X87-NEXT: calll __powidf2
63; X86-X87-NEXT: addl $12, %esp
64; X86-X87-NEXT: retl
65;
66; X86-SSE-LABEL: pow_wrapper_optsize:
67; X86-SSE: # %bb.0:
68; X86-SSE-NEXT: subl $12, %esp
69; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
70; X86-SSE-NEXT: movsd %xmm0, (%esp)
71; X86-SSE-NEXT: movl $15, {{[0-9]+}}(%esp)
72; X86-SSE-NEXT: calll __powidf2
73; X86-SSE-NEXT: addl $12, %esp
74; X86-SSE-NEXT: retl
75;
76; X64-LABEL: pow_wrapper_optsize:
77; X64: # %bb.0:
78; X64-NEXT: movl $15, %edi
79; X64-NEXT: jmp __powidf2@PLT # TAILCALL
80 %ret = tail call double @llvm.powi.f64.i32(double %a, i32 15) nounwind ; <double> [#uses=1]
81 ret double %ret
82}
83
; pow_wrapper_pgso: the same powi(%a, 15) call with no optsize/minsize
; attribute; instead the function is tagged with !prof !14, which the metadata
; at the end of this file defines as a function_entry_count of 0.
; The expected code is the same __powidf2 libcall sequence that the checks for
; pow_wrapper_optsize expect.
; NOTE(review): presumably the zero entry count, judged against the module's
; ProfileSummary, is what selects the size-oriented lowering - confirm.
84define double @pow_wrapper_pgso(double %a) nounwind !prof !14 {
85; X86-X87-LABEL: pow_wrapper_pgso:
86; X86-X87: # %bb.0:
87; X86-X87-NEXT: subl $12, %esp
88; X86-X87-NEXT: fldl {{[0-9]+}}(%esp)
89; X86-X87-NEXT: fstpl (%esp)
90; X86-X87-NEXT: movl $15, {{[0-9]+}}(%esp)
91; X86-X87-NEXT: calll __powidf2
92; X86-X87-NEXT: addl $12, %esp
93; X86-X87-NEXT: retl
94;
95; X86-SSE-LABEL: pow_wrapper_pgso:
96; X86-SSE: # %bb.0:
97; X86-SSE-NEXT: subl $12, %esp
98; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
99; X86-SSE-NEXT: movsd %xmm0, (%esp)
100; X86-SSE-NEXT: movl $15, {{[0-9]+}}(%esp)
101; X86-SSE-NEXT: calll __powidf2
102; X86-SSE-NEXT: addl $12, %esp
103; X86-SSE-NEXT: retl
104;
105; X64-LABEL: pow_wrapper_pgso:
106; X64: # %bb.0:
107; X64-NEXT: movl $15, %edi
108; X64-NEXT: jmp __powidf2@PLT # TAILCALL
109 %ret = tail call double @llvm.powi.f64.i32(double %a, i32 15) nounwind ; <double> [#uses=1]
110 ret double %ret
111}
112
; pow_wrapper_minsize: the same powi(%a, 15) call with the minsize attribute.
; The i686 expectations are the same stack-argument __powidf2 call as in the
; optsize variant. The x86-64 expectation differs only in how the exponent is
; materialized: pushq $15 / popq %rdi instead of a movl into %edi, followed by
; the tail-jump to __powidf2@PLT.
113define double @pow_wrapper_minsize(double %a) nounwind minsize {
114; X86-X87-LABEL: pow_wrapper_minsize:
115; X86-X87: # %bb.0:
116; X86-X87-NEXT: subl $12, %esp
117; X86-X87-NEXT: fldl {{[0-9]+}}(%esp)
118; X86-X87-NEXT: fstpl (%esp)
119; X86-X87-NEXT: movl $15, {{[0-9]+}}(%esp)
120; X86-X87-NEXT: calll __powidf2
121; X86-X87-NEXT: addl $12, %esp
122; X86-X87-NEXT: retl
123;
124; X86-SSE-LABEL: pow_wrapper_minsize:
125; X86-SSE: # %bb.0:
126; X86-SSE-NEXT: subl $12, %esp
127; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
128; X86-SSE-NEXT: movsd %xmm0, (%esp)
129; X86-SSE-NEXT: movl $15, {{[0-9]+}}(%esp)
130; X86-SSE-NEXT: calll __powidf2
131; X86-SSE-NEXT: addl $12, %esp
132; X86-SSE-NEXT: retl
133;
134; X64-LABEL: pow_wrapper_minsize:
135; X64: # %bb.0:
136; X64-NEXT: pushq $15
137; X64-NEXT: popq %rdi
138; X64-NEXT: jmp __powidf2@PLT # TAILCALL
139 %ret = tail call double @llvm.powi.f64.i32(double %a, i32 15) nounwind ; <double> [#uses=1]
140 ret double %ret
141}
142
; powi_v2f32: llvm.powi.v2f32.i32 on a <2 x float> argument with exponent 15,
; under minsize. Every configuration's checks expect two separate calls to the
; scalar helper __powisf2. The constant 15 is materialized once with a
; push/pop pair into a register that is saved on entry and restored on exit
; (%esi on i686, %rbx on x86-64) and reused for the second call.
; In the SSE2 runs the second call's operand is taken from lane 1 via shufps,
; and the two scalar results are recombined with unpcklps; in the x87-only run
; both results are left on the x87 stack (flds + fxch) before returning.
143define <2 x float> @powi_v2f32(<2 x float> %a) nounwind minsize {
144; X86-X87-LABEL: powi_v2f32:
145; X86-X87: # %bb.0:
146; X86-X87-NEXT: pushl %esi
147; X86-X87-NEXT: subl $16, %esp
148; X86-X87-NEXT: flds {{[0-9]+}}(%esp)
149; X86-X87-NEXT: fstps {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
150; X86-X87-NEXT: flds {{[0-9]+}}(%esp)
151; X86-X87-NEXT: pushl $15
152; X86-X87-NEXT: popl %esi
153; X86-X87-NEXT: movl %esi, {{[0-9]+}}(%esp)
154; X86-X87-NEXT: fstps (%esp)
155; X86-X87-NEXT: calll __powisf2
156; X86-X87-NEXT: fstps {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
157; X86-X87-NEXT: movl %esi, {{[0-9]+}}(%esp)
158; X86-X87-NEXT: flds {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
159; X86-X87-NEXT: fstps (%esp)
160; X86-X87-NEXT: calll __powisf2
161; X86-X87-NEXT: flds {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
162; X86-X87-NEXT: fxch %st(1)
163; X86-X87-NEXT: addl $16, %esp
164; X86-X87-NEXT: popl %esi
165; X86-X87-NEXT: retl
166;
167; X86-SSE-LABEL: powi_v2f32:
168; X86-SSE: # %bb.0:
169; X86-SSE-NEXT: pushl %esi
170; X86-SSE-NEXT: subl $32, %esp
171; X86-SSE-NEXT: movups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
172; X86-SSE-NEXT: pushl $15
173; X86-SSE-NEXT: popl %esi
174; X86-SSE-NEXT: movl %esi, {{[0-9]+}}(%esp)
175; X86-SSE-NEXT: movss %xmm0, (%esp)
176; X86-SSE-NEXT: calll __powisf2
177; X86-SSE-NEXT: movl %esi, {{[0-9]+}}(%esp)
178; X86-SSE-NEXT: movups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
179; X86-SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
180; X86-SSE-NEXT: movss %xmm0, (%esp)
181; X86-SSE-NEXT: fstps {{[0-9]+}}(%esp)
182; X86-SSE-NEXT: calll __powisf2
183; X86-SSE-NEXT: fstps {{[0-9]+}}(%esp)
184; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
185; X86-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
186; X86-SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
187; X86-SSE-NEXT: addl $32, %esp
188; X86-SSE-NEXT: popl %esi
189; X86-SSE-NEXT: retl
190;
191; X64-LABEL: powi_v2f32:
192; X64: # %bb.0:
193; X64-NEXT: pushq %rbx
194; X64-NEXT: subq $32, %rsp
195; X64-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
196; X64-NEXT: pushq $15
197; X64-NEXT: popq %rbx
198; X64-NEXT: movl %ebx, %edi
199; X64-NEXT: callq __powisf2@PLT
200; X64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
201; X64-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
202; X64-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
203; X64-NEXT: movl %ebx, %edi
204; X64-NEXT: callq __powisf2@PLT
205; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
206; X64-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
207; X64-NEXT: movaps %xmm1, %xmm0
208; X64-NEXT: addq $32, %rsp
209; X64-NEXT: popq %rbx
210; X64-NEXT: retq
211 %ret = tail call < 2 x float> @llvm.powi.v2f32.i32(<2 x float> %a, i32 15) nounwind ;
212 ret <2 x float> %ret
213}
214
; Declarations of the two llvm.powi overloads called by the test functions in
; this file: scalar double and <2 x float>, each taking an i32 exponent.
215declare double @llvm.powi.f64.i32(double, i32) nounwind readonly
216declare < 2 x float> @llvm.powi.v2f32.i32(<2 x float>, i32) nounwind readonly
217
; Module flag carrying a "ProfileSummary" node (!1) in "InstrProf" format,
; with the total/max counts and a three-entry "DetailedSummary" (!10).
; NOTE(review): presumably this summary exists only so that the zero entry
; count in !14 is interpreted as cold for pow_wrapper_pgso - confirm.
218!llvm.module.flags = !{!0}
219!0 = !{i32 1, !"ProfileSummary", !1}
220!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
221!2 = !{!"ProfileFormat", !"InstrProf"}
222!3 = !{!"TotalCount", i64 10000}
223!4 = !{!"MaxCount", i64 10}
224!5 = !{!"MaxInternalCount", i64 1}
225!6 = !{!"MaxFunctionCount", i64 1000}
226!7 = !{!"NumCounts", i64 3}
227!8 = !{!"NumFunctions", i64 3}
228!9 = !{!"DetailedSummary", !10}
229!10 = !{!11, !12, !13}
230!11 = !{i32 10000, i64 100, i32 1}
231!12 = !{i32 999000, i64 100, i32 1}
232!13 = !{i32 999999, i64 1, i32 2}
; Function entry count of 0; attached to pow_wrapper_pgso through !prof !14.
233!14 = !{!"function_entry_count", i64 0}