; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s

; Test: an i1 value (%cc) that is defined inside a loop is used after the loop.
;
; Control flow of the IR below:
;   * %for.body computes %cc = (%i u< 4) and leaves the loop for %for.end when
;     %cc is false.
;   * %mid.loop loads a float and leaves the loop for %for.end when the value
;     fails `fcmp oge 0.0`; %cc is true on this exit.
;   * %for.end branches on %cc, so the value it sees depends on which of the
;     two exits was taken.
;
; In the checked output the copy of %cc that survives the loop lives in
; s[6:7]: each trip through %Flow merges s[10:11] into it under exec
; (s_andn2_b64 / s_and_b64 / s_or_b64), and %for.end consumes it with
; s_and_saveexec_b64. s[4:5] accumulates the lanes that have left the loop and
; is used both to mask exec inside the loop and to restore it at %for.end.
;
; The SI check lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define amdgpu_ps void @i1_copy_from_loop(<4 x i32> inreg %rsrc, i32 %tid) {
; SI-LABEL: i1_copy_from_loop:
; SI:       ; %bb.0: ; %entry
; SI-NEXT:    s_mov_b32 s14, 0
; SI-NEXT:    s_mov_b64 s[4:5], 0
; SI-NEXT:    ; implicit-def: $sgpr6_sgpr7
; SI-NEXT:    ; implicit-def: $sgpr8_sgpr9
; SI-NEXT:    s_branch .LBB0_3
; SI-NEXT:  .LBB0_1: ; in Loop: Header=BB0_3 Depth=1
; SI-NEXT:    ; implicit-def: $sgpr14
; SI-NEXT:  .LBB0_2: ; %Flow
; SI-NEXT:    ; in Loop: Header=BB0_3 Depth=1
; SI-NEXT:    s_and_b64 s[12:13], exec, s[8:9]
; SI-NEXT:    s_or_b64 s[4:5], s[12:13], s[4:5]
; SI-NEXT:    s_andn2_b64 s[6:7], s[6:7], exec
; SI-NEXT:    s_and_b64 s[10:11], s[10:11], exec
; SI-NEXT:    s_or_b64 s[6:7], s[6:7], s[10:11]
; SI-NEXT:    s_andn2_b64 exec, exec, s[4:5]
; SI-NEXT:    s_cbranch_execz .LBB0_7
; SI-NEXT:  .LBB0_3: ; %for.body
; SI-NEXT:    ; =>This Inner Loop Header: Depth=1
; SI-NEXT:    s_cmp_lt_u32 s14, 4
; SI-NEXT:    s_cselect_b64 s[10:11], -1, 0
; SI-NEXT:    s_or_b64 s[8:9], s[8:9], exec
; SI-NEXT:    s_cmp_gt_u32 s14, 3
; SI-NEXT:    s_cbranch_scc1 .LBB0_1
; SI-NEXT:  ; %bb.4: ; %mid.loop
; SI-NEXT:    ; in Loop: Header=BB0_3 Depth=1
; SI-NEXT:    v_mov_b32_e32 v1, s14
; SI-NEXT:    buffer_load_dword v1, v[0:1], s[0:3], 0 idxen offen
; SI-NEXT:    s_mov_b64 s[10:11], -1
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_cmp_le_f32_e32 vcc, 0, v1
; SI-NEXT:    s_mov_b64 s[8:9], -1
; SI-NEXT:    s_and_saveexec_b64 s[12:13], vcc
; SI-NEXT:  ; %bb.5: ; %end.loop
; SI-NEXT:    ; in Loop: Header=BB0_3 Depth=1
; SI-NEXT:    s_add_i32 s14, s14, 1
; SI-NEXT:    s_xor_b64 s[8:9], exec, -1
; SI-NEXT:  ; %bb.6: ; %Flow1
; SI-NEXT:    ; in Loop: Header=BB0_3 Depth=1
; SI-NEXT:    s_or_b64 exec, exec, s[12:13]
; SI-NEXT:    s_branch .LBB0_2
; SI-NEXT:  .LBB0_7: ; %for.end
; SI-NEXT:    s_or_b64 exec, exec, s[4:5]
; SI-NEXT:    s_and_saveexec_b64 s[0:1], s[6:7]
; SI-NEXT:    s_cbranch_execz .LBB0_9
; SI-NEXT:  ; %bb.8: ; %if
; SI-NEXT:    exp mrt0 v0, v0, v0, v0 done vm
; SI-NEXT:  .LBB0_9: ; %end
; SI-NEXT:    s_endpgm
entry:
  br label %for.body

for.body:
  ; %i counts completed iterations; %cc is the i1 that must outlive the loop.
  %i = phi i32 [0, %entry], [%i.inc, %end.loop]
  %cc = icmp ult i32 %i, 4
  br i1 %cc, label %mid.loop, label %for.end

mid.loop:
  ; Second loop exit: taken when the loaded value is not >= 0.0.
  ; %tid and %i are the two address operands of the load (the checked
  ; buffer_load_dword uses `idxen offen`).
  %v = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %tid, i32 %i, i32 0, i32 0)
  %cc2 = fcmp oge float %v, 0.0
  br i1 %cc2, label %end.loop, label %for.end

end.loop:
  %i.inc = add i32 %i, 1
  br label %for.body

for.end:
  ; Use of the in-loop i1 outside the loop.
  br i1 %cc, label %if, label %end

if:
  ; The exported values are irrelevant to the test (all undef); the export only
  ; gives the %if block an observable effect.
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float undef, float undef, float undef, float undef, i1 true, i1 true)
  br label %end

end:
  ret void
}

; Intrinsics used by @i1_copy_from_loop.
; The buffer load carries attribute group #0 and the export carries #1.
declare float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32>, i32, i32, i32, i32 immarg) #0
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #1

; NOTE(review): `readonly` / `inaccessiblememonly` are the older function
; attribute spellings; newer LLVM releases express these with `memory(...)`.
; Left unchanged here - confirm against the LLVM version this test targets.
attributes #0 = { nounwind readonly }
attributes #1 = { nounwind inaccessiblememonly }