freedreno: small fix for flushing dependent batches
[mesa.git] / src/gallium/drivers/freedreno/ir3/ir3_sched.c
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */

/*
 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */


#include "util/u_math.h"

#include "ir3.h"

/*
 * Instruction Scheduling:
 *
 * A recursive depth based scheduling algo. Recursively find an eligible
 * instruction to schedule from the deepest instruction (recursing through
 * its unscheduled src instructions). Normally this would result in a
 * lot of re-traversal of the same instructions, so we cache results in
 * instr->data (and clear cached results that would be no longer valid
 * after scheduling an instruction).
 *
 * There are a few special cases that need to be handled, since sched
 * is currently independent of register allocation. Usages of address
 * register (a0.x) or predicate register (p0.x) must be serialized. Ie.
 * if you have two pairs of instructions that write the same special
 * register and then read it, then those pairs cannot be interleaved.
 * To solve this, when we are in such a scheduling "critical section",
 * and we encounter a conflicting write to a special register, we try
 * to schedule any remaining instructions that use that value first.
 */
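
/* For example (a sketch, not from a real shader): if A1 and A2 both write
 * a0.x, U1 reads the value written by A1, and U2 reads the value written
 * by A2, then "A1, U1, A2, U2" and "A2, U2, A1, U1" are legal schedules,
 * but "A1, A2, U1, U2" is not, because the second a0.x write would clobber
 * the value that U1 still needs.
 */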

struct ir3_sched_ctx {
	struct ir3_block *block;           /* the current block */
	struct list_head depth_list;       /* depth sorted unscheduled instrs */
	struct ir3_instruction *scheduled; /* last scheduled instr XXX remove*/
	struct ir3_instruction *addr;      /* current a0.x user, if any */
	struct ir3_instruction *pred;      /* current p0.x user, if any */
	bool error;
};

static bool is_sfu_or_mem(struct ir3_instruction *instr)
{
	return is_sfu(instr) || is_mem(instr);
}

#define NULL_INSTR ((void *)~0)

static void
clear_cache(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr)
{
	list_for_each_entry (struct ir3_instruction, instr2, &ctx->depth_list, node) {
		if ((instr2->data == instr) || (instr2->data == NULL_INSTR) || !instr)
			instr2->data = NULL;
	}
}

static void
schedule(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr)
{
	debug_assert(ctx->block == instr->block);

	/* maybe there is a better way to handle this than just stuffing
	 * a nop.. ideally we'd know about this constraint in the
	 * scheduling and depth calculation..
	 */
	if (ctx->scheduled && is_sfu_or_mem(ctx->scheduled) && is_sfu_or_mem(instr))
		ir3_NOP(ctx->block);

	/* remove from depth list:
	 */
	list_delinit(&instr->node);

	if (writes_addr(instr)) {
		debug_assert(ctx->addr == NULL);
		ctx->addr = instr;
	}

	if (writes_pred(instr)) {
		debug_assert(ctx->pred == NULL);
		ctx->pred = instr;
	}

	instr->flags |= IR3_INSTR_MARK;

	list_addtail(&instr->node, &instr->block->instr_list);
	ctx->scheduled = instr;

	if (writes_addr(instr) || writes_pred(instr) || is_input(instr)) {
		clear_cache(ctx, NULL);
	} else {
		/* invalidate only the necessary entries.. */
		clear_cache(ctx, instr);
	}
}

static struct ir3_instruction *
deepest(struct ir3_instruction **srcs, unsigned nsrcs)
{
	struct ir3_instruction *d = NULL;
	unsigned i = 0, id = 0;

	while ((i < nsrcs) && !(d = srcs[id = i]))
		i++;

	if (!d)
		return NULL;

	for (; i < nsrcs; i++)
		if (srcs[i] && (srcs[i]->depth > d->depth))
			d = srcs[id = i];

	srcs[id] = NULL;

	return d;
}
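
/* Example (illustrative): for srcs[] = { A(depth 3), B(depth 7), C(depth 5) }
 * the first call returns B and NULLs that slot, the next returns C, then A,
 * then NULL -- callers drain the array deepest-first.
 */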

/**
 * @block: the block to search in, starting from end; in first pass,
 *    this will be the block the instruction would be inserted into
 *    (but has not yet, ie. it only contains already scheduled
 *    instructions). For intra-block scheduling (second pass), this
 *    would be one of the predecessor blocks.
 * @instr: the instruction to search for
 * @maxd:  max distance, bail after searching this # of instruction
 *    slots, since it means the instruction we are looking for is
 *    far enough away
 * @pred:  if true, recursively search into predecessor blocks to
 *    find the worst case (shortest) distance (only possible after
 *    individual blocks are all scheduled)
 */
static unsigned
distance(struct ir3_block *block, struct ir3_instruction *instr,
		unsigned maxd, bool pred)
{
	unsigned d = 0;

	list_for_each_entry_rev (struct ir3_instruction, n, &block->instr_list, node) {
		if ((n == instr) || (d >= maxd))
			return d;
		/* NOTE: don't count branch/jump since we don't know yet if they will
		 * be eliminated later in resolve_jumps().. really should do that
		 * earlier so we don't have this constraint.
		 */
		if (is_alu(n) || (is_flow(n) && (n->opc != OPC_JUMP) && (n->opc != OPC_BR)))
			d++;
	}

	/* if coming from a predecessor block, assume it is assigned far
	 * enough away.. we'll fix up later.
	 */
	if (!pred)
		return maxd;

	if (pred && (block->data != block)) {
		/* Search into predecessor blocks, finding the one with the
		 * shortest distance, since that will be the worst case
		 */
		unsigned min = maxd - d;

		/* (ab)use block->data to prevent recursion: */
		block->data = block;

		for (unsigned i = 0; i < block->predecessors_count; i++) {
			unsigned n;

			n = distance(block->predecessors[i], instr, min, pred);

			min = MIN2(min, n);
		}

		block->data = NULL;
		d += min;
	}

	return d;
}
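
/* Worked example (numbers purely illustrative): if three countable
 * instructions were scheduled after 'instr' at the end of 'block',
 * distance() returns 3.  If 'instr' isn't found within 'maxd' slots, or in
 * the first pass (pred == false) only a predecessor block could contain it,
 * we just report 'maxd', i.e. "far enough away that no extra delay is
 * needed".
 */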

/* calculate delay for specified src: */
static unsigned
delay_calc_srcn(struct ir3_block *block,
		struct ir3_instruction *assigner,
		struct ir3_instruction *consumer,
		unsigned srcn, bool soft, bool pred)
{
	unsigned delay = 0;

	if (is_meta(assigner)) {
		struct ir3_instruction *src;
		foreach_ssa_src(src, assigner) {
			unsigned d;
			d = delay_calc_srcn(block, src, consumer, srcn, soft, pred);
			delay = MAX2(delay, d);
		}
	} else {
		if (soft) {
			if (is_sfu(assigner)) {
				delay = 4;
			} else {
				delay = ir3_delayslots(assigner, consumer, srcn);
			}
		} else {
			delay = ir3_delayslots(assigner, consumer, srcn);
		}
		delay -= distance(block, assigner, delay, pred);
	}

	return delay;
}

/* calculate delay for instruction (maximum of delay for all srcs): */
static unsigned
delay_calc(struct ir3_block *block, struct ir3_instruction *instr,
		bool soft, bool pred)
{
	unsigned delay = 0;
	struct ir3_instruction *src;

	foreach_ssa_src_n(src, i, instr) {
		unsigned d;
		d = delay_calc_srcn(block, src, instr, i, soft, pred);
		delay = MAX2(delay, d);
	}

	return delay;
}
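
/* Example (illustrative): suppose ir3_delayslots(assigner, consumer, n)
 * reports 6 slots and two countable instructions have already been scheduled
 * after the assigner; the delay reported here is 6 - 2 = 4, i.e. four more
 * slots (nop's or other useful instructions) are still needed before
 * 'consumer' may legally issue.
 */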

struct ir3_sched_notes {
	/* there is at least one kill which could be scheduled, except
	 * for unscheduled bary.f's:
	 */
	bool blocked_kill;
	/* there is at least one instruction that could be scheduled,
	 * except for conflicting address/predicate register usage:
	 */
	bool addr_conflict, pred_conflict;
};

static bool is_scheduled(struct ir3_instruction *instr)
{
	return !!(instr->flags & IR3_INSTR_MARK);
}

/* could an instruction be scheduled if specified ssa src was scheduled? */
static bool
could_sched(struct ir3_instruction *instr, struct ir3_instruction *src)
{
	struct ir3_instruction *other_src;
	foreach_ssa_src(other_src, instr) {
		/* if dependency not scheduled, we aren't ready yet: */
		if ((src != other_src) && !is_scheduled(other_src)) {
			return false;
		}
	}
	return true;
}

/* Check if instruction is ok to schedule. Make sure it is not blocked
 * by use of addr/predicate register, etc.
 */
static bool
check_instr(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes,
		struct ir3_instruction *instr)
{
	/* For instructions that write address register we need to
	 * make sure there is at least one instruction that uses the
	 * addr value which is otherwise ready.
	 *
	 * TODO if any instructions use pred register and have other
	 * src args, we would need to do the same for writes_pred()..
	 */
	if (writes_addr(instr)) {
		struct ir3 *ir = instr->block->shader;
		bool ready = false;
		for (unsigned i = 0; (i < ir->indirects_count) && !ready; i++) {
			struct ir3_instruction *indirect = ir->indirects[i];
			if (!indirect)
				continue;
			if (indirect->address != instr)
				continue;
			ready = could_sched(indirect, instr);
		}

		/* nothing could be scheduled, so keep looking: */
		if (!ready)
			return false;
	}

	/* if this is a write to address/predicate register, and that
	 * register is currently in use, we need to defer until it is
	 * free:
	 */
	if (writes_addr(instr) && ctx->addr) {
		debug_assert(ctx->addr != instr);
		notes->addr_conflict = true;
		return false;
	}

	if (writes_pred(instr) && ctx->pred) {
		debug_assert(ctx->pred != instr);
		notes->pred_conflict = true;
		return false;
	}

	/* if the instruction is a kill, we need to ensure *every*
	 * bary.f is scheduled. The hw seems unhappy if the thread
	 * gets killed before the end-input (ei) flag is hit.
	 *
	 * We could do this by adding each bary.f instruction as
	 * virtual ssa src for the kill instruction. But we have
	 * fixed length instr->regs[].
	 *
	 * TODO this wouldn't be quite right if we had multiple
	 * basic blocks, if any block was conditional. We'd need
	 * to schedule the bary.f's outside of any block which
	 * was conditional that contained a kill.. I think..
	 */
	if (is_kill(instr)) {
		struct ir3 *ir = instr->block->shader;

		for (unsigned i = 0; i < ir->baryfs_count; i++) {
			struct ir3_instruction *baryf = ir->baryfs[i];
			if (baryf->flags & IR3_INSTR_UNUSED)
				continue;
			if (!is_scheduled(baryf)) {
				notes->blocked_kill = true;
				return false;
			}
		}
	}

	return true;
}
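
/* For example (sketch): if 'instr' is a kill and one bary.f in the shader is
 * still unscheduled, check_instr() rejects it and records
 * notes->blocked_kill, so the caller keeps looking for something else
 * (typically the remaining bary.f's) to schedule first.
 */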

/* Find the best instruction to schedule from specified instruction or
 * recursively its ssa sources.
 */
static struct ir3_instruction *
find_instr_recursive(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes,
		struct ir3_instruction *instr)
{
	struct ir3_instruction *srcs[__ssa_src_cnt(instr)];
	struct ir3_instruction *src;
	unsigned nsrcs = 0;

	if (is_scheduled(instr))
		return NULL;

	/* use instr->data to cache the results of recursing up the
	 * instr src's. Otherwise the recursive algo can scale quite
	 * badly w/ shader size. But this takes some care to clear
	 * the cache appropriately when instructions are scheduled.
	 */
	if (instr->data) {
		if (instr->data == NULL_INSTR)
			return NULL;
		return instr->data;
	}

	/* find unscheduled srcs: */
	foreach_ssa_src(src, instr) {
		if (!is_scheduled(src)) {
			debug_assert(nsrcs < ARRAY_SIZE(srcs));
			srcs[nsrcs++] = src;
		}
	}

	/* if all our src's are already scheduled: */
	if (nsrcs == 0) {
		if (check_instr(ctx, notes, instr)) {
			instr->data = instr;
			return instr;
		}
		return NULL;
	}

	while ((src = deepest(srcs, nsrcs))) {
		struct ir3_instruction *candidate;

		candidate = find_instr_recursive(ctx, notes, src);
		if (!candidate)
			continue;

		if (check_instr(ctx, notes, candidate)) {
			instr->data = candidate;
			return candidate;
		}
	}

	instr->data = NULL_INSTR;
	return NULL;
}
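
/* Cache behaviour in a nutshell (illustrative): if recursing from X yields
 * candidate C, then X->data = C and the next query returns C immediately; if
 * nothing reachable from X can currently be scheduled, X->data is set to
 * NULL_INSTR so we skip re-walking that subtree until schedule() /
 * clear_cache() invalidates the entry.
 */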

/* find instruction to schedule: */
static struct ir3_instruction *
find_eligible_instr(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes,
		bool soft)
{
	struct ir3_instruction *best_instr = NULL;
	unsigned min_delay = ~0;

	/* TODO we'd really rather use the list/array of block outputs. But we
	 * don't have such a thing. Recursing *every* instruction in the list
	 * will result in a lot of repeated traversal, since instructions will
	 * get traversed both when they appear as ssa src to a later instruction
	 * as well as where they appear in the depth_list.
	 */
	list_for_each_entry_rev (struct ir3_instruction, instr, &ctx->depth_list, node) {
		struct ir3_instruction *candidate;
		unsigned delay;

		candidate = find_instr_recursive(ctx, notes, instr);
		if (!candidate)
			continue;

		delay = delay_calc(ctx->block, candidate, soft, false);
		if (delay < min_delay) {
			best_instr = candidate;
			min_delay = delay;
		}

		if (min_delay == 0)
			break;
	}

	return best_instr;
}

/* "spill" the address register by remapping any unscheduled
 * instructions which depend on the current address register
 * to a clone of the instruction which wrote the address reg.
 */
static struct ir3_instruction *
split_addr(struct ir3_sched_ctx *ctx)
{
	struct ir3 *ir;
	struct ir3_instruction *new_addr = NULL;
	unsigned i;

	debug_assert(ctx->addr);

	ir = ctx->addr->block->shader;

	for (i = 0; i < ir->indirects_count; i++) {
		struct ir3_instruction *indirect = ir->indirects[i];

		if (!indirect)
			continue;

		/* skip instructions already scheduled: */
		if (is_scheduled(indirect))
			continue;

		/* remap remaining instructions using current addr
		 * to new addr:
		 */
		if (indirect->address == ctx->addr) {
			if (!new_addr) {
				new_addr = ir3_instr_clone(ctx->addr);
				/* original addr is scheduled, but new one isn't: */
				new_addr->flags &= ~IR3_INSTR_MARK;
			}
			ir3_instr_set_address(indirect, new_addr);
		}
	}

	/* all remaining indirects remapped to new addr: */
	ctx->addr = NULL;

	return new_addr;
}
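
/* Sketch of the effect: before split_addr(), the unscheduled indirects still
 * point at the already-scheduled a0.x writer A; afterwards they point at a
 * fresh, unscheduled clone A' instead, so the a0.x "critical section" around
 * A can be closed and A' scheduled independently later.
 */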

/* "spill" the predicate register by remapping any unscheduled
 * instructions which depend on the current predicate register
 * to a clone of the instruction which wrote the predicate reg.
 */
static struct ir3_instruction *
split_pred(struct ir3_sched_ctx *ctx)
{
	struct ir3 *ir;
	struct ir3_instruction *new_pred = NULL;
	unsigned i;

	debug_assert(ctx->pred);

	ir = ctx->pred->block->shader;

	for (i = 0; i < ir->predicates_count; i++) {
		struct ir3_instruction *predicated = ir->predicates[i];

		/* skip instructions already scheduled: */
		if (is_scheduled(predicated))
			continue;

		/* remap remaining instructions using current pred
		 * to new pred:
		 *
		 * TODO is there ever a case when pred isn't first
		 * (and only) src?
		 */
		if (ssa(predicated->regs[1]) == ctx->pred) {
			if (!new_pred) {
				new_pred = ir3_instr_clone(ctx->pred);
				/* original pred is scheduled, but new one isn't: */
				new_pred->flags &= ~IR3_INSTR_MARK;
			}
			predicated->regs[1]->instr = new_pred;
		}
	}

	/* all remaining predicated remapped to new pred: */
	ctx->pred = NULL;

	return new_pred;
}

static void
sched_block(struct ir3_sched_ctx *ctx, struct ir3_block *block)
{
	struct list_head unscheduled_list;

	ctx->block = block;

	/* addr/pred writes are per-block: */
	ctx->addr = NULL;
	ctx->pred = NULL;

	/* move all instructions to the unscheduled list, and
	 * empty the block's instruction list (to which we will
	 * be inserting).
	 */
	list_replace(&block->instr_list, &unscheduled_list);
	list_inithead(&block->instr_list);
	list_inithead(&ctx->depth_list);

	/* first a pre-pass to schedule all meta:input instructions
	 * (which need to appear first so that RA knows the register is
	 * occupied), and move remaining to depth sorted list:
	 */
	list_for_each_entry_safe (struct ir3_instruction, instr, &unscheduled_list, node) {
		if (instr->opc == OPC_META_INPUT) {
			schedule(ctx, instr);
		} else {
			ir3_insert_by_depth(instr, &ctx->depth_list);
		}
	}

	while (!list_empty(&ctx->depth_list)) {
		struct ir3_sched_notes notes = {0};
		struct ir3_instruction *instr;

		instr = find_eligible_instr(ctx, &notes, true);
		if (!instr)
			instr = find_eligible_instr(ctx, &notes, false);

		if (instr) {
			unsigned delay = delay_calc(ctx->block, instr, false, false);

			/* and if we run out of instructions that can be scheduled,
			 * then it is time for nop's:
			 */
			debug_assert(delay <= 6);
			while (delay > 0) {
				ir3_NOP(block);
				delay--;
			}

			schedule(ctx, instr);
		} else {
			struct ir3_instruction *new_instr = NULL;

			/* nothing available to schedule.. if we are blocked on
			 * address/predicate register conflict, then break the
			 * deadlock by cloning the instruction that wrote that
			 * reg:
			 */
			if (notes.addr_conflict) {
				new_instr = split_addr(ctx);
			} else if (notes.pred_conflict) {
				new_instr = split_pred(ctx);
			} else {
				debug_assert(0);
				ctx->error = true;
				return;
			}

			if (new_instr) {
				/* clearing current addr/pred can change what is
				 * available to schedule, so clear cache..
				 */
				clear_cache(ctx, NULL);

				ir3_insert_by_depth(new_instr, &ctx->depth_list);
				/* the original instr that wrote addr/pred may have
				 * originated from a different block:
				 */
				new_instr->block = block;
			}
		}
	}

	/* And lastly, insert branch/jump instructions to take us to
	 * the next block. Later we'll strip back out the branches
	 * that simply jump to next instruction.
	 */
	if (block->successors[1]) {
		/* if/else, conditional branches to "then" or "else": */
		struct ir3_instruction *br;
		unsigned delay = 6;

		debug_assert(ctx->pred);
		debug_assert(block->condition);

		delay -= distance(ctx->block, ctx->pred, delay, false);

		while (delay > 0) {
			ir3_NOP(block);
			delay--;
		}

		/* create "else" branch first (since "then" block should
		 * frequently/always end up being a fall-thru):
		 */
		br = ir3_BR(block);
		br->cat0.inv = true;
		br->cat0.target = block->successors[1];

		/* NOTE: we have to hard code delay of 6 above, since
		 * we want to insert the nop's before constructing the
		 * branch. Throw in an assert so we notice if this
		 * ever breaks on future generation:
		 */
		debug_assert(ir3_delayslots(ctx->pred, br, 0) == 6);

		br = ir3_BR(block);
		br->cat0.target = block->successors[0];

	} else if (block->successors[0]) {
		/* otherwise unconditional jump to next block: */
		struct ir3_instruction *jmp;

		jmp = ir3_JUMP(block);
		jmp->cat0.target = block->successors[0];
	}

	/* NOTE: if we kept track of the predecessors, we could do a better
	 * job w/ (jp) flags.. every node w/ > 1 predecessor is a join point.
	 * Note that as we eliminate blocks which contain only an unconditional
	 * jump we probably need to propagate (jp) flag..
	 */
}

/* After scheduling individual blocks, we still could have cases where,
 * on one (or more) paths into a block, a value produced by a previous
 * block has too few delay slots to be legal. We can't deal with this in
 * the first pass, because of loops (ie. we can't ensure all predecessor
 * blocks are already scheduled in the first pass). All we can really do
 * at this point is stuff in extra nop's until things are legal.
 */
static void
sched_intra_block(struct ir3_sched_ctx *ctx, struct ir3_block *block)
{
	unsigned n = 0;

	ctx->block = block;

	list_for_each_entry_safe (struct ir3_instruction, instr, &block->instr_list, node) {
		unsigned delay = 0;

		for (unsigned i = 0; i < block->predecessors_count; i++) {
			unsigned d = delay_calc(block->predecessors[i], instr, false, true);
			delay = MAX2(d, delay);
		}

		while (delay > n) {
			struct ir3_instruction *nop = ir3_NOP(block);

			/* move to before instr: */
			list_delinit(&nop->node);
			list_addtail(&nop->node, &instr->node);

			n++;
		}

		/* we can bail once we hit worst case delay: */
		if (++n > 6)
			break;
	}
}
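
/* Example (numbers illustrative): if a value produced at the very end of a
 * predecessor block still needs 4 delay slots and only one slot precedes its
 * consumer in this block (n == 1), three extra nop's get prepended; once 'n'
 * exceeds 6 (the worst-case delay) no later instruction can still be short
 * of slots, so we bail out early.
 */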

int ir3_sched(struct ir3 *ir)
{
	struct ir3_sched_ctx ctx = {0};

	ir3_clear_mark(ir);

	list_for_each_entry (struct ir3_block, block, &ir->block_list, node) {
		sched_block(&ctx, block);
	}

	list_for_each_entry (struct ir3_block, block, &ir->block_list, node) {
		sched_intra_block(&ctx, block);
	}

	if (ctx.error)
		return -1;
	return 0;
}

/* does instruction 'prior' need to be scheduled before 'instr'? */
static bool
depends_on(struct ir3_instruction *instr, struct ir3_instruction *prior)
{
	/* TODO for dependencies that are related to a specific object, ie
	 * a specific SSBO/image/array, we could relax this constraint to
	 * make accesses to unrelated objects not depend on each other (at
	 * least as long as not declared coherent)
	 */
	if (((instr->barrier_class & IR3_BARRIER_EVERYTHING) && prior->barrier_class) ||
			((prior->barrier_class & IR3_BARRIER_EVERYTHING) && instr->barrier_class))
		return true;
	return !!(instr->barrier_class & prior->barrier_conflict);
}
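
/* Example (a sketch of typical barrier_class/barrier_conflict settings, which
 * are assigned when the instructions are created): a buffer write's
 * barrier_conflict usually covers both reads and writes of buffers, so a
 * later read "depends_on" a prior write, while two plain reads don't conflict
 * and may still be reordered relative to each other.
 */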

static void
add_barrier_deps(struct ir3_block *block, struct ir3_instruction *instr)
{
	struct list_head *prev = instr->node.prev;
	struct list_head *next = instr->node.next;

	/* add dependencies on previous instructions that must be scheduled
	 * prior to the current instruction
	 */
	while (prev != &block->instr_list) {
		struct ir3_instruction *pi =
			LIST_ENTRY(struct ir3_instruction, prev, node);

		prev = prev->prev;

		if (is_meta(pi))
			continue;

		if (instr->barrier_class == pi->barrier_class) {
			ir3_instr_add_dep(instr, pi);
			break;
		}

		if (depends_on(instr, pi))
			ir3_instr_add_dep(instr, pi);
	}

	/* add dependencies on this instruction to following instructions
	 * that must be scheduled after the current instruction:
	 */
	while (next != &block->instr_list) {
		struct ir3_instruction *ni =
			LIST_ENTRY(struct ir3_instruction, next, node);

		next = next->next;

		if (is_meta(ni))
			continue;

		if (instr->barrier_class == ni->barrier_class) {
			ir3_instr_add_dep(ni, instr);
			break;
		}

		if (depends_on(ni, instr))
			ir3_instr_add_dep(ni, instr);
	}
}

/* before scheduling a block, we need to add any necessary false-dependencies
 * to ensure that:
 *
 *  (1) barriers are scheduled in the right order wrt instructions related
 *      to the barrier
 *
 *  (2) reads that come before a write actually get scheduled before the
 *      write
 */
static void
calculate_deps(struct ir3_block *block)
{
	list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
		if (instr->barrier_class) {
			add_barrier_deps(block, instr);
		}
	}
}

void
ir3_sched_add_deps(struct ir3 *ir)
{
	list_for_each_entry (struct ir3_block, block, &ir->block_list, node) {
		calculate_deps(block);
	}
}