/*
 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */
#include "util/u_math.h"

#include "ir3.h"
#include "ir3_compiler.h"
#define SCHED_DEBUG (ir3_shader_debug & IR3_DBG_SCHEDMSGS)
#define d(fmt, ...) do { if (SCHED_DEBUG) { \
	printf("SCHED: "fmt"\n", ##__VA_ARGS__); \
} } while (0)

#define di(instr, fmt, ...) do { if (SCHED_DEBUG) { \
	printf("SCHED: "fmt": ", ##__VA_ARGS__); \
	ir3_print_instr(instr); \
} } while (0)
/*
 * Instruction Scheduling:
 *
 * A recursive depth based scheduling algo.  Recursively find an eligible
 * instruction to schedule from the deepest instruction (recursing through
 * its unscheduled src instructions).  Normally this would result in a
 * lot of re-traversal of the same instructions, so we cache results in
 * instr->data (and clear cached results that would be no longer valid
 * after scheduling an instruction).
 *
 * There are a few special cases that need to be handled, since sched
 * is currently independent of register allocation.  Usages of address
 * register (a0.x) or predicate register (p0.x) must be serialized.  Ie.
 * if you have two pairs of instructions that write the same special
 * register and then read it, then those pairs cannot be interleaved.
 * To solve this, when we are in such a scheduling "critical section",
 * and we encounter a conflicting write to a special register, we try
 * to schedule any remaining instructions that use that value first.
 */
struct ir3_sched_ctx {
	struct ir3_block *block;           /* the current block */
	struct list_head depth_list;       /* depth sorted unscheduled instrs */
	struct ir3_instruction *scheduled; /* last scheduled instr XXX remove*/
	struct ir3_instruction *addr;      /* current a0.x user, if any */
	struct ir3_instruction *pred;      /* current p0.x user, if any */
	int live_values;                   /* estimate of current live values */
	int half_live_values;              /* estimate of current half precision
	                                    * live values */
	bool error;                        /* set if scheduling deadlocked, see
	                                    * sched_block() */
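	/* Heuristic thresholds, set by setup_thresholds():  live thresholds
	 * are compared against a weighted live-value estimate,
	 * (2 * live_values) + half_live_values, and depth thresholds bound
	 * how much "shallower" than the deepest candidate an instruction may
	 * be before it is deferred.  See find_eligible_instr().
	 */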
	unsigned live_threshold_hi;
	unsigned live_threshold_lo;
	unsigned depth_threshold_hi;
	unsigned depth_threshold_lo;
};
static bool is_scheduled(struct ir3_instruction *instr)
{
	return !!(instr->flags & IR3_INSTR_MARK);
}
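/* Decrement the use-count of each SSA source of a newly scheduled
 * instruction, updating the live value estimates when a source's last
 * use goes away.  collect/split are treated as pass-through, so the
 * decrement applies to their sources instead.
 */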
static void
unuse_each_src(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr)
{
	struct ir3_instruction *src;

	foreach_ssa_src_n(src, n, instr) {
		if (__is_false_dep(instr, n))
			continue;

		if (instr->block != src->block)
			continue;

		if ((src->opc == OPC_META_COLLECT) || (src->opc == OPC_META_SPLIT)) {
			unuse_each_src(ctx, src);
		} else {
			debug_assert(src->use_count > 0);

			if (--src->use_count == 0) {
				if (is_half(src)) {
					ctx->half_live_values -= dest_regs(src);
					debug_assert(ctx->half_live_values >= 0);
				} else {
					ctx->live_values -= dest_regs(src);
					debug_assert(ctx->live_values >= 0);
				}
			}
		}
	}
}
static void clear_cache(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr);
static void use_instr(struct ir3_instruction *instr);
/* transfers a use-count to new instruction, for cases where we
 * "spill" address or predicate.  Note this might cause the
 * previous instruction that loaded a0.x/p0.x to become live
 * again, when we previously thought it was dead.
 */
static void
transfer_use(struct ir3_sched_ctx *ctx, struct ir3_instruction *orig_instr,
		struct ir3_instruction *new_instr)
{
	struct ir3_instruction *src;

	debug_assert(is_scheduled(orig_instr));

	foreach_ssa_src_n(src, n, new_instr) {
		if (__is_false_dep(new_instr, n))
			continue;

		if (is_half(new_instr)) {
			ctx->half_live_values += dest_regs(src);
		} else {
			ctx->live_values += dest_regs(src);
		}

		use_instr(src);
	}

	clear_cache(ctx, orig_instr);
}
static void
use_each_src(struct ir3_instruction *instr)
{
	struct ir3_instruction *src;

	foreach_ssa_src_n(src, n, instr) {
		if (__is_false_dep(instr, n))
			continue;

		use_instr(src);
	}
}

static void
use_instr(struct ir3_instruction *instr)
{
	if ((instr->opc == OPC_META_COLLECT) || (instr->opc == OPC_META_SPLIT)) {
		use_each_src(instr);
	} else {
		instr->use_count++;
	}
}
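/* Account for a newly scheduled instruction in the live value estimates:
 * its destination becomes live, and its sources may die (via
 * unuse_each_src()).
 */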
static void
update_live_values(struct ir3_sched_ctx *ctx, struct ir3_instruction *scheduled)
{
	if ((scheduled->opc == OPC_META_COLLECT) || (scheduled->opc == OPC_META_SPLIT))
		return;

	if ((scheduled->regs_count > 0) && is_half(scheduled)) {
		ctx->half_live_values += dest_regs(scheduled);
	} else {
		ctx->live_values += dest_regs(scheduled);
	}

	unuse_each_src(ctx, scheduled);
}
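/* (Re)compute instr->use_count across the whole shader, counting SSA
 * uses plus shader outputs, with collect/split treated as transparent.
 */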
static void
update_use_count(struct ir3 *ir)
{
	foreach_block (block, &ir->block_list) {
		foreach_instr (instr, &block->instr_list) {
			instr->use_count = 0;
		}
	}

	foreach_block (block, &ir->block_list) {
		foreach_instr (instr, &block->instr_list) {
			if ((instr->opc == OPC_META_COLLECT) || (instr->opc == OPC_META_SPLIT))
				continue;

			use_each_src(instr);
		}
	}

	/* Shader outputs are also used:
	 */
	struct ir3_instruction *out;
	foreach_output(out, ir)
		use_instr(out);
}
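/* Sentinel stored in instr->data meaning "recursion found nothing
 * schedulable below this instruction", as opposed to NULL, which means
 * "not cached yet":
 */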
#define NULL_INSTR ((void *)~0)
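/* Drop cached scheduling results that may have been invalidated: entries
 * pointing at 'instr', plus any negative (NULL_INSTR) results.  Passing
 * instr==NULL flushes the entire cache.
 */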
static void
clear_cache(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr)
{
	foreach_instr (instr2, &ctx->depth_list) {
		if ((instr2->data == instr) || (instr2->data == NULL_INSTR) || !instr)
			instr2->data = NULL;
	}
}
static void
schedule(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr)
{
	debug_assert(ctx->block == instr->block);

	/* remove from depth list:
	 */
	list_delinit(&instr->node);

	if (writes_addr(instr)) {
		debug_assert(ctx->addr == NULL);
		ctx->addr = instr;
	}

	if (writes_pred(instr)) {
		debug_assert(ctx->pred == NULL);
		ctx->pred = instr;
	}

	instr->flags |= IR3_INSTR_MARK;

	di(instr, "schedule");

	list_addtail(&instr->node, &instr->block->instr_list);
	ctx->scheduled = instr;

	update_live_values(ctx, instr);

	if (writes_addr(instr) || writes_pred(instr) || is_input(instr)) {
		clear_cache(ctx, NULL);
	} else {
		/* invalidate only the necessary entries.. */
		clear_cache(ctx, instr);
	}
}
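/* Return the deepest unscheduled instruction in srcs[], clearing its
 * slot so that successive calls yield srcs in decreasing depth order.
 */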
static struct ir3_instruction *
deepest(struct ir3_instruction **srcs, unsigned nsrcs)
{
	struct ir3_instruction *d = NULL;
	unsigned i = 0, id = 0;

	while ((i < nsrcs) && !(d = srcs[id = i]))
		i++;

	if (!d)
		return NULL;

	for (; i < nsrcs; i++)
		if (srcs[i] && (srcs[i]->depth > d->depth))
			d = srcs[id = i];

	srcs[id] = NULL;

	return d;
}
struct ir3_sched_notes {
	/* there is at least one kill which could be scheduled, except
	 * for unscheduled bary.f's:
	 */
	bool blocked_kill;
	/* there is at least one instruction that could be scheduled,
	 * except for conflicting address/predicate register usage:
	 */
	bool addr_conflict, pred_conflict;
};
/* could an instruction be scheduled if specified ssa src was scheduled? */
static bool
could_sched(struct ir3_instruction *instr, struct ir3_instruction *src)
{
	struct ir3_instruction *other_src;
	foreach_ssa_src(other_src, instr) {
		/* if dependency not scheduled, we aren't ready yet: */
		if ((src != other_src) && !is_scheduled(other_src)) {
			return false;
		}
	}
	return true;
}
/* Check if instruction is ok to schedule.  Make sure it is not blocked
 * by use of addr/predicate register, etc.
 */
static bool
check_instr(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes,
		struct ir3_instruction *instr)
{
	debug_assert(!is_scheduled(instr));

	/* For instructions that write address register we need to
	 * make sure there is at least one instruction that uses the
	 * addr value which is otherwise ready.
	 *
	 * TODO if any instructions use pred register and have other
	 * src args, we would need to do the same for writes_pred()..
	 */
	if (writes_addr(instr)) {
		struct ir3 *ir = instr->block->shader;
		bool ready = false;

		for (unsigned i = 0; (i < ir->indirects_count) && !ready; i++) {
			struct ir3_instruction *indirect = ir->indirects[i];

			if (indirect->address != instr)
				continue;

			ready = could_sched(indirect, instr);
		}

		/* nothing could be scheduled, so keep looking: */
		if (!ready)
			return false;
	}

	/* if this is a write to address/predicate register, and that
	 * register is currently in use, we need to defer until it is
	 * free:
	 */
	if (writes_addr(instr) && ctx->addr) {
		debug_assert(ctx->addr != instr);
		notes->addr_conflict = true;
		return false;
	}

	if (writes_pred(instr) && ctx->pred) {
		debug_assert(ctx->pred != instr);
		notes->pred_conflict = true;
		return false;
	}

	/* if the instruction is a kill, we need to ensure *every*
	 * bary.f is scheduled.  The hw seems unhappy if the thread
	 * gets killed before the end-input (ei) flag is hit.
	 *
	 * We could do this by adding each bary.f instruction as
	 * virtual ssa src for the kill instruction.  But we have
	 * fixed length instr->regs[].
	 *
	 * TODO this wouldn't be quite right if we had multiple
	 * basic blocks, if any block was conditional.  We'd need
	 * to schedule the bary.f's outside of any block which
	 * was conditional that contained a kill.. I think..
	 */
	if (is_kill(instr)) {
		struct ir3 *ir = instr->block->shader;

		for (unsigned i = 0; i < ir->baryfs_count; i++) {
			struct ir3_instruction *baryf = ir->baryfs[i];

			if (baryf->flags & IR3_INSTR_UNUSED)
				continue;

			if (!is_scheduled(baryf)) {
				notes->blocked_kill = true;
				return false;
			}
		}
	}

	return true;
}
/* Find the best instruction to schedule from specified instruction or
 * recursively its ssa sources.
 */
static struct ir3_instruction *
find_instr_recursive(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes,
		struct ir3_instruction *instr)
{
	struct ir3_instruction *srcs[__ssa_src_cnt(instr)];
	struct ir3_instruction *src;
	unsigned nsrcs = 0;

	if (is_scheduled(instr))
		return NULL;

	/* use instr->data to cache the results of recursing up the
	 * instr src's.  Otherwise the recursive algo can scale quite
	 * badly w/ shader size.  But this takes some care to clear
	 * the cache appropriately when instructions are scheduled.
	 */
	if (instr->data == NULL_INSTR)
		return NULL;
	if (instr->data)
		return instr->data;

	/* find unscheduled srcs: */
	foreach_ssa_src(src, instr) {
		if (!is_scheduled(src) && (src->block == instr->block)) {
			debug_assert(nsrcs < ARRAY_SIZE(srcs));
			srcs[nsrcs++] = src;
		}
	}

	/* if all our src's are already scheduled: */
	if (nsrcs == 0) {
		if (check_instr(ctx, notes, instr)) {
			instr->data = instr;
			return instr;
		}
		return NULL;
	}

	while ((src = deepest(srcs, nsrcs))) {
		struct ir3_instruction *candidate;

		candidate = find_instr_recursive(ctx, notes, src);
		if (!candidate)
			continue;

		if (check_instr(ctx, notes, candidate)) {
			instr->data = candidate;
			return candidate;
		}
	}

	instr->data = NULL_INSTR;
	return NULL;
}
/* find net change to live values if instruction were scheduled: */
static int
live_effect(struct ir3_instruction *instr)
{
	struct ir3_instruction *src;
	int new_live = dest_regs(instr);
	int old_live = 0;

	foreach_ssa_src_n(src, n, instr) {
		if (__is_false_dep(instr, n))
			continue;

		if (instr->block != src->block)
			continue;

		/* for split, just pass things along to the real src: */
		if (src->opc == OPC_META_SPLIT)
			src = ssa(src->regs[1]);

		/* for collect, if this is the last use of *each* src,
		 * then it will decrease the live values, since RA treats
		 * them as a whole:
		 */
		if (src->opc == OPC_META_COLLECT) {
			struct ir3_instruction *src2;
			bool last_use = true;

			foreach_ssa_src(src2, src) {
				if (src2->use_count > 1) {
					last_use = false;
					break;
				}
			}

			if (last_use)
				old_live += dest_regs(src);
		} else {
			debug_assert(src->use_count > 0);

			if (src->use_count == 1) {
				old_live += dest_regs(src);
			}
		}
	}

	return new_live - old_live;
}
/* find instruction to schedule: */
static struct ir3_instruction *
find_eligible_instr(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes,
		bool soft)
{
	struct ir3_instruction *best_instr = NULL;
	int best_rank = INT_MAX;      /* lower is better */
	unsigned deepest = 0;

	/* TODO we'd really rather use the list/array of block outputs.  But we
	 * don't have such a thing.  Recursing *every* instruction in the list
	 * will result in a lot of repeated traversal, since instructions will
	 * get traversed both when they appear as ssa src to a later instruction
	 * as well as where they appear in the depth_list.
	 */
	foreach_instr_rev (instr, &ctx->depth_list) {
		struct ir3_instruction *candidate;

		candidate = find_instr_recursive(ctx, notes, instr);
		if (!candidate)
			continue;

		if (is_meta(candidate))
			return candidate;

		deepest = MAX2(deepest, candidate->depth);
	}

	/* traverse the list a second time.. but since we cache the result of
	 * find_instr_recursive() it isn't as bad as it looks.
	 */
	foreach_instr_rev (instr, &ctx->depth_list) {
		struct ir3_instruction *candidate;

		candidate = find_instr_recursive(ctx, notes, instr);
		if (!candidate)
			continue;

		/* determine net change to # of live values: */
		int le = live_effect(candidate);
		unsigned live_values = (2 * ctx->live_values) + ctx->half_live_values;

		/* if there is a net increase in # of live values, then apply some
		 * threshold to avoid instructions getting scheduled *too* early
		 * and increasing register pressure.
		 */
		if (le >= 1) {
			unsigned threshold;

			if (live_values > ctx->live_threshold_lo) {
				threshold = ctx->depth_threshold_lo;
			} else {
				threshold = ctx->depth_threshold_hi;
			}

			/* Filter out any "shallow" instructions which would otherwise
			 * tend to get scheduled too early to fill delay slots even
			 * when they are not needed for a while.  There will probably
			 * be later delay slots that they could just as easily fill.
			 *
			 * A classic case where this comes up is frag shaders that
			 * write a constant value (like 1.0f) to one of the channels
			 * of the output color(s).  Since the mov from immed has no
			 * dependencies, it would otherwise get scheduled early to
			 * fill delay slots, occupying a register until the end of
			 * the program.
			 */
			if ((deepest - candidate->depth) > threshold)
				continue;
		}

		int rank = ir3_delay_calc(ctx->block, candidate, soft, false);

		/* if too many live values, prioritize instructions that reduce the
		 * number of live values:
		 */
		if (live_values > ctx->live_threshold_hi) {
			rank = le;
		} else if (live_values > ctx->live_threshold_lo) {
			rank += le;
		}

		if (rank < best_rank) {
			best_instr = candidate;
			best_rank = rank;
		}
	}

	return best_instr;
}
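/* Clone an instruction (so its value can be re-materialized), counting
 * uses of its sources and adding it to the depth list so the clone is
 * considered for scheduling:
 */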
static struct ir3_instruction *
split_instr(struct ir3_sched_ctx *ctx, struct ir3_instruction *orig_instr)
{
	struct ir3_instruction *new_instr = ir3_instr_clone(orig_instr);
	ir3_insert_by_depth(new_instr, &ctx->depth_list);
	transfer_use(ctx, orig_instr, new_instr);
	return new_instr;
}
588 /* "spill" the address register by remapping any unscheduled
589 * instructions which depend on the current address register
590 * to a clone of the instruction which wrote the address reg.
592 static struct ir3_instruction
*
593 split_addr(struct ir3_sched_ctx
*ctx
)
596 struct ir3_instruction
*new_addr
= NULL
;
599 debug_assert(ctx
->addr
);
601 ir
= ctx
->addr
->block
->shader
;
603 for (i
= 0; i
< ir
->indirects_count
; i
++) {
604 struct ir3_instruction
*indirect
= ir
->indirects
[i
];
609 /* skip instructions already scheduled: */
610 if (is_scheduled(indirect
))
613 /* remap remaining instructions using current addr
616 if (indirect
->address
== ctx
->addr
) {
618 new_addr
= split_instr(ctx
, ctx
->addr
);
619 /* original addr is scheduled, but new one isn't: */
620 new_addr
->flags
&= ~IR3_INSTR_MARK
;
622 indirect
->address
= NULL
;
623 ir3_instr_set_address(indirect
, new_addr
);
627 /* all remaining indirects remapped to new addr: */
633 /* "spill" the predicate register by remapping any unscheduled
634 * instructions which depend on the current predicate register
635 * to a clone of the instruction which wrote the address reg.
637 static struct ir3_instruction
*
638 split_pred(struct ir3_sched_ctx
*ctx
)
641 struct ir3_instruction
*new_pred
= NULL
;
644 debug_assert(ctx
->pred
);
646 ir
= ctx
->pred
->block
->shader
;
648 for (i
= 0; i
< ir
->predicates_count
; i
++) {
649 struct ir3_instruction
*predicated
= ir
->predicates
[i
];
651 /* skip instructions already scheduled: */
652 if (is_scheduled(predicated
))
655 /* remap remaining instructions using current pred
658 * TODO is there ever a case when pred isn't first
661 if (ssa(predicated
->regs
[1]) == ctx
->pred
) {
663 new_pred
= split_instr(ctx
, ctx
->pred
);
664 /* original pred is scheduled, but new one isn't: */
665 new_pred
->flags
&= ~IR3_INSTR_MARK
;
667 predicated
->regs
[1]->instr
= new_pred
;
671 /* all remaining predicated remapped to new pred: */
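/* Schedule a single basic block: pull all instructions onto a temporary
 * unscheduled list, then re-insert them one at a time, emitting nop's
 * where needed to satisfy delay slots, and falling back to "spilling"
 * a0.x/p0.x when scheduling deadlocks on those registers.
 */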
static void
sched_block(struct ir3_sched_ctx *ctx, struct ir3_block *block)
{
	struct list_head unscheduled_list;

	ctx->block = block;

	/* addr/pred writes are per-block: */
	ctx->addr = NULL;
	ctx->pred = NULL;

	/* move all instructions to the unscheduled list, and
	 * empty the block's instruction list (to which we will
	 * be inserting).
	 */
	list_replace(&block->instr_list, &unscheduled_list);
	list_inithead(&block->instr_list);
	list_inithead(&ctx->depth_list);

	/* First schedule all meta:input instructions, followed by
	 * tex-prefetch.  We want all of the instructions that load
	 * values into registers before the shader starts to go
	 * before any other instructions.  But in particular we
	 * want inputs to come before prefetches.  This is because
	 * a FS's bary_ij input may not actually be live in the
	 * shader, but it should not be scheduled on top of any
	 * other input (but can be overwritten by a tex prefetch)
	 *
	 * Finally, move all the remaining instructions to the depth-
	 * list:
	 */
	foreach_instr_safe (instr, &unscheduled_list)
		if (instr->opc == OPC_META_INPUT)
			schedule(ctx, instr);

	foreach_instr_safe (instr, &unscheduled_list)
		if (instr->opc == OPC_META_TEX_PREFETCH)
			schedule(ctx, instr);

	foreach_instr_safe (instr, &unscheduled_list)
		ir3_insert_by_depth(instr, &ctx->depth_list);

	while (!list_is_empty(&ctx->depth_list)) {
		struct ir3_sched_notes notes = {0};
		struct ir3_instruction *instr;

		instr = find_eligible_instr(ctx, &notes, true);
		if (!instr)
			instr = find_eligible_instr(ctx, &notes, false);

		if (instr) {
			unsigned delay = ir3_delay_calc(ctx->block, instr, false, false);
			d("delay=%u", delay);

			/* and if we run out of instructions that can be scheduled,
			 * then it is time for nop's:
			 */
			debug_assert(delay <= 6);
			while (delay > 0) {
				ir3_NOP(block);
				delay--;
			}

			schedule(ctx, instr);
		} else {
			struct ir3_instruction *new_instr = NULL;

			/* nothing available to schedule.. if we are blocked on
			 * address/predicate register conflict, then break the
			 * deadlock by cloning the instruction that wrote that
			 * reg:
			 */
			if (notes.addr_conflict) {
				new_instr = split_addr(ctx);
			} else if (notes.pred_conflict) {
				new_instr = split_pred(ctx);
			} else {
				debug_assert(0);
				ctx->error = true;
				return;
			}

			if (new_instr) {
				/* clearing current addr/pred can change what is
				 * available to schedule, so clear cache..
				 */
				clear_cache(ctx, NULL);

				ir3_insert_by_depth(new_instr, &ctx->depth_list);
				/* the original instr that wrote addr/pred may have
				 * originated from a different block:
				 */
				new_instr->block = block;
			}
		}
	}
}
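/* The threshold values are heuristic:  when the shader has latency to
 * hide (ie. texture fetches), prefer tighter depth thresholds so that
 * pressure-increasing instructions are not pulled in early just to fill
 * delay slots that later instructions could fill equally well.
 */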
static void
setup_thresholds(struct ir3_sched_ctx *ctx, struct ir3 *ir)
{
	if (ir3_has_latency_to_hide(ir)) {
		ctx->live_threshold_hi = 2 * 16 * 4;
		ctx->live_threshold_lo = 2 * 4 * 4;
		ctx->depth_threshold_hi = 6;
		ctx->depth_threshold_lo = 4;
	} else {
		ctx->live_threshold_hi = 2 * 16 * 4;
		ctx->live_threshold_lo = 2 * 12 * 4;
		ctx->depth_threshold_hi = 16;
		ctx->depth_threshold_lo = 16;
	}
}
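/* Main entry point: schedule each block, resetting the live value
 * estimates at block boundaries.  Returns negative on failure.
 */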
int ir3_sched(struct ir3 *ir)
{
	struct ir3_sched_ctx ctx = {0};

	setup_thresholds(&ctx, ir);

	ir3_clear_mark(ir);
	update_use_count(ir);

	foreach_block (block, &ir->block_list) {
		ctx.live_values = 0;
		ctx.half_live_values = 0;
		sched_block(&ctx, block);
	}

	if (ctx.error)
		return -1;

	return 0;
}
static unsigned
get_array_id(struct ir3_instruction *instr)
{
	/* The expectation is that there is only a single array
	 * src or dst, ir3_cp should enforce this.
	 */

	for (unsigned i = 0; i < instr->regs_count; i++)
		if (instr->regs[i]->flags & IR3_REG_ARRAY)
			return instr->regs[i]->array.id;

	unreachable("this was unexpected");
}
/* does instruction 'prior' need to be scheduled before 'instr'? */
static bool
depends_on(struct ir3_instruction *instr, struct ir3_instruction *prior)
{
	/* TODO for dependencies that are related to a specific object, ie
	 * a specific SSBO/image/array, we could relax this constraint to
	 * make accesses to unrelated objects not depend on each other (at
	 * least as long as not declared coherent)
	 */
	if (((instr->barrier_class & IR3_BARRIER_EVERYTHING) && prior->barrier_class) ||
			((prior->barrier_class & IR3_BARRIER_EVERYTHING) && instr->barrier_class))
		return true;

	if (instr->barrier_class & prior->barrier_conflict) {
		if (!(instr->barrier_class & ~(IR3_BARRIER_ARRAY_R | IR3_BARRIER_ARRAY_W))) {
			/* if only array barrier, then we can further limit false-deps
			 * by considering the array-id, ie reads/writes to different
			 * arrays do not depend on each other (no aliasing)
			 */
			if (get_array_id(instr) != get_array_id(prior)) {
				return false;
			}
		}

		return true;
	}

	return false;
}
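/* Add false-dependencies between 'instr' and the nearby instructions it
 * must not be reordered against, scanning backward and forward from its
 * position in the (pre-sched) instruction list:
 */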
static void
add_barrier_deps(struct ir3_block *block, struct ir3_instruction *instr)
{
	struct list_head *prev = instr->node.prev;
	struct list_head *next = instr->node.next;

	/* add dependencies on previous instructions that must be scheduled
	 * prior to the current instruction
	 */
	while (prev != &block->instr_list) {
		struct ir3_instruction *pi =
			LIST_ENTRY(struct ir3_instruction, prev, node);

		prev = prev->prev;

		if (is_meta(pi))
			continue;

		if (instr->barrier_class == pi->barrier_class) {
			ir3_instr_add_dep(instr, pi);
			break;
		}

		if (depends_on(instr, pi))
			ir3_instr_add_dep(instr, pi);
	}

	/* add dependencies on this instruction to following instructions
	 * that must be scheduled after the current instruction:
	 */
	while (next != &block->instr_list) {
		struct ir3_instruction *ni =
			LIST_ENTRY(struct ir3_instruction, next, node);

		next = next->next;

		if (is_meta(ni))
			continue;

		if (instr->barrier_class == ni->barrier_class) {
			ir3_instr_add_dep(ni, instr);
			break;
		}

		if (depends_on(ni, instr))
			ir3_instr_add_dep(ni, instr);
	}
}
/* before scheduling a block, we need to add any necessary false-dependencies
 * to ensure that:
 *
 *  (1) barriers are scheduled in the right order wrt instructions related
 *      to the barrier
 *
 *  (2) reads that come before a write actually get scheduled before the
 *      write
 */
static void
calculate_deps(struct ir3_block *block)
{
	foreach_instr (instr, &block->instr_list) {
		if (instr->barrier_class) {
			add_barrier_deps(block, instr);
		}
	}
}

void
ir3_sched_add_deps(struct ir3 *ir)
{
	foreach_block (block, &ir->block_list) {
		calculate_deps(block);
	}
}