/*
 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */
#include "util/u_math.h"

#include "ir3.h"
#include "ir3_compiler.h"
#define SCHED_DEBUG (ir3_shader_debug & IR3_DBG_SCHEDMSGS)

#define d(fmt, ...) do { if (SCHED_DEBUG) { \
	printf("SCHED: "fmt"\n", ##__VA_ARGS__); \
} } while (0)

#define di(instr, fmt, ...) do { if (SCHED_DEBUG) { \
	printf("SCHED: "fmt": ", ##__VA_ARGS__); \
	ir3_print_instr(instr); \
} } while (0)
/*
 * Instruction Scheduling:
 *
 * A recursive depth based scheduling algo.  Recursively find an eligible
 * instruction to schedule from the deepest instruction (recursing through
 * its unscheduled src instructions).  Normally this would result in a
 * lot of re-traversal of the same instructions, so we cache results in
 * instr->data (and clear cached results that would be no longer valid
 * after scheduling an instruction).
 *
 * There are a few special cases that need to be handled, since sched
 * is currently independent of register allocation.  Usages of address
 * register (a0.x) or predicate register (p0.x) must be serialized.  Ie.
 * if you have two pairs of instructions that write the same special
 * register and then read it, then those pairs cannot be interleaved.
 * To solve this, when we are in such a scheduling "critical section",
 * and we encounter a conflicting write to a special register, we try
 * to schedule any remaining instructions which use that value first.
 */
struct ir3_sched_ctx {
	struct ir3_block *block;           /* the current block */
	struct list_head depth_list;       /* depth sorted unscheduled instrs */
	struct ir3_instruction *scheduled; /* last scheduled instr XXX remove*/
	struct ir3_instruction *addr;      /* current a0.x user, if any */
	struct ir3_instruction *pred;      /* current p0.x user, if any */
	int live_values;                   /* estimate of current live values */
	int half_live_values;              /* estimate of current half precision live values */
	bool error;

	unsigned live_threshold_hi;
	unsigned live_threshold_lo;
	unsigned depth_threshold_hi;
	unsigned depth_threshold_lo;
};
static bool is_scheduled(struct ir3_instruction *instr)
{
	return !!(instr->flags & IR3_INSTR_MARK);
}
static bool is_sfu_or_mem(struct ir3_instruction *instr)
{
	return is_sfu(instr) || is_mem(instr);
}
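
/* When an instruction is scheduled, its ssa srcs become "unused": each
 * src's use_count is decremented, and when the last use is scheduled the
 * (half) live value estimate is reduced by the regs the src wrote.
 * Collect/split meta instructions pass the unuse through to their srcs.
 */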
static void
unuse_each_src(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr)
{
	struct ir3_instruction *src;

	foreach_ssa_src_n(src, n, instr) {
		if (__is_false_dep(instr, n))
			continue;
		if (instr->block != src->block)
			continue;
		if ((src->opc == OPC_META_COLLECT) || (src->opc == OPC_META_SPLIT)) {
			unuse_each_src(ctx, src);
		} else {
			debug_assert(src->use_count > 0);

			if (--src->use_count == 0) {
				if (is_half(src)) {
					ctx->half_live_values -= dest_regs(src);
					debug_assert(ctx->half_live_values >= 0);
				} else {
					ctx->live_values -= dest_regs(src);
					debug_assert(ctx->live_values >= 0);
				}
			}
		}
	}
}
static void clear_cache(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr);
static void use_instr(struct ir3_instruction *instr);
/* transfers a use-count to new instruction, for cases where we
 * "spill" address or predicate.  Note this might cause the
 * previous instruction that loaded a0.x/p0.x to become live
 * again, when we previously thought it was dead.
 */
static void
transfer_use(struct ir3_sched_ctx *ctx, struct ir3_instruction *orig_instr,
		struct ir3_instruction *new_instr)
{
	struct ir3_instruction *src;

	debug_assert(is_scheduled(orig_instr));

	foreach_ssa_src_n(src, n, new_instr) {
		if (__is_false_dep(new_instr, n))
			continue;
		if (is_half(new_instr)) {
			ctx->half_live_values += dest_regs(src);
		} else {
			ctx->live_values += dest_regs(src);
		}
		use_instr(src);
	}

	clear_cache(ctx, orig_instr);
}
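
/* use_instr()/use_each_src() (re)establish use-counts: collect/split meta
 * instructions forward the use to their srcs, everything else just gets
 * its use_count incremented.
 */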
static void
use_each_src(struct ir3_instruction *instr)
{
	struct ir3_instruction *src;

	foreach_ssa_src_n(src, n, instr) {
		if (__is_false_dep(instr, n))
			continue;
		use_instr(src);
	}
}
static void
use_instr(struct ir3_instruction *instr)
{
	if ((instr->opc == OPC_META_COLLECT) || (instr->opc == OPC_META_SPLIT)) {
		use_each_src(instr);
	} else {
		instr->use_count++;
	}
}
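
/* Update the live value estimates after 'scheduled' has been scheduled:
 * its own dst regs become live, and the use-counts of its srcs are
 * decremented (possibly retiring values whose last use this was).
 */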
static void
update_live_values(struct ir3_sched_ctx *ctx, struct ir3_instruction *scheduled)
{
	if ((scheduled->opc == OPC_META_COLLECT) || (scheduled->opc == OPC_META_SPLIT))
		return;

	if ((scheduled->regs_count > 0) && is_half(scheduled)) {
		ctx->half_live_values += dest_regs(scheduled);
	} else {
		ctx->live_values += dest_regs(scheduled);
	}

	unuse_each_src(ctx, scheduled);
}
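
/* Compute the initial use_count of every instruction in the shader by
 * clearing all counts and then walking each instruction's ssa srcs
 * (collect/split are skipped here since use_instr() handles them).
 */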
static void
update_use_count(struct ir3 *ir)
{
	foreach_block (block, &ir->block_list) {
		foreach_instr (instr, &block->instr_list) {
			instr->use_count = 0;
		}
	}

	foreach_block (block, &ir->block_list) {
		foreach_instr (instr, &block->instr_list) {
			if ((instr->opc == OPC_META_COLLECT) || (instr->opc == OPC_META_SPLIT))
				continue;

			use_each_src(instr);
		}
	}

	/* Shader outputs are also used:
	 */
	struct ir3_instruction *out;
	foreach_output(out, ir)
		use_instr(out);
}
#define NULL_INSTR ((void *)~0)
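
/* Invalidate cached find_instr_recursive() results that may no longer be
 * valid: entries pointing at 'instr' (or at NULL_INSTR, meaning "nothing
 * schedulable"), or every entry when instr is NULL.
 */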
static void
clear_cache(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr)
{
	foreach_instr (instr2, &ctx->depth_list) {
		if ((instr2->data == instr) || (instr2->data == NULL_INSTR) || !instr)
			instr2->data = NULL;
	}
}
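
/* Actually schedule 'instr': remove it from the depth list, mark it as
 * scheduled, append it to the block's instruction list, and update the
 * live value estimates and cached sched decisions.
 */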
static void
schedule(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr)
{
	debug_assert(ctx->block == instr->block);

	/* maybe there is a better way to handle this than just stuffing
	 * a nop.. ideally we'd know about this constraint in the
	 * scheduling and depth calculation..
	 */
	if (ctx->scheduled && is_sfu_or_mem(ctx->scheduled) && is_sfu_or_mem(instr))
		ir3_NOP(ctx->block);

	/* remove from depth list:
	 */
	list_delinit(&instr->node);

	if (writes_addr(instr)) {
		debug_assert(ctx->addr == NULL);
		ctx->addr = instr;
	}

	if (writes_pred(instr)) {
		debug_assert(ctx->pred == NULL);
		ctx->pred = instr;
	}

	instr->flags |= IR3_INSTR_MARK;

	di(instr, "schedule");

	list_addtail(&instr->node, &instr->block->instr_list);
	ctx->scheduled = instr;

	update_live_values(ctx, instr);

	if (writes_addr(instr) || writes_pred(instr) || is_input(instr)) {
		clear_cache(ctx, NULL);
	} else {
		/* invalidate only the necessary entries.. */
		clear_cache(ctx, instr);
	}
}
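
/* Return the deepest non-NULL entry in srcs[], removing it from the array
 * so that repeated calls walk the candidates in decreasing depth order,
 * or NULL once all entries have been consumed.
 */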
static struct ir3_instruction *
deepest(struct ir3_instruction **srcs, unsigned nsrcs)
{
	struct ir3_instruction *d = NULL;
	unsigned i = 0, id = 0;

	while ((i < nsrcs) && !(d = srcs[id = i]))
		i++;

	if (!d)
		return NULL;

	for (; i < nsrcs; i++)
		if (srcs[i] && (srcs[i]->depth > d->depth))
			d = srcs[id = i];

	srcs[id] = NULL;

	return d;
}
struct ir3_sched_notes {
	/* there is at least one kill which could be scheduled, except
	 * for unscheduled bary.f's:
	 */
	bool blocked_kill;
	/* there is at least one instruction that could be scheduled,
	 * except for conflicting address/predicate register usage:
	 */
	bool addr_conflict, pred_conflict;
};
/* could an instruction be scheduled if specified ssa src was scheduled? */
static bool
could_sched(struct ir3_instruction *instr, struct ir3_instruction *src)
{
	struct ir3_instruction *other_src;
	foreach_ssa_src(other_src, instr) {
		/* if dependency not scheduled, we aren't ready yet: */
		if ((src != other_src) && !is_scheduled(other_src)) {
			return false;
		}
	}
	return true;
}
/* Check if instruction is ok to schedule.  Make sure it is not blocked
 * by use of addr/predicate register, etc.
 */
static bool
check_instr(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes,
		struct ir3_instruction *instr)
{
	debug_assert(!is_scheduled(instr));

	/* For instructions that write address register we need to
	 * make sure there is at least one instruction that uses the
	 * addr value which is otherwise ready.
	 *
	 * TODO if any instructions use pred register and have other
	 * src args, we would need to do the same for writes_pred()..
	 */
	if (writes_addr(instr)) {
		struct ir3 *ir = instr->block->shader;
		bool ready = false;
		for (unsigned i = 0; (i < ir->indirects_count) && !ready; i++) {
			struct ir3_instruction *indirect = ir->indirects[i];
			if (!indirect)
				continue;
			if (indirect->address != instr)
				continue;
			ready = could_sched(indirect, instr);
		}

		/* nothing could be scheduled, so keep looking: */
		if (!ready)
			return false;
	}

	/* if this is a write to address/predicate register, and that
	 * register is currently in use, we need to defer until it is
	 * free:
	 */
	if (writes_addr(instr) && ctx->addr) {
		debug_assert(ctx->addr != instr);
		notes->addr_conflict = true;
		return false;
	}

	if (writes_pred(instr) && ctx->pred) {
		debug_assert(ctx->pred != instr);
		notes->pred_conflict = true;
		return false;
	}

	/* if the instruction is a kill, we need to ensure *every*
	 * bary.f is scheduled.  The hw seems unhappy if the thread
	 * gets killed before the end-input (ei) flag is hit.
	 *
	 * We could do this by adding each bary.f instruction as
	 * virtual ssa src for the kill instruction.  But we have
	 * fixed length instr->regs[].
	 *
	 * TODO this wouldn't be quite right if we had multiple
	 * basic blocks, if any block was conditional.  We'd need
	 * to schedule the bary.f's outside of any block which
	 * was conditional that contained a kill.. I think..
	 */
	if (is_kill(instr)) {
		struct ir3 *ir = instr->block->shader;

		for (unsigned i = 0; i < ir->baryfs_count; i++) {
			struct ir3_instruction *baryf = ir->baryfs[i];
			if (baryf->flags & IR3_INSTR_UNUSED)
				continue;
			if (!is_scheduled(baryf)) {
				notes->blocked_kill = true;
				return false;
			}
		}
	}

	return true;
}
/* Find the best instruction to schedule from specified instruction or
 * recursively its ssa sources.
 */
static struct ir3_instruction *
find_instr_recursive(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes,
		struct ir3_instruction *instr)
{
	struct ir3_instruction *srcs[__ssa_src_cnt(instr)];
	struct ir3_instruction *src;
	unsigned nsrcs = 0;

	if (is_scheduled(instr))
		return NULL;

	/* use instr->data to cache the results of recursing up the
	 * instr src's.  Otherwise the recursive algo can scale quite
	 * badly w/ shader size.  But this takes some care to clear
	 * the cache appropriately when instructions are scheduled.
	 */
	if (instr->data == NULL_INSTR)
		return NULL;
	if (instr->data)
		return instr->data;

	/* find unscheduled srcs: */
	foreach_ssa_src(src, instr) {
		if (!is_scheduled(src) && (src->block == instr->block)) {
			debug_assert(nsrcs < ARRAY_SIZE(srcs));
			srcs[nsrcs++] = src;
		}
	}

	/* if all our src's are already scheduled: */
	if (nsrcs == 0) {
		if (check_instr(ctx, notes, instr)) {
			instr->data = instr;
			return instr;
		}
		return NULL;
	}

	while ((src = deepest(srcs, nsrcs))) {
		struct ir3_instruction *candidate;

		candidate = find_instr_recursive(ctx, notes, src);
		if (!candidate)
			continue;

		if (check_instr(ctx, notes, candidate)) {
			instr->data = candidate;
			return candidate;
		}
	}

	instr->data = NULL_INSTR;
	return NULL;
}
/* find net change to live values if instruction were scheduled: */
static int
live_effect(struct ir3_instruction *instr)
{
	struct ir3_instruction *src;
	int new_live = dest_regs(instr);
	int old_live = 0;

	foreach_ssa_src_n(src, n, instr) {
		if (__is_false_dep(instr, n))
			continue;

		if (instr->block != src->block)
			continue;

		/* for split, just pass things along to the real src: */
		if (src->opc == OPC_META_SPLIT)
			src = ssa(src->regs[1]);

		/* for collect, if this is the last use of *each* src,
		 * then it will decrease the live values, since RA treats
		 * them as a whole:
		 */
		if (src->opc == OPC_META_COLLECT) {
			struct ir3_instruction *src2;
			bool last_use = true;

			foreach_ssa_src(src2, src) {
				if (src2->use_count > 1) {
					last_use = false;
					break;
				}
			}

			if (last_use)
				old_live += dest_regs(src);
		} else {
			debug_assert(src->use_count > 0);

			if (src->use_count == 1) {
				old_live += dest_regs(src);
			}
		}
	}

	return new_live - old_live;
}
/* find instruction to schedule: */
static struct ir3_instruction *
find_eligible_instr(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes,
		bool soft)
{
	struct ir3_instruction *best_instr = NULL;
	int best_rank = INT_MAX;      /* lower is better */
	unsigned deepest = 0;

	/* TODO we'd really rather use the list/array of block outputs.  But we
	 * don't have such a thing.  Recursing *every* instruction in the list
	 * will result in a lot of repeated traversal, since instructions will
	 * get traversed both when they appear as ssa src to a later instruction
	 * as well as where they appear in the depth_list.
	 */
	foreach_instr_rev (instr, &ctx->depth_list) {
		struct ir3_instruction *candidate;

		candidate = find_instr_recursive(ctx, notes, instr);
		if (!candidate)
			continue;

		if (is_meta(candidate))
			return candidate;

		deepest = MAX2(deepest, candidate->depth);
	}

	/* traverse the list a second time.. but since we cache the result of
	 * find_instr_recursive() it isn't as bad as it looks.
	 */
	foreach_instr_rev (instr, &ctx->depth_list) {
		struct ir3_instruction *candidate;

		candidate = find_instr_recursive(ctx, notes, instr);
		if (!candidate)
			continue;

		/* determine net change to # of live values: */
		int le = live_effect(candidate);
		unsigned live_values = (2 * ctx->live_values) + ctx->half_live_values;

		/* if there is a net increase in # of live values, then apply some
		 * threshold to avoid instructions getting scheduled *too* early
		 * and increasing register pressure.
		 */
		if (le >= 1) {
			unsigned threshold;

			if (live_values > ctx->live_threshold_lo) {
				threshold = ctx->depth_threshold_lo;
			} else {
				threshold = ctx->depth_threshold_hi;
			}

			/* Filter out any "shallow" instructions which would otherwise
			 * tend to get scheduled too early to fill delay slots even
			 * when they are not needed for a while.  There will probably
			 * be later delay slots that they could just as easily fill.
			 *
			 * A classic case where this comes up is frag shaders that
			 * write a constant value (like 1.0f) to one of the channels
			 * of the output color(s).  Since the mov from immed has no
			 * dependencies, it would otherwise get scheduled early to
			 * fill delay slots, occupying a register until the end of
			 * the program.
			 */
			if ((deepest - candidate->depth) > threshold)
				continue;
		}

		int rank = ir3_delay_calc(ctx->block, candidate, soft, false);

		/* if too many live values, prioritize instructions that reduce the
		 * number of live values:
		 */
		if (live_values > ctx->live_threshold_hi) {
			rank = le;
		} else if (live_values > ctx->live_threshold_lo) {
			rank += le;
		}

		if (rank < best_rank) {
			best_instr = candidate;
			best_rank = rank;
		}
	}

	return best_instr;
}
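
/* Clone an already-scheduled instruction so that it can be re-scheduled
 * for its remaining unscheduled users, inserting the clone into the depth
 * list and transferring the relevant use-counts to it.
 */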
static struct ir3_instruction *
split_instr(struct ir3_sched_ctx *ctx, struct ir3_instruction *orig_instr)
{
	struct ir3_instruction *new_instr = ir3_instr_clone(orig_instr);
	ir3_insert_by_depth(new_instr, &ctx->depth_list);
	transfer_use(ctx, orig_instr, new_instr);
	return new_instr;
}
600 /* "spill" the address register by remapping any unscheduled
601 * instructions which depend on the current address register
602 * to a clone of the instruction which wrote the address reg.
604 static struct ir3_instruction
*
605 split_addr(struct ir3_sched_ctx
*ctx
)
608 struct ir3_instruction
*new_addr
= NULL
;
611 debug_assert(ctx
->addr
);
613 ir
= ctx
->addr
->block
->shader
;
615 for (i
= 0; i
< ir
->indirects_count
; i
++) {
616 struct ir3_instruction
*indirect
= ir
->indirects
[i
];
621 /* skip instructions already scheduled: */
622 if (is_scheduled(indirect
))
625 /* remap remaining instructions using current addr
628 if (indirect
->address
== ctx
->addr
) {
630 new_addr
= split_instr(ctx
, ctx
->addr
);
631 /* original addr is scheduled, but new one isn't: */
632 new_addr
->flags
&= ~IR3_INSTR_MARK
;
634 indirect
->address
= NULL
;
635 ir3_instr_set_address(indirect
, new_addr
);
639 /* all remaining indirects remapped to new addr: */
645 /* "spill" the predicate register by remapping any unscheduled
646 * instructions which depend on the current predicate register
647 * to a clone of the instruction which wrote the address reg.
649 static struct ir3_instruction
*
650 split_pred(struct ir3_sched_ctx
*ctx
)
653 struct ir3_instruction
*new_pred
= NULL
;
656 debug_assert(ctx
->pred
);
658 ir
= ctx
->pred
->block
->shader
;
660 for (i
= 0; i
< ir
->predicates_count
; i
++) {
661 struct ir3_instruction
*predicated
= ir
->predicates
[i
];
663 /* skip instructions already scheduled: */
664 if (is_scheduled(predicated
))
667 /* remap remaining instructions using current pred
670 * TODO is there ever a case when pred isn't first
673 if (ssa(predicated
->regs
[1]) == ctx
->pred
) {
675 new_pred
= split_instr(ctx
, ctx
->pred
);
676 /* original pred is scheduled, but new one isn't: */
677 new_pred
->flags
&= ~IR3_INSTR_MARK
;
679 predicated
->regs
[1]->instr
= new_pred
;
683 /* all remaining predicated remapped to new pred: */
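
/* Schedule a single basic block: pull all instructions onto an unscheduled
 * list, schedule inputs and tex-prefetches first, then repeatedly pick the
 * best eligible instruction (inserting nops to fill unavoidable delay
 * slots), spilling addr/pred when scheduling would otherwise deadlock.
 */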
static void
sched_block(struct ir3_sched_ctx *ctx, struct ir3_block *block)
{
	struct list_head unscheduled_list;

	ctx->block = block;

	/* addr/pred writes are per-block: */
	ctx->addr = NULL;
	ctx->pred = NULL;

	/* move all instructions to the unscheduled list, and
	 * empty the block's instruction list (to which we will
	 * be inserting).
	 */
	list_replace(&block->instr_list, &unscheduled_list);
	list_inithead(&block->instr_list);
	list_inithead(&ctx->depth_list);

	/* First schedule all meta:input instructions, followed by
	 * tex-prefetch.  We want all of the instructions that load
	 * values into registers before the shader starts to go
	 * before any other instructions.  But in particular we
	 * want inputs to come before prefetches.  This is because
	 * a FS's bary_ij input may not actually be live in the
	 * shader, but it should not be scheduled on top of any
	 * other input (but can be overwritten by a tex prefetch)
	 *
	 * Finally, move all the remaining instructions to the depth-
	 * list:
	 */
	foreach_instr_safe (instr, &unscheduled_list)
		if (instr->opc == OPC_META_INPUT)
			schedule(ctx, instr);

	foreach_instr_safe (instr, &unscheduled_list)
		if (instr->opc == OPC_META_TEX_PREFETCH)
			schedule(ctx, instr);

	foreach_instr_safe (instr, &unscheduled_list)
		ir3_insert_by_depth(instr, &ctx->depth_list);

	while (!list_is_empty(&ctx->depth_list)) {
		struct ir3_sched_notes notes = {0};
		struct ir3_instruction *instr;

		instr = find_eligible_instr(ctx, &notes, true);
		if (!instr)
			instr = find_eligible_instr(ctx, &notes, false);

		if (instr) {
			unsigned delay = ir3_delay_calc(ctx->block, instr, false, false);
			d("delay=%u", delay);

			/* and if we run out of instructions that can be scheduled,
			 * then it is time for nop's:
			 */
			debug_assert(delay <= 6);
			while (delay > 0) {
				ir3_NOP(block);
				delay--;
			}

			schedule(ctx, instr);
		} else {
			struct ir3_instruction *new_instr = NULL;

			/* nothing available to schedule.. if we are blocked on
			 * address/predicate register conflict, then break the
			 * deadlock by cloning the instruction that wrote that
			 * reg:
			 */
			if (notes.addr_conflict) {
				new_instr = split_addr(ctx);
			} else if (notes.pred_conflict) {
				new_instr = split_pred(ctx);
			} else {
				debug_assert(0);
				ctx->error = true;
				return;
			}

			if (new_instr) {
				/* clearing current addr/pred can change what is
				 * available to schedule, so clear cache..
				 */
				clear_cache(ctx, NULL);

				ir3_insert_by_depth(new_instr, &ctx->depth_list);
				/* the original instr that wrote addr/pred may have
				 * originated from a different block:
				 */
				new_instr->block = block;
			}
		}
	}
}
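
/* Determine whether the shader contains texture fetches or other loads
 * whose latency the scheduler could usefully hide by scheduling unrelated
 * work between the load and its first use.
 */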
static bool
has_latency_to_hide(struct ir3 *ir)
{
	foreach_block (block, &ir->block_list) {
		foreach_instr (instr, &block->instr_list) {
			if (is_tex_or_prefetch(instr))
				return true;

			if (is_load(instr)) {
				switch (instr->opc) {
				case OPC_LDLV:
				case OPC_LDL:
				case OPC_LDLW:
					/* loads from local/shared memory are relatively fast: */
					break;
				default:
					return true;
				}
			}
		}
	}

	return false;
}
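
/* Scheduling thresholds depend on whether the shader has any load/tex
 * latency worth hiding: the live value and depth thresholds chosen here
 * feed the heuristics in find_eligible_instr().
 */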
static void
setup_thresholds(struct ir3_sched_ctx *ctx, struct ir3 *ir)
{
	if (has_latency_to_hide(ir)) {
		ctx->live_threshold_hi = 2 * 16 * 4;
		ctx->live_threshold_lo = 2 * 4 * 4;
		ctx->depth_threshold_hi = 6;
		ctx->depth_threshold_lo = 4;
	} else {
		ctx->live_threshold_hi = 2 * 16 * 4;
		ctx->live_threshold_lo = 2 * 12 * 4;
		ctx->depth_threshold_hi = 16;
		ctx->depth_threshold_lo = 16;
	}
}
int ir3_sched(struct ir3 *ir)
{
	struct ir3_sched_ctx ctx = {0};

	setup_thresholds(&ctx, ir);

	ir3_clear_mark(ir);
	update_use_count(ir);

	foreach_block (block, &ir->block_list) {
		ctx.live_values = 0;
		ctx.half_live_values = 0;
		sched_block(&ctx, block);
	}

	if (ctx.error)
		return -1;

	return 0;
}
static unsigned
get_array_id(struct ir3_instruction *instr)
{
	/* The expectation is that there is only a single array
	 * src or dst, ir3_cp should enforce this.
	 */

	for (unsigned i = 0; i < instr->regs_count; i++)
		if (instr->regs[i]->flags & IR3_REG_ARRAY)
			return instr->regs[i]->array.id;

	unreachable("this was unexpected");
}
/* does instruction 'prior' need to be scheduled before 'instr'? */
static bool
depends_on(struct ir3_instruction *instr, struct ir3_instruction *prior)
{
	/* TODO for dependencies that are related to a specific object, ie
	 * a specific SSBO/image/array, we could relax this constraint to
	 * make accesses to unrelated objects not depend on each other (at
	 * least as long as not declared coherent)
	 */
	if (((instr->barrier_class & IR3_BARRIER_EVERYTHING) && prior->barrier_class) ||
			((prior->barrier_class & IR3_BARRIER_EVERYTHING) && instr->barrier_class))
		return true;

	if (instr->barrier_class & prior->barrier_conflict) {
		if (!(instr->barrier_class & ~(IR3_BARRIER_ARRAY_R | IR3_BARRIER_ARRAY_W))) {
			/* if only array barrier, then we can further limit false-deps
			 * by considering the array-id, ie reads/writes to different
			 * arrays do not depend on each other (no aliasing)
			 */
			if (get_array_id(instr) != get_array_id(prior)) {
				return false;
			}
		}

		return true;
	}

	return false;
}
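
/* Add false dependencies between 'instr' and any earlier/later instructions
 * in the block that it must not be reordered with, based on barrier classes.
 */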
static void
add_barrier_deps(struct ir3_block *block, struct ir3_instruction *instr)
{
	struct list_head *prev = instr->node.prev;
	struct list_head *next = instr->node.next;

	/* add dependencies on previous instructions that must be scheduled
	 * prior to the current instruction
	 */
	while (prev != &block->instr_list) {
		struct ir3_instruction *pi =
			LIST_ENTRY(struct ir3_instruction, prev, node);

		prev = prev->prev;

		if (is_meta(pi))
			continue;

		if (instr->barrier_class == pi->barrier_class) {
			ir3_instr_add_dep(instr, pi);
			continue;
		}

		if (depends_on(instr, pi))
			ir3_instr_add_dep(instr, pi);
	}

	/* add dependencies on this instruction to following instructions
	 * that must be scheduled after the current instruction:
	 */
	while (next != &block->instr_list) {
		struct ir3_instruction *ni =
			LIST_ENTRY(struct ir3_instruction, next, node);

		next = next->next;

		if (is_meta(ni))
			continue;

		if (instr->barrier_class == ni->barrier_class) {
			ir3_instr_add_dep(ni, instr);
			continue;
		}

		if (depends_on(ni, instr))
			ir3_instr_add_dep(ni, instr);
	}
}
/* before scheduling a block, we need to add any necessary false-dependencies
 * to ensure that:
 *
 *  (1) barriers are scheduled in the right order wrt instructions related
 *      to the barrier
 *
 *  (2) reads that come before a write actually get scheduled before the
 *      write
 */
static void
calculate_deps(struct ir3_block *block)
{
	foreach_instr (instr, &block->instr_list) {
		if (instr->barrier_class) {
			add_barrier_deps(block, instr);
		}
	}
}

void
ir3_sched_add_deps(struct ir3 *ir)
{
	foreach_block (block, &ir->block_list) {
		calculate_deps(block);
	}
}