1 /* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
4 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26 * Rob Clark <robclark@freedesktop.org>
29 #include "pipe/p_shader_tokens.h"
30 #include "util/u_math.h"
33 #include "ir3_visitor.h"
36 * Register Assignment:
38 * NOTE: currently only works on a single basic block.. need to think
39 * about how multiple basic blocks are going to get scheduled. But
40 * I think I want to re-arrange how blocks work, ie. get rid of the
41 * block nesting thing..
43 * NOTE: we could do register coalescing (eliminate moves) as part of
44 * the RA step.. OTOH I think we need to do scheduling before register
45 * assignment. And if we remove a mov that affects scheduling (unless
46 * we leave a placeholder nop, which seems lame), so I'm not really
47 * sure how practical this is to do both in a single stage. But OTOH
48 * I'm not really sure of a sane way for the CP stage to realize when it
49 * cannot remove a mov due to multi-register constraints..
54 struct ir3_block
*block
;
/*
 * Result of the constraint calculation for one instruction: where its
 * dst sits inside the register range it belongs to, and how big that
 * range is.
 */
struct ir3_ra_assignment {
	int8_t off;     /* offset of instruction dst within range */
	uint8_t num;    /* number of components for the range */
};
/* forward declarations, both functions are defined further down: */
static struct ir3_ra_assignment ra_calc(struct ir3_instruction *instr);
static void ra_assign(struct ir3_ra_ctx *ctx,
		struct ir3_instruction *assigner, int num);
70 * Register Allocation:
73 #define REG(n, wm) (struct ir3_register){ \
74 /*.flags = ((so)->half_precision) ? IR3_REG_HALF : 0,*/ \
76 .wrmask = TGSI_WRITEMASK_ ## wm, \
79 /* check that the register exists, is a GPR and is not special (a0/p0) */
80 static struct ir3_register
* reg_check(struct ir3_instruction
*instr
, unsigned n
)
82 if ((n
< instr
->regs_count
) && reg_gpr(instr
->regs
[n
]))
83 return instr
->regs
[n
];
87 static int output_base(struct ir3_ra_ctx
*ctx
)
89 /* ugg, for fragment shader we need to have input at r0.x
90 * (or at least if there is a way to configure it, I can't
91 * see how because the blob driver always uses r0.x (ie.
94 if (ctx
->type
== SHADER_FRAGMENT
)
99 /* live means read before written */
100 static void compute_liveregs(struct ir3_ra_ctx
*ctx
,
101 struct ir3_instruction
*instr
, regmask_t
*liveregs
)
103 struct ir3_block
*block
= instr
->block
;
107 regmask_init(liveregs
);
108 regmask_init(&written
);
110 for (instr
= instr
->next
; instr
; instr
= instr
->next
) {
111 struct ir3_register
*r
;
116 /* check first src's read: */
117 for (j
= 1; j
< instr
->regs_count
; j
++) {
118 r
= reg_check(instr
, j
);
120 regmask_set_if_not(liveregs
, r
, &written
);
123 /* then dst written (if assigned already): */
124 if (instr
->flags
& IR3_INSTR_MARK
) {
125 r
= reg_check(instr
, 0);
127 regmask_set(&written
, r
);
131 /* be sure to account for output registers too: */
132 for (i
= 0; i
< block
->noutputs
; i
++) {
133 struct ir3_register reg
= REG(output_base(ctx
) + i
, X
);
134 regmask_set_if_not(liveregs
, ®
, &written
);
138 /* calculate registers that are clobbered before last use of 'assigner'.
139 * This needs to be done backwards, although it could possibly be
140 * combined into compute_liveregs(). (Ie. compute_liveregs() could
141 * reverse the list, then do this part backwards reversing the list
142 * again back to original order.) Otoh, probably I should try to
143 * construct a proper interference graph instead.
145 * XXX this needs to follow the same recursion path that is used
146 * to rename/assign registers (ie. ra_assign_src()).. this is a bit
147 * ugly right now, maybe refactor into a node iterator sort of thing
148 * that iterates nodes in the correct order?
150 static bool compute_clobbers(struct ir3_ra_ctx
*ctx
,
151 struct ir3_instruction
*instr
, struct ir3_instruction
*assigner
,
155 bool live
= false, was_live
= false;
158 struct ir3_block
*block
= ctx
->block
;
160 /* if at the end, check outputs: */
161 for (i
= 0; i
< block
->noutputs
; i
++)
162 if (block
->outputs
[i
] == assigner
)
167 for (i
= 1; i
< instr
->regs_count
; i
++) {
168 struct ir3_register
*reg
= instr
->regs
[i
];
169 if ((reg
->flags
& IR3_REG_SSA
) && (reg
->instr
== assigner
)) {
170 if (is_meta(instr
)) {
171 switch (instr
->opc
) {
178 was_live
|= compute_clobbers(ctx
, instr
->next
,
190 was_live
|= compute_clobbers(ctx
, instr
->next
, assigner
, liveregs
);
192 if (was_live
&& (instr
->regs_count
> 0) &&
193 (instr
->flags
& IR3_INSTR_MARK
) &&
195 regmask_set(liveregs
, instr
->regs
[0]);
197 return live
|| was_live
;
200 static int find_available(regmask_t
*liveregs
, int size
)
203 for (i
= 0; i
< MAX_REG
- size
; i
++) {
204 if (!regmask_get(liveregs
, ®(i
, X
))) {
205 unsigned start
= i
++;
206 for (; (i
< MAX_REG
) && ((i
- start
) < size
); i
++)
207 if (regmask_get(liveregs
, ®(i
, X
)))
209 if ((i
- start
) >= size
)
217 static int alloc_block(struct ir3_ra_ctx
*ctx
,
218 struct ir3_instruction
*instr
, int size
)
221 /* special case, allocating shader outputs. At this
222 * point, nothing is allocated, just start the shader
223 * outputs at r0.x and let compute_liveregs() take
224 * care of the rest from here:
229 compute_liveregs(ctx
, instr
, &liveregs
);
231 // XXX XXX XXX XXX XXX XXX XXX XXX XXX
232 // XXX hack.. maybe ra_calc should give us a list of
233 // instrs to compute_clobbers() on?
234 if (is_meta(instr
) && (instr
->opc
== OPC_META_INPUT
) &&
235 (instr
->regs_count
== 1)) {
236 unsigned i
, base
= instr
->regs
[0]->num
& ~0x3;
237 for (i
= 0; i
< 4; i
++) {
238 struct ir3_instruction
*in
= ctx
->block
->inputs
[base
+ i
];
240 compute_clobbers(ctx
, in
->next
, in
, &liveregs
);
243 // XXX XXX XXX XXX XXX XXX XXX XXX XXX
244 compute_clobbers(ctx
, instr
->next
, instr
, &liveregs
);
245 return find_available(&liveregs
, size
);
250 * Constraint Calculation:
253 struct ra_calc_visitor
{
254 struct ir3_visitor base
;
255 struct ir3_ra_assignment a
;
/* cast a generic visitor pointer to the ra_calc_visitor it belongs to */
static inline struct ra_calc_visitor *
ra_calc_visitor(struct ir3_visitor *v)
{
	struct ra_calc_visitor *calc = (struct ra_calc_visitor *)v;

	return calc;
}
263 /* calculate register assignment for the instruction. If the register
264 * written by this instruction is required to be part of a range, to
265 * handle other (input/output/sam/bary.f/etc) contiguous register range
266 * constraints, that is calculated and handled here.
268 static void ra_calc_dst(struct ir3_visitor
*v
,
269 struct ir3_instruction
*instr
, struct ir3_register
*reg
)
271 struct ra_calc_visitor
*c
= ra_calc_visitor(v
);
282 ra_calc_dst_shader_input(struct ir3_visitor
*v
,
283 struct ir3_instruction
*instr
, struct ir3_register
*reg
)
285 struct ra_calc_visitor
*c
= ra_calc_visitor(v
);
286 struct ir3_block
*block
= instr
->block
;
287 struct ir3_register
*dst
= instr
->regs
[0];
288 unsigned base
= dst
->num
& ~0x3;
291 assert(!(dst
->flags
& IR3_REG_IA
));
293 /* check what input components we need: */
294 for (i
= 0; i
< 4; i
++) {
295 unsigned idx
= base
+ i
;
296 if ((idx
< block
->ninputs
) && block
->inputs
[idx
])
300 c
->a
.off
= dst
->num
- base
;
304 static void ra_calc_src_fanin(struct ir3_visitor
*v
,
305 struct ir3_instruction
*instr
, struct ir3_register
*reg
)
307 struct ra_calc_visitor
*c
= ra_calc_visitor(v
);
308 unsigned srcn
= ir3_instr_regno(instr
, reg
) - 1;
311 c
->a
.num
= MAX2(c
->a
.num
, instr
->regs_count
- 1);
314 static const struct ir3_visitor_funcs calc_visitor_funcs
= {
315 .instr
= ir3_visit_instr
,
316 .dst_shader_input
= ra_calc_dst_shader_input
,
317 .dst_fanout
= ra_calc_dst
,
318 .dst_fanin
= ra_calc_dst
,
320 .src_fanout
= ir3_visit_reg
,
321 .src_fanin
= ra_calc_src_fanin
,
322 .src
= ir3_visit_reg
,
325 static struct ir3_ra_assignment
ra_calc(struct ir3_instruction
*assigner
)
327 struct ra_calc_visitor v
= {
328 .base
.funcs
= &calc_visitor_funcs
,
331 ir3_visit_instr(&v
.base
, assigner
);
337 * Register Assignment:
340 struct ra_assign_visitor
{
341 struct ir3_visitor base
;
342 struct ir3_ra_ctx
*ctx
;
/* cast a generic visitor pointer to the ra_assign_visitor it belongs to */
static inline struct ra_assign_visitor *
ra_assign_visitor(struct ir3_visitor *v)
{
	struct ra_assign_visitor *assign = (struct ra_assign_visitor *)v;

	return assign;
}
351 static void ra_assign_reg(struct ir3_visitor
*v
,
352 struct ir3_instruction
*instr
, struct ir3_register
*reg
)
354 struct ra_assign_visitor
*a
= ra_assign_visitor(v
);
355 reg
->flags
&= ~IR3_REG_SSA
;
359 static void ra_assign_dst_shader_input(struct ir3_visitor
*v
,
360 struct ir3_instruction
*instr
, struct ir3_register
*reg
)
362 struct ra_assign_visitor
*a
= ra_assign_visitor(v
);
363 unsigned i
, base
= reg
->num
& ~0x3;
364 int off
= base
- reg
->num
;
366 ra_assign_reg(v
, instr
, reg
);
367 reg
->flags
|= IR3_REG_IA
;
369 /* trigger assignment of all our companion input components: */
370 for (i
= 0; i
< 4; i
++) {
371 struct ir3_instruction
*in
= instr
->block
->inputs
[i
+base
];
372 if (in
&& is_meta(in
) && (in
->opc
== OPC_META_INPUT
))
373 ra_assign(a
->ctx
, in
, a
->num
+ off
+ i
);
377 static void ra_assign_dst_fanout(struct ir3_visitor
*v
,
378 struct ir3_instruction
*instr
, struct ir3_register
*reg
)
380 struct ra_assign_visitor
*a
= ra_assign_visitor(v
);
381 struct ir3_register
*src
= instr
->regs
[1];
382 ra_assign_reg(v
, instr
, reg
);
383 if (src
->flags
& IR3_REG_SSA
)
384 ra_assign(a
->ctx
, src
->instr
, a
->num
- instr
->fo
.off
);
387 static void ra_assign_src_fanout(struct ir3_visitor
*v
,
388 struct ir3_instruction
*instr
, struct ir3_register
*reg
)
390 struct ra_assign_visitor
*a
= ra_assign_visitor(v
);
391 ra_assign_reg(v
, instr
, reg
);
392 ra_assign(a
->ctx
, instr
, a
->num
+ instr
->fo
.off
);
396 static void ra_assign_src_fanin(struct ir3_visitor
*v
,
397 struct ir3_instruction
*instr
, struct ir3_register
*reg
)
399 struct ra_assign_visitor
*a
= ra_assign_visitor(v
);
400 unsigned j
, srcn
= ir3_instr_regno(instr
, reg
) - 1;
401 ra_assign_reg(v
, instr
, reg
);
402 ra_assign(a
->ctx
, instr
, a
->num
- srcn
);
403 for (j
= 1; j
< instr
->regs_count
; j
++) {
404 struct ir3_register
*reg
= instr
->regs
[j
];
405 if (reg
->flags
& IR3_REG_SSA
) /* could be renamed already */
406 ra_assign(a
->ctx
, reg
->instr
, a
->num
- srcn
+ j
- 1);
410 static const struct ir3_visitor_funcs assign_visitor_funcs
= {
411 .instr
= ir3_visit_instr
,
412 .dst_shader_input
= ra_assign_dst_shader_input
,
413 .dst_fanout
= ra_assign_dst_fanout
,
414 .dst_fanin
= ra_assign_reg
,
415 .dst
= ra_assign_reg
,
416 .src_fanout
= ra_assign_src_fanout
,
417 .src_fanin
= ra_assign_src_fanin
,
418 .src
= ra_assign_reg
,
421 static void ra_assign(struct ir3_ra_ctx
*ctx
,
422 struct ir3_instruction
*assigner
, int num
)
424 struct ra_assign_visitor v
= {
425 .base
.funcs
= &assign_visitor_funcs
,
430 /* if we've already visited this instruction, bail now: */
431 if (ir3_instr_check_mark(assigner
)) {
432 debug_assert(assigner
->regs
[0]->num
== num
);
433 if (assigner
->regs
[0]->num
!= num
) {
434 /* impossible situation, should have been resolved
435 * at an earlier stage by inserting extra mov's:
442 ir3_visit_instr(&v
.base
, assigner
);
449 static void ir3_instr_ra(struct ir3_ra_ctx
*ctx
,
450 struct ir3_instruction
*instr
)
452 struct ir3_ra_assignment a
;
455 /* skip over nop's */
456 if (instr
->regs_count
== 0)
459 /* skip writes to a0, p0, etc */
460 if (!reg_gpr(instr
->regs
[0]))
463 /* if we've already visited this instruction, bail now: */
464 if (instr
->flags
& IR3_INSTR_MARK
)
467 /* allocate register(s): */
469 num
= alloc_block(ctx
, instr
, a
.num
) + a
.off
;
471 ra_assign(ctx
, instr
, num
);
474 /* flatten into shader: */
475 // XXX this should probably be somewhere else:
476 static void legalize(struct ir3_ra_ctx
*ctx
, struct ir3_block
*block
)
478 struct ir3_instruction
*n
;
479 struct ir3_shader
*shader
= block
->shader
;
480 struct ir3_instruction
*end
=
481 ir3_instr_create(block
, 0, OPC_END
);
482 struct ir3_instruction
*last_input
= NULL
;
483 regmask_t needs_ss_war
;
487 regmask_init(&needs_ss_war
);
488 regmask_init(&needs_ss
);
489 regmask_init(&needs_sy
);
491 shader
->instrs_count
= 0;
493 for (n
= block
->head
; n
; n
= n
->next
) {
494 struct ir3_register
*reg
;
500 for (i
= 1; i
< n
->regs_count
; i
++) {
505 /* TODO: we probably only need (ss) for alu
506 * instr consuming sfu result.. need to make
507 * some tests for both this and (sy)..
509 if (regmask_get(&needs_ss
, reg
)) {
510 n
->flags
|= IR3_INSTR_SS
;
511 regmask_init(&needs_ss
);
514 if (regmask_get(&needs_sy
, reg
)) {
515 n
->flags
|= IR3_INSTR_SY
;
516 regmask_init(&needs_sy
);
521 if (n
->regs_count
> 0) {
523 if (regmask_get(&needs_ss_war
, reg
)) {
524 n
->flags
|= IR3_INSTR_SS
;
525 regmask_init(&needs_ss_war
); // ??? I assume?
529 /* cat5+ does not have an (ss) bit, if needed we need to
530 * insert a nop to carry the sync flag. Would be kinda
531 * clever if we were aware of this during scheduling, but
532 * this should be a pretty rare case:
534 if ((n
->flags
& IR3_INSTR_SS
) && (n
->category
>= 5)) {
535 struct ir3_instruction
*nop
;
536 nop
= ir3_instr_create(block
, 0, OPC_NOP
);
537 nop
->flags
|= IR3_INSTR_SS
;
538 n
->flags
&= ~IR3_INSTR_SS
;
541 /* need to be able to set (ss) on first instruction: */
542 if ((shader
->instrs_count
== 0) && (n
->category
>= 5))
543 ir3_instr_create(block
, 0, OPC_NOP
);
545 shader
->instrs
[shader
->instrs_count
++] = n
;
548 regmask_set(&needs_ss
, n
->regs
[0]);
551 regmask_set(&needs_sy
, n
->regs
[0]);
553 /* both tex/sfu appear to not always immediately consume
554 * their src register(s):
556 if (is_tex(n
) || is_sfu(n
)) {
557 for (i
= 1; i
< n
->regs_count
; i
++) {
560 regmask_set(&needs_ss_war
, reg
);
569 last_input
->regs
[0]->flags
|= IR3_REG_EI
;
571 shader
->instrs
[shader
->instrs_count
++] = end
;
573 shader
->instrs
[0]->flags
|= IR3_INSTR_SS
| IR3_INSTR_SY
;
576 static int block_ra(struct ir3_ra_ctx
*ctx
, struct ir3_block
*block
)
578 struct ir3_instruction
*n
;
580 if (!block
->parent
) {
582 int base
, off
= output_base(ctx
);
584 base
= alloc_block(ctx
, NULL
, block
->noutputs
+ off
);
586 for (i
= 0; i
< block
->noutputs
; i
++)
587 if (block
->outputs
[i
])
588 ra_assign(ctx
, block
->outputs
[i
], base
+ i
+ off
);
590 if (ctx
->type
== SHADER_FRAGMENT
) {
591 for (i
= 0; i
< block
->ninputs
; i
++)
592 if (block
->inputs
[i
])
593 ra_assign(ctx
, block
->inputs
[i
], base
+ i
);
595 for (i
= 0; i
< block
->ninputs
; i
++)
596 if (block
->inputs
[i
])
597 ir3_instr_ra(ctx
, block
->inputs
[i
]);
601 /* then loop over instruction list and assign registers:
605 ir3_instr_ra(ctx
, n
);
611 legalize(ctx
, block
);
616 int ir3_block_ra(struct ir3_block
*block
, enum shader_t type
)
618 struct ir3_ra_ctx ctx
= {
622 ir3_shader_clear_mark(block
->shader
);
623 return block_ra(&ctx
, block
);