1 /* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
4 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26 * Rob Clark <robclark@freedesktop.org>
29 #include "pipe/p_shader_tokens.h"
30 #include "util/u_math.h"
33 #include "ir3_visitor.h"
36 * Register Assignment:
38 * NOTE: currently only works on a single basic block.. need to think
39 * about how multiple basic blocks are going to get scheduled. But
40 * I think I want to re-arrange how blocks work, ie. get rid of the
41 * block nesting thing..
43 * NOTE: we could do register coalescing (eliminate moves) as part of
44 * the RA step.. OTOH I think we need to do scheduling before register
45 * assignment. And if we remove a mov that affects scheduling (unless
46 * we leave a placeholder nop, which seems lame), so I'm not really
47 * sure how practical this is to do both in a single stage. But OTOH
48 * I'm not really sure a sane way for the CP stage to realize when it
49 * cannot remove a mov due to multi-register constraints..
54 struct ir3_block
*block
;
65 #define ra_dump_list(msg, n) do { \
67 debug_printf("-- " msg); \
68 ir3_dump_instr_list(n); \
72 #define ra_dump_instr(msg, n) do { \
74 debug_printf(">> " msg); \
75 ir3_dump_instr_single(n); \
79 /* sorta ugly way to retrofit half-precision support.. rather than
80 * passing extra param around, just OR in a high bit. All the low
81 * value arithmetic (ie. +/- offset within a contiguous vec4, etc)
82 * will continue to work as long as you don't underflow (and that
83 * would go badly anyways).
85 #define REG_HALF 0x8000
87 struct ir3_ra_assignment
{
88 int8_t off
; /* offset of instruction dst within range */
89 uint8_t num
; /* number of components for the range */
92 static void ra_assign(struct ir3_ra_ctx
*ctx
,
93 struct ir3_instruction
*assigner
, int num
);
94 static struct ir3_ra_assignment
ra_calc(struct ir3_instruction
*instr
);
97 * Register Allocation:
100 #define REG(n, wm, f) (struct ir3_register){ \
103 .wrmask = TGSI_WRITEMASK_ ## wm, \
106 /* check that the register exists, is a GPR and is not special (a0/p0) */
107 static struct ir3_register
* reg_check(struct ir3_instruction
*instr
, unsigned n
)
109 if ((n
< instr
->regs_count
) && reg_gpr(instr
->regs
[n
]) &&
110 !(instr
->regs
[n
]->flags
& IR3_REG_SSA
))
111 return instr
->regs
[n
];
115 static int output_base(struct ir3_ra_ctx
*ctx
)
117 /* ugg, for fragment shader we need to have input at r0.x
118 * (or at least if there is a way to configure it, I can't
119 * see how because the blob driver always uses r0.x (ie.
122 if (ctx
->type
== SHADER_FRAGMENT
) {
123 if (ctx
->half_precision
)
124 return ctx
->frag_face
? 4 : 3;
125 return ctx
->frag_coord
? 8 : 4;
130 /* live means read before written */
131 static void compute_liveregs(struct ir3_ra_ctx
*ctx
,
132 struct ir3_instruction
*instr
, regmask_t
*liveregs
)
134 struct ir3_block
*block
= instr
->block
;
138 regmask_init(liveregs
);
139 regmask_init(&written
);
141 for (instr
= instr
->next
; instr
; instr
= instr
->next
) {
142 struct ir3_register
*r
;
147 /* check first src's read: */
148 for (j
= 1; j
< instr
->regs_count
; j
++) {
149 r
= reg_check(instr
, j
);
151 regmask_set_if_not(liveregs
, r
, &written
);
154 /* then dst written (if assigned already): */
155 if (instr
->flags
& IR3_INSTR_MARK
) {
156 r
= reg_check(instr
, 0);
158 regmask_set(&written
, r
);
162 /* be sure to account for output registers too: */
163 for (i
= 0; i
< block
->noutputs
; i
++) {
164 struct ir3_register reg
= REG(output_base(ctx
) + i
, X
, 0);
165 regmask_set_if_not(liveregs
, ®
, &written
);
169 /* calculate registers that are clobbered before last use of 'assigner'.
170 * This needs to be done backwards, although it could possibly be
171 * combined into compute_liveregs(). (Ie. compute_liveregs() could
172 * reverse the list, then do this part backwards reversing the list
173 * again back to original order.) Otoh, probably I should try to
174 * construct a proper interference graph instead.
176 * XXX this needs to follow the same recursion path that is used
177 * to rename/assign registers (ie. ra_assign_src()).. this is a bit
178 * ugly right now, maybe refactor into node iterator sort of things
179 * that iterates nodes in the correct order?
181 static bool compute_clobbers(struct ir3_ra_ctx
*ctx
,
182 struct ir3_instruction
*instr
, struct ir3_instruction
*assigner
,
186 bool live
= false, was_live
= false;
189 struct ir3_block
*block
= ctx
->block
;
191 /* if at the end, check outputs: */
192 for (i
= 0; i
< block
->noutputs
; i
++)
193 if (block
->outputs
[i
] == assigner
)
198 for (i
= 1; i
< instr
->regs_count
; i
++) {
199 struct ir3_register
*reg
= instr
->regs
[i
];
200 if ((reg
->flags
& IR3_REG_SSA
) && (reg
->instr
== assigner
)) {
201 if (is_meta(instr
)) {
202 switch (instr
->opc
) {
209 was_live
|= compute_clobbers(ctx
, instr
->next
,
221 was_live
|= compute_clobbers(ctx
, instr
->next
, assigner
, liveregs
);
223 if (was_live
&& (instr
->regs_count
> 0) &&
224 (instr
->flags
& IR3_INSTR_MARK
) &&
226 regmask_set(liveregs
, instr
->regs
[0]);
228 return live
|| was_live
;
231 static int find_available(regmask_t
*liveregs
, int size
, bool half
)
234 unsigned f
= half
? IR3_REG_HALF
: 0;
235 for (i
= 0; i
< MAX_REG
- size
; i
++) {
236 if (!regmask_get(liveregs
, ®(i
, X
, f
))) {
237 unsigned start
= i
++;
238 for (; (i
< MAX_REG
) && ((i
- start
) < size
); i
++)
239 if (regmask_get(liveregs
, ®(i
, X
, f
)))
241 if ((i
- start
) >= size
)
249 static int alloc_block(struct ir3_ra_ctx
*ctx
,
250 struct ir3_instruction
*instr
, int size
)
253 /* special case, allocating shader outputs. At this
254 * point, nothing is allocated, just start the shader
255 * outputs at r0.x and let compute_liveregs() take
256 * care of the rest from here:
260 struct ir3_register
*dst
= instr
->regs
[0];
263 compute_liveregs(ctx
, instr
, &liveregs
);
265 // XXX XXX XXX XXX XXX XXX XXX XXX XXX
266 // XXX hack.. maybe ra_calc should give us a list of
267 // instrs to compute_clobbers() on?
268 if (is_meta(instr
) && (instr
->opc
== OPC_META_INPUT
) &&
269 (instr
->regs_count
== 1)) {
270 unsigned i
, base
= instr
->regs
[0]->num
& ~0x3;
271 for (i
= 0; i
< 4; i
++) {
272 struct ir3_instruction
*in
= NULL
;
273 if ((base
+ i
) < ctx
->block
->ninputs
)
274 in
= ctx
->block
->inputs
[base
+ i
];
276 compute_clobbers(ctx
, in
->next
, in
, &liveregs
);
279 // XXX XXX XXX XXX XXX XXX XXX XXX XXX
280 compute_clobbers(ctx
, instr
->next
, instr
, &liveregs
);
282 return find_available(&liveregs
, size
,
283 !!(dst
->flags
& IR3_REG_HALF
));
288 * Constraint Calculation:
291 struct ra_calc_visitor
{
292 struct ir3_visitor base
;
293 struct ir3_ra_assignment a
;
296 static inline struct ra_calc_visitor
*ra_calc_visitor(struct ir3_visitor
*v
)
298 return (struct ra_calc_visitor
*)v
;
301 /* calculate register assignment for the instruction. If the register
302 * written by this instruction is required to be part of a range, to
303 * handle other (input/output/sam/bary.f/etc) contiguous register range
304 * constraints, that is calculated and handled here.
306 static void ra_calc_dst(struct ir3_visitor
*v
,
307 struct ir3_instruction
*instr
, struct ir3_register
*reg
)
309 struct ra_calc_visitor
*c
= ra_calc_visitor(v
);
320 ra_calc_dst_shader_input(struct ir3_visitor
*v
,
321 struct ir3_instruction
*instr
, struct ir3_register
*reg
)
323 struct ra_calc_visitor
*c
= ra_calc_visitor(v
);
324 struct ir3_block
*block
= instr
->block
;
325 struct ir3_register
*dst
= instr
->regs
[0];
326 unsigned base
= dst
->num
& ~0x3;
329 assert(!(dst
->flags
& IR3_REG_IA
));
331 /* check what input components we need: */
332 for (i
= 0; i
< 4; i
++) {
333 unsigned idx
= base
+ i
;
334 if ((idx
< block
->ninputs
) && block
->inputs
[idx
])
338 c
->a
.off
= dst
->num
- base
;
342 static void ra_calc_src_fanin(struct ir3_visitor
*v
,
343 struct ir3_instruction
*instr
, struct ir3_register
*reg
)
345 struct ra_calc_visitor
*c
= ra_calc_visitor(v
);
346 unsigned srcn
= ir3_instr_regno(instr
, reg
) - 1;
349 c
->a
.num
= MAX2(c
->a
.num
, instr
->regs_count
- 1);
352 static const struct ir3_visitor_funcs calc_visitor_funcs
= {
353 .instr
= ir3_visit_instr
,
354 .dst_shader_input
= ra_calc_dst_shader_input
,
355 .dst_fanout
= ra_calc_dst
,
356 .dst_fanin
= ra_calc_dst
,
358 .src_fanout
= ir3_visit_reg
,
359 .src_fanin
= ra_calc_src_fanin
,
360 .src
= ir3_visit_reg
,
363 static struct ir3_ra_assignment
ra_calc(struct ir3_instruction
*assigner
)
365 struct ra_calc_visitor v
= {
366 .base
.funcs
= &calc_visitor_funcs
,
369 ir3_visit_instr(&v
.base
, assigner
);
375 * Register Assignment:
378 struct ra_assign_visitor
{
379 struct ir3_visitor base
;
380 struct ir3_ra_ctx
*ctx
;
384 static inline struct ra_assign_visitor
*ra_assign_visitor(struct ir3_visitor
*v
)
386 return (struct ra_assign_visitor
*)v
;
389 static type_t
half_type(type_t type
)
392 case TYPE_F32
: return TYPE_F16
;
393 case TYPE_U32
: return TYPE_U16
;
394 case TYPE_S32
: return TYPE_S16
;
395 /* instructions may already be fixed up: */
406 /* some instructions need fix-up if dst register is half precision: */
407 static void fixup_half_instr_dst(struct ir3_instruction
*instr
)
409 switch (instr
->category
) {
410 case 1: /* move instructions */
411 instr
->cat1
.dst_type
= half_type(instr
->cat1
.dst_type
);
414 switch (instr
->opc
) {
416 instr
->opc
= OPC_MAD_F16
;
419 instr
->opc
= OPC_SEL_B16
;
422 instr
->opc
= OPC_SEL_S16
;
425 instr
->opc
= OPC_SEL_F16
;
428 instr
->opc
= OPC_SAD_S16
;
430 /* instructions may already be fixed up: */
443 instr
->cat5
.type
= half_type(instr
->cat5
.type
);
447 /* some instructions need fix-up if src register is half precision: */
448 static void fixup_half_instr_src(struct ir3_instruction
*instr
)
450 switch (instr
->category
) {
451 case 1: /* move instructions */
452 instr
->cat1
.src_type
= half_type(instr
->cat1
.src_type
);
457 static void ra_assign_reg(struct ir3_visitor
*v
,
458 struct ir3_instruction
*instr
, struct ir3_register
*reg
)
460 struct ra_assign_visitor
*a
= ra_assign_visitor(v
);
462 reg
->flags
&= ~IR3_REG_SSA
;
463 reg
->num
= a
->num
& ~REG_HALF
;
465 assert(reg
->num
>= 0);
467 if (a
->num
& REG_HALF
) {
468 reg
->flags
|= IR3_REG_HALF
;
469 /* if dst reg being assigned, patch up the instr: */
470 if (reg
== instr
->regs
[0])
471 fixup_half_instr_dst(instr
);
473 fixup_half_instr_src(instr
);
477 static void ra_assign_dst_shader_input(struct ir3_visitor
*v
,
478 struct ir3_instruction
*instr
, struct ir3_register
*reg
)
480 struct ra_assign_visitor
*a
= ra_assign_visitor(v
);
481 unsigned i
, base
= reg
->num
& ~0x3;
482 int off
= base
- reg
->num
;
484 ra_assign_reg(v
, instr
, reg
);
485 reg
->flags
|= IR3_REG_IA
;
487 /* trigger assignment of all our companion input components: */
488 for (i
= 0; i
< 4; i
++) {
489 struct ir3_instruction
*in
= NULL
;
490 if ((base
+ i
) < instr
->block
->ninputs
)
491 in
= instr
->block
->inputs
[base
+ i
];
492 if (in
&& is_meta(in
) && (in
->opc
== OPC_META_INPUT
))
493 ra_assign(a
->ctx
, in
, a
->num
+ off
+ i
);
497 static void ra_assign_dst_fanout(struct ir3_visitor
*v
,
498 struct ir3_instruction
*instr
, struct ir3_register
*reg
)
500 struct ra_assign_visitor
*a
= ra_assign_visitor(v
);
501 struct ir3_register
*src
= instr
->regs
[1];
502 ra_assign_reg(v
, instr
, reg
);
503 if (src
->flags
& IR3_REG_SSA
)
504 ra_assign(a
->ctx
, src
->instr
, a
->num
- instr
->fo
.off
);
507 static void ra_assign_src_fanout(struct ir3_visitor
*v
,
508 struct ir3_instruction
*instr
, struct ir3_register
*reg
)
510 struct ra_assign_visitor
*a
= ra_assign_visitor(v
);
511 ra_assign_reg(v
, instr
, reg
);
512 ra_assign(a
->ctx
, instr
, a
->num
+ instr
->fo
.off
);
516 static void ra_assign_src_fanin(struct ir3_visitor
*v
,
517 struct ir3_instruction
*instr
, struct ir3_register
*reg
)
519 struct ra_assign_visitor
*a
= ra_assign_visitor(v
);
520 unsigned j
, srcn
= ir3_instr_regno(instr
, reg
) - 1;
521 ra_assign_reg(v
, instr
, reg
);
522 ra_assign(a
->ctx
, instr
, a
->num
- srcn
);
523 for (j
= 1; j
< instr
->regs_count
; j
++) {
524 struct ir3_register
*reg
= instr
->regs
[j
];
525 if (reg
->flags
& IR3_REG_SSA
) /* could be renamed already */
526 ra_assign(a
->ctx
, reg
->instr
, a
->num
- srcn
+ j
- 1);
530 static const struct ir3_visitor_funcs assign_visitor_funcs
= {
531 .instr
= ir3_visit_instr
,
532 .dst_shader_input
= ra_assign_dst_shader_input
,
533 .dst_fanout
= ra_assign_dst_fanout
,
534 .dst_fanin
= ra_assign_reg
,
535 .dst
= ra_assign_reg
,
536 .src_fanout
= ra_assign_src_fanout
,
537 .src_fanin
= ra_assign_src_fanin
,
538 .src
= ra_assign_reg
,
541 static void ra_assign(struct ir3_ra_ctx
*ctx
,
542 struct ir3_instruction
*assigner
, int num
)
544 struct ra_assign_visitor v
= {
545 .base
.funcs
= &assign_visitor_funcs
,
550 /* if we've already visited this instruction, bail now: */
551 if (ir3_instr_check_mark(assigner
)) {
552 debug_assert(assigner
->regs
[0]->num
== (num
& ~REG_HALF
));
553 if (assigner
->regs
[0]->num
!= (num
& ~REG_HALF
)) {
554 /* impossible situation, should have been resolved
555 * at an earlier stage by inserting extra mov's:
562 ir3_visit_instr(&v
.base
, assigner
);
569 static void ir3_instr_ra(struct ir3_ra_ctx
*ctx
,
570 struct ir3_instruction
*instr
)
572 struct ir3_register
*dst
;
575 /* skip over nop's */
576 if (instr
->regs_count
== 0)
579 dst
= instr
->regs
[0];
581 /* if we've already visited this instruction, bail now: */
582 if (instr
->flags
& IR3_INSTR_MARK
)
585 /* allocate register(s): */
586 if (is_addr(instr
)) {
587 num
= instr
->regs
[2]->num
;
588 } else if (reg_gpr(dst
)) {
589 struct ir3_ra_assignment a
;
591 num
= alloc_block(ctx
, instr
, a
.num
) + a
.off
;
592 } else if (dst
->flags
& IR3_REG_ADDR
) {
593 dst
->flags
&= ~IR3_REG_ADDR
;
594 num
= regid(REG_A0
, 0) | REG_HALF
;
596 /* predicate register (p0).. etc */
597 num
= regid(REG_P0
, 0);
598 debug_assert(dst
->num
== num
);
601 ra_assign(ctx
, instr
, num
);
604 static int block_ra(struct ir3_ra_ctx
*ctx
, struct ir3_block
*block
)
606 struct ir3_instruction
*n
;
608 ra_dump_list("before:\n", block
->head
);
610 if (!block
->parent
) {
612 int base
, off
= output_base(ctx
);
614 base
= alloc_block(ctx
, NULL
, block
->noutputs
+ off
);
616 if (ctx
->half_precision
)
619 for (i
= 0; i
< block
->noutputs
; i
++)
620 if (block
->outputs
[i
] && !is_kill(block
->outputs
[i
]))
621 ra_assign(ctx
, block
->outputs
[i
], base
+ i
+ off
);
623 if (ctx
->type
== SHADER_FRAGMENT
) {
625 if (ctx
->frag_face
) {
626 /* if we have frag_face, it gets hr0.x */
627 ra_assign(ctx
, block
->inputs
[i
], REG_HALF
| 0);
630 for (j
= 0; i
< block
->ninputs
; i
++, j
++)
631 if (block
->inputs
[i
])
632 ra_assign(ctx
, block
->inputs
[i
], (base
& ~REG_HALF
) + j
);
634 for (i
= 0; i
< block
->ninputs
; i
++)
635 if (block
->inputs
[i
])
636 ir3_instr_ra(ctx
, block
->inputs
[i
]);
640 ra_dump_list("after:\n", block
->head
);
642 /* then loop over instruction list and assign registers:
644 for (n
= block
->head
; n
; n
= n
->next
) {
645 ra_dump_instr("ASSIGN: ", n
);
646 ir3_instr_ra(ctx
, n
);
649 ra_dump_list("-------", block
->head
);
655 int ir3_block_ra(struct ir3_block
*block
, enum shader_t type
,
656 bool half_precision
, bool frag_coord
, bool frag_face
)
658 struct ir3_instruction
*n
;
659 struct ir3_ra_ctx ctx
= {
662 .half_precision
= half_precision
,
663 .frag_coord
= frag_coord
,
664 .frag_face
= frag_face
,
668 /* mark dst registers w/ SSA flag so we can see which
669 * have been assigned so far:
671 for (n
= block
->head
; n
; n
= n
->next
)
672 if (n
->regs_count
> 0)
673 n
->regs
[0]->flags
|= IR3_REG_SSA
;
675 ir3_clear_mark(block
->shader
);
676 ret
= block_ra(&ctx
, block
);