freedreno/a3xx: little extra debug
[mesa.git] / src / gallium / drivers / freedreno / a3xx / ir3_ra.c
1 /* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
2
3 /*
4 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 * SOFTWARE.
24 *
25 * Authors:
26 * Rob Clark <robclark@freedesktop.org>
27 */
28
29 #include "pipe/p_shader_tokens.h"
30 #include "util/u_math.h"
31
32 #include "ir3.h"
33 #include "ir3_visitor.h"
34
35 /*
36 * Register Assignment:
37 *
38 * NOTE: currently only works on a single basic block.. need to think
39 * about how multiple basic blocks are going to get scheduled. But
40 * I think I want to re-arrange how blocks work, ie. get rid of the
41 * block nesting thing..
42 *
43 * NOTE: we could do register coalescing (eliminate moves) as part of
44 * the RA step.. OTOH I think we need to do scheduling before register
45 * assignment. And if we remove a mov that effects scheduling (unless
46 * we leave a placeholder nop, which seems lame), so I'm not really
47 * sure how practical this is to do both in a single stage. But OTOH
48 * I'm not really sure a sane way for the CP stage to realize when it
49 * cannot remove a mov due to multi-register constraints..
50 *
51 */
52
/* per-invocation state for register assignment of one block: */
struct ir3_ra_ctx {
	struct ir3_block *block;    /* the block being register-allocated */
	enum shader_t type;         /* vertex vs fragment shader */
	bool half_precision;        /* allocate half-precision (hrN) registers */
	bool frag_coord;            /* frag shader uses frag_coord (reserves input regs) */
	bool frag_face;             /* frag shader uses frag_face (gets hr0.x) */
	int cnt;
	bool error;                 /* set when an impossible assignment is detected */
};
62
63 /* sorta ugly way to retrofit half-precision support.. rather than
64 * passing extra param around, just OR in a high bit. All the low
65 * value arithmetic (ie. +/- offset within a contiguous vec4, etc)
66 * will continue to work as long as you don't underflow (and that
67 * would go badly anyways).
68 */
#define REG_HALF 0x8000

/* result of ra_calc(): the contiguous-range constraint for an
 * instruction's dst register.
 */
struct ir3_ra_assignment {
	int8_t off; /* offset of instruction dst within range */
	uint8_t num; /* number of components for the range */
};
75
76 static void ra_assign(struct ir3_ra_ctx *ctx,
77 struct ir3_instruction *assigner, int num);
78 static struct ir3_ra_assignment ra_calc(struct ir3_instruction *instr);
79
80 /*
81 * Register Allocation:
82 */
83
/* build a temporary ir3_register (compound literal) for regmask
 * queries: (n) is the register number, (wm) the TGSI writemask
 * suffix (X, XY, ...).
 */
#define REG(n, wm) (struct ir3_register){ \
	/*.flags = ((so)->half_precision) ? IR3_REG_HALF : 0,*/ \
	.num = (n), \
	.wrmask = TGSI_WRITEMASK_ ## wm, \
	}
89
90 /* check that the register exists, is a GPR and is not special (a0/p0) */
91 static struct ir3_register * reg_check(struct ir3_instruction *instr, unsigned n)
92 {
93 if ((n < instr->regs_count) && reg_gpr(instr->regs[n]))
94 return instr->regs[n];
95 return NULL;
96 }
97
98 static int output_base(struct ir3_ra_ctx *ctx)
99 {
100 /* ugg, for fragment shader we need to have input at r0.x
101 * (or at least if there is a way to configure it, I can't
102 * see how because the blob driver always uses r0.x (ie.
103 * all zeros)
104 */
105 if (ctx->type == SHADER_FRAGMENT) {
106 if (ctx->half_precision)
107 return ctx->frag_face ? 1 : 0;
108 return ctx->frag_coord ? 6 : 2;
109 }
110 return 0;
111 }
112
/* live means read before written.
 *
 * Walks forward from the instruction after 'instr' to the end of the
 * block, accumulating in 'liveregs' every GPR that is read before it
 * is (re)written.  Shader outputs are treated as read at the end.
 */
static void compute_liveregs(struct ir3_ra_ctx *ctx,
		struct ir3_instruction *instr, regmask_t *liveregs)
{
	struct ir3_block *block = instr->block;
	regmask_t written;
	unsigned i, j;

	regmask_init(liveregs);
	regmask_init(&written);

	for (instr = instr->next; instr; instr = instr->next) {
		struct ir3_register *r;

		/* meta instructions don't generate real reads/writes: */
		if (is_meta(instr))
			continue;

		/* check first src's read: */
		for (j = 1; j < instr->regs_count; j++) {
			r = reg_check(instr, j);
			if (r)
				regmask_set_if_not(liveregs, r, &written);
		}

		/* then dst written (if assigned already): */
		if (instr->flags & IR3_INSTR_MARK) {
			r = reg_check(instr, 0);
			if (r)
				regmask_set(&written, r);
		}
	}

	/* be sure to account for output registers too: */
	for (i = 0; i < block->noutputs; i++) {
		struct ir3_register reg = REG(output_base(ctx) + i, X);
		regmask_set_if_not(liveregs, &reg, &written);
	}
}
151
/* calculate registers that are clobbered before last use of 'assigner'.
 * This needs to be done backwards, although it could possibly be
 * combined into compute_liveregs().  (Ie. compute_liveregs() could
 * reverse the list, then do this part backwards reversing the list
 * again back to original order.)  Otoh, probably I should try to
 * construct a proper interference graph instead.
 *
 * Returns true if 'assigner' is still referenced (directly or through
 * a meta fanin/fanout chain, or as a block output) at/after 'instr'.
 *
 * XXX this need to follow the same recursion path that is used for
 * to rename/assign registers (ie. ra_assign_src()).. this is a bit
 * ugly right now, maybe refactor into node iterator sort of things
 * that iterates nodes in the correct order?
 */
static bool compute_clobbers(struct ir3_ra_ctx *ctx,
		struct ir3_instruction *instr, struct ir3_instruction *assigner,
		regmask_t *liveregs)
{
	unsigned i;
	bool live = false, was_live = false;

	if (instr == NULL) {
		struct ir3_block *block = ctx->block;

		/* if at the end, check outputs: */
		for (i = 0; i < block->noutputs; i++)
			if (block->outputs[i] == assigner)
				return true;
		return false;
	}

	/* does this instruction consume 'assigner' as a source? */
	for (i = 1; i < instr->regs_count; i++) {
		struct ir3_register *reg = instr->regs[i];
		if ((reg->flags & IR3_REG_SSA) && (reg->instr == assigner)) {
			if (is_meta(instr)) {
				switch (instr->opc) {
				case OPC_META_INPUT:
					// TODO
					assert(0);
					break;
				case OPC_META_FO:
				case OPC_META_FI:
					/* value flows through the meta node; recurse
					 * with the meta node as the new assigner:
					 */
					was_live |= compute_clobbers(ctx, instr->next,
							instr, liveregs);
					break;
				default:
					break;
				}
			}
			live = true;
			break;
		}
	}

	was_live |= compute_clobbers(ctx, instr->next, assigner, liveregs);

	/* if 'assigner' is used later, any dst already assigned here
	 * would clobber it, so record it as unavailable:
	 */
	if (was_live && (instr->regs_count > 0) &&
			(instr->flags & IR3_INSTR_MARK) &&
			!is_meta(instr))
		regmask_set(liveregs, instr->regs[0]);

	return live || was_live;
}
213
214 static int find_available(regmask_t *liveregs, int size)
215 {
216 unsigned i;
217 for (i = 0; i < MAX_REG - size; i++) {
218 if (!regmask_get(liveregs, &REG(i, X))) {
219 unsigned start = i++;
220 for (; (i < MAX_REG) && ((i - start) < size); i++)
221 if (regmask_get(liveregs, &REG(i, X)))
222 break;
223 if ((i - start) >= size)
224 return start;
225 }
226 }
227 assert(0);
228 return -1;
229 }
230
/* find a free contiguous range of 'size' registers to hold the dst
 * of 'instr' (or, with instr==NULL, the shader outputs):
 */
static int alloc_block(struct ir3_ra_ctx *ctx,
		struct ir3_instruction *instr, int size)
{
	if (!instr) {
		/* special case, allocating shader outputs.  At this
		 * point, nothing is allocated, just start the shader
		 * outputs at r0.x and let compute_liveregs() take
		 * care of the rest from here:
		 */
		return 0;
	} else {
		regmask_t liveregs;
		compute_liveregs(ctx, instr, &liveregs);

		// XXX XXX XXX XXX XXX XXX XXX XXX XXX
		// XXX hack.. maybe ra_calc should give us a list of
		// instrs to compute_clobbers() on?
		if (is_meta(instr) && (instr->opc == OPC_META_INPUT) &&
				(instr->regs_count == 1)) {
			/* an already-assigned input: also mark clobbers for its
			 * companion components in the same vec4 slot:
			 */
			unsigned i, base = instr->regs[0]->num & ~0x3;
			for (i = 0; i < 4; i++) {
				struct ir3_instruction *in = ctx->block->inputs[base + i];
				if (in)
					compute_clobbers(ctx, in->next, in, &liveregs);
			}
		} else
		// XXX XXX XXX XXX XXX XXX XXX XXX XXX
			compute_clobbers(ctx, instr->next, instr, &liveregs);
		return find_available(&liveregs, size);
	}
}
262
263 /*
264 * Constraint Calculation:
265 */
266
/* visitor state for the constraint-calculation pass: */
struct ra_calc_visitor {
	struct ir3_visitor base;    /* must be first (downcast below) */
	struct ir3_ra_assignment a; /* computed range constraint */
};

/* downcast from the base visitor embedded as first member: */
static inline struct ra_calc_visitor *ra_calc_visitor(struct ir3_visitor *v)
{
	return (struct ra_calc_visitor *)v;
}
276
277 /* calculate register assignment for the instruction. If the register
278 * written by this instruction is required to be part of a range, to
279 * handle other (input/output/sam/bary.f/etc) contiguous register range
280 * constraints, that is calculated handled here.
281 */
282 static void ra_calc_dst(struct ir3_visitor *v,
283 struct ir3_instruction *instr, struct ir3_register *reg)
284 {
285 struct ra_calc_visitor *c = ra_calc_visitor(v);
286 if (is_tex(instr)) {
287 c->a.off = 0;
288 c->a.num = 4;
289 } else {
290 c->a.off = 0;
291 c->a.num = 1;
292 }
293 }
294
295 static void
296 ra_calc_dst_shader_input(struct ir3_visitor *v,
297 struct ir3_instruction *instr, struct ir3_register *reg)
298 {
299 struct ra_calc_visitor *c = ra_calc_visitor(v);
300 struct ir3_block *block = instr->block;
301 struct ir3_register *dst = instr->regs[0];
302 unsigned base = dst->num & ~0x3;
303 unsigned i, num = 0;
304
305 assert(!(dst->flags & IR3_REG_IA));
306
307 /* check what input components we need: */
308 for (i = 0; i < 4; i++) {
309 unsigned idx = base + i;
310 if ((idx < block->ninputs) && block->inputs[idx])
311 num = i + 1;
312 }
313
314 c->a.off = dst->num - base;
315 c->a.num = num;
316 }
317
318 static void ra_calc_src_fanin(struct ir3_visitor *v,
319 struct ir3_instruction *instr, struct ir3_register *reg)
320 {
321 struct ra_calc_visitor *c = ra_calc_visitor(v);
322 unsigned srcn = ir3_instr_regno(instr, reg) - 1;
323 c->a.off += srcn;
324 c->a.num += srcn;
325 c->a.num = MAX2(c->a.num, instr->regs_count - 1);
326 }
327
/* visitor table for the constraint-calculation pass: */
static const struct ir3_visitor_funcs calc_visitor_funcs = {
		.instr = ir3_visit_instr,
		.dst_shader_input = ra_calc_dst_shader_input,
		.dst_fanout = ra_calc_dst,
		.dst_fanin = ra_calc_dst,
		.dst = ra_calc_dst,
		.src_fanout = ir3_visit_reg,
		.src_fanin = ra_calc_src_fanin,
		.src = ir3_visit_reg,
};
338
339 static struct ir3_ra_assignment ra_calc(struct ir3_instruction *assigner)
340 {
341 struct ra_calc_visitor v = {
342 .base.funcs = &calc_visitor_funcs,
343 };
344
345 ir3_visit_instr(&v.base, assigner);
346
347 return v.a;
348 }
349
350 /*
351 * Register Assignment:
352 */
353
/* visitor state for the assignment pass: */
struct ra_assign_visitor {
	struct ir3_visitor base;    /* must be first (downcast below) */
	struct ir3_ra_ctx *ctx;
	int num;                    /* register number being assigned (may have REG_HALF set) */
};

/* downcast from the base visitor embedded as first member: */
static inline struct ra_assign_visitor *ra_assign_visitor(struct ir3_visitor *v)
{
	return (struct ra_assign_visitor *)v;
}
364
365 static type_t half_type(type_t type)
366 {
367 switch (type) {
368 case TYPE_F32: return TYPE_F16;
369 case TYPE_U32: return TYPE_U16;
370 case TYPE_S32: return TYPE_S16;
371 /* instructions may already be fixed up: */
372 case TYPE_F16:
373 case TYPE_U16:
374 case TYPE_S16:
375 return type;
376 default:
377 assert(0);
378 return ~0;
379 }
380 }
381
/* some instructions need fix-up if dst register is half precision:
 * cat1 (mov) and cat5 (tex) encode a dst type; cat3 encodes precision
 * in the opcode itself.  Other categories need no fixup (no default
 * case on the outer switch is intentional).
 */
static void fixup_half_instr_dst(struct ir3_instruction *instr)
{
	switch (instr->category) {
	case 1: /* move instructions */
		instr->cat1.dst_type = half_type(instr->cat1.dst_type);
		break;
	case 3:
		switch (instr->opc) {
		case OPC_MAD_F32:
			instr->opc = OPC_MAD_F16;
			break;
		case OPC_SEL_B32:
			instr->opc = OPC_SEL_B16;
			break;
		case OPC_SEL_S32:
			instr->opc = OPC_SEL_S16;
			break;
		case OPC_SEL_F32:
			instr->opc = OPC_SEL_F16;
			break;
		case OPC_SAD_S32:
			instr->opc = OPC_SAD_S16;
			break;
		/* instructions may already be fixed up: */
		case OPC_MAD_F16:
		case OPC_SEL_B16:
		case OPC_SEL_S16:
		case OPC_SEL_F16:
		case OPC_SAD_S16:
			break;
		default:
			assert(0);
			break;
		}
		break;
	case 5:
		instr->cat5.type = half_type(instr->cat5.type);
		break;
	}
}
423 /* some instructions need fix-up if src register is half precision: */
424 static void fixup_half_instr_src(struct ir3_instruction *instr)
425 {
426 switch (instr->category) {
427 case 1: /* move instructions */
428 instr->cat1.src_type = half_type(instr->cat1.src_type);
429 break;
430 }
431 }
432
/* core assignment: clear the SSA flag and store the assigned register
 * number (plus half-precision flag) into 'reg', patching up the
 * instruction encoding where half-precision changes it:
 */
static void ra_assign_reg(struct ir3_visitor *v,
		struct ir3_instruction *instr, struct ir3_register *reg)
{
	struct ra_assign_visitor *a = ra_assign_visitor(v);

	/* kill doesn't get a real register assigned: */
	if (is_flow(instr) && (instr->opc == OPC_KILL))
		return;

	reg->flags &= ~IR3_REG_SSA;
	reg->num = a->num & ~REG_HALF;

	/* NOTE(review): if reg->num is an unsigned field this assert is
	 * always true; presumably intended to catch a negative a->num —
	 * confirm the field type in ir3.h:
	 */
	assert(reg->num >= 0);

	if (a->num & REG_HALF) {
		reg->flags |= IR3_REG_HALF;
		/* if dst reg being assigned, patch up the instr: */
		if (reg == instr->regs[0])
			fixup_half_instr_dst(instr);
		else
			fixup_half_instr_src(instr);
	}
}
455
/* assigning a shader input dst also pins the other components of the
 * same vec4 input slot to adjacent registers:
 */
static void ra_assign_dst_shader_input(struct ir3_visitor *v,
		struct ir3_instruction *instr, struct ir3_register *reg)
{
	struct ra_assign_visitor *a = ra_assign_visitor(v);
	unsigned i, base = reg->num & ~0x3;
	int off = base - reg->num;   /* <= 0: distance back to slot start */

	ra_assign_reg(v, instr, reg);
	reg->flags |= IR3_REG_IA;

	/* trigger assignment of all our companion input components: */
	for (i = 0; i < 4; i++) {
		struct ir3_instruction *in = instr->block->inputs[i+base];
		if (in && is_meta(in) && (in->opc == OPC_META_INPUT))
			ra_assign(a->ctx, in, a->num + off + i);
	}
}
473
/* assigning a fanout (split) dst propagates the assignment back to
 * the fanout's source value, offset by the fanout component:
 */
static void ra_assign_dst_fanout(struct ir3_visitor *v,
		struct ir3_instruction *instr, struct ir3_register *reg)
{
	struct ra_assign_visitor *a = ra_assign_visitor(v);
	struct ir3_register *src = instr->regs[1];
	ra_assign_reg(v, instr, reg);
	if (src->flags & IR3_REG_SSA)   /* could be renamed already */
		ra_assign(a->ctx, src->instr, a->num - instr->fo.off);
}
483
/* a src consumed through a fanout (split): the fanout's own dst sits
 * fo.off components above the assigned source register:
 */
static void ra_assign_src_fanout(struct ir3_visitor *v,
		struct ir3_instruction *instr, struct ir3_register *reg)
{
	struct ra_assign_visitor *a = ra_assign_visitor(v);
	ra_assign_reg(v, instr, reg);
	ra_assign(a->ctx, instr, a->num + instr->fo.off);
}
491
492
493 static void ra_assign_src_fanin(struct ir3_visitor *v,
494 struct ir3_instruction *instr, struct ir3_register *reg)
495 {
496 struct ra_assign_visitor *a = ra_assign_visitor(v);
497 unsigned j, srcn = ir3_instr_regno(instr, reg) - 1;
498 ra_assign_reg(v, instr, reg);
499 ra_assign(a->ctx, instr, a->num - srcn);
500 for (j = 1; j < instr->regs_count; j++) {
501 struct ir3_register *reg = instr->regs[j];
502 if (reg->flags & IR3_REG_SSA) /* could be renamed already */
503 ra_assign(a->ctx, reg->instr, a->num - srcn + j - 1);
504 }
505 }
506
/* visitor table for the assignment pass: */
static const struct ir3_visitor_funcs assign_visitor_funcs = {
		.instr = ir3_visit_instr,
		.dst_shader_input = ra_assign_dst_shader_input,
		.dst_fanout = ra_assign_dst_fanout,
		.dst_fanin = ra_assign_reg,
		.dst = ra_assign_reg,
		.src_fanout = ra_assign_src_fanout,
		.src_fanin = ra_assign_src_fanin,
		.src = ra_assign_reg,
};
517
/* assign register 'num' (possibly REG_HALF-tagged) to 'assigner' and
 * recursively to everything connected to it through fanin/fanout/
 * input constraints:
 */
static void ra_assign(struct ir3_ra_ctx *ctx,
		struct ir3_instruction *assigner, int num)
{
	struct ra_assign_visitor v = {
			.base.funcs = &assign_visitor_funcs,
			.ctx = ctx,
			.num = num,
	};

	/* if we've already visited this instruction, bail now: */
	if (ir3_instr_check_mark(assigner)) {
		/* debug builds abort on a conflicting re-assignment; release
		 * builds flag the error so compilation fails gracefully:
		 */
		debug_assert(assigner->regs[0]->num == (num & ~REG_HALF));
		if (assigner->regs[0]->num != (num & ~REG_HALF)) {
			/* impossible situation, should have been resolved
			 * at an earlier stage by inserting extra mov's:
			 */
			ctx->error = true;
		}
		return;
	}

	ir3_visit_instr(&v.base, assigner);
}
541
542 /*
543 *
544 */
545
546 static void ir3_instr_ra(struct ir3_ra_ctx *ctx,
547 struct ir3_instruction *instr)
548 {
549 struct ir3_ra_assignment a;
550 unsigned num;
551
552 /* skip over nop's */
553 if (instr->regs_count == 0)
554 return;
555
556 /* skip writes to a0, p0, etc */
557 if (!reg_gpr(instr->regs[0]))
558 return;
559
560 /* if we've already visited this instruction, bail now: */
561 if (instr->flags & IR3_INSTR_MARK)
562 return;
563
564 /* allocate register(s): */
565 a = ra_calc(instr);
566 num = alloc_block(ctx, instr, a.num) + a.off;
567
568 ra_assign(ctx, instr, num);
569 }
570
/* flatten into shader: walk the block's instruction list, dropping
 * meta instructions, inserting sync flags ((ss)/(sy)) where a result
 * of an sfu/tex instruction is consumed, folding runs of nops into
 * repeat counts, and appending the final 'end' instruction.
 */
// XXX this should probably be somewhere else:
static void legalize(struct ir3_ra_ctx *ctx, struct ir3_block *block)
{
	struct ir3_instruction *n;
	struct ir3_shader *shader = block->shader;
	struct ir3_instruction *end =
			ir3_instr_create(block, 0, OPC_END);
	struct ir3_instruction *last_input = NULL;
	regmask_t needs_ss_war; /* write after read */
	regmask_t needs_ss;     /* sfu results not yet consumed */
	regmask_t needs_sy;     /* tex results not yet consumed */

	regmask_init(&needs_ss_war);
	regmask_init(&needs_ss);
	regmask_init(&needs_sy);

	shader->instrs_count = 0;

	for (n = block->head; n; n = n->next) {
		struct ir3_register *reg;
		unsigned i;

		/* meta instructions are dropped from the flattened shader: */
		if (is_meta(n))
			continue;

		/* does this instr read a register needing a sync flag? */
		for (i = 1; i < n->regs_count; i++) {
			reg = n->regs[i];

			if (reg_gpr(reg)) {

				/* TODO: we probably only need (ss) for alu
				 * instr consuming sfu result.. need to make
				 * some tests for both this and (sy)..
				 */
				if (regmask_get(&needs_ss, reg)) {
					n->flags |= IR3_INSTR_SS;
					regmask_init(&needs_ss);
				}

				if (regmask_get(&needs_sy, reg)) {
					n->flags |= IR3_INSTR_SY;
					regmask_init(&needs_sy);
				}
			}
		}

		/* write-after-read hazard against a pending sfu/tex read: */
		if (n->regs_count > 0) {
			reg = n->regs[0];
			if (regmask_get(&needs_ss_war, reg)) {
				n->flags |= IR3_INSTR_SS;
				regmask_init(&needs_ss_war); // ??? I assume?
			}
		}

		/* cat5+ does not have an (ss) bit, if needed we need to
		 * insert a nop to carry the sync flag.  Would be kinda
		 * clever if we were aware of this during scheduling, but
		 * this should be a pretty rare case:
		 */
		/* NOTE(review): the created nop is not explicitly appended to
		 * shader->instrs here — presumably ir3_instr_create() appends
		 * it (that would also explain resetting instrs_count above);
		 * confirm against ir3.c:
		 */
		if ((n->flags & IR3_INSTR_SS) && (n->category >= 5)) {
			struct ir3_instruction *nop;
			nop = ir3_instr_create(block, 0, OPC_NOP);
			nop->flags |= IR3_INSTR_SS;
			n->flags &= ~IR3_INSTR_SS;
		}

		/* need to be able to set (ss) on first instruction: */
		if ((shader->instrs_count == 0) && (n->category >= 5))
			ir3_instr_create(block, 0, OPC_NOP);

		/* fold consecutive nops into the repeat count (max 5): */
		if (is_nop(n) && shader->instrs_count) {
			struct ir3_instruction *last =
					shader->instrs[shader->instrs_count-1];
			if (is_nop(last) && (last->repeat < 5)) {
				last->repeat++;
				last->flags |= n->flags;
				continue;
			}
		}

		shader->instrs[shader->instrs_count++] = n;

		if (is_sfu(n))
			regmask_set(&needs_ss, n->regs[0]);

		if (is_tex(n))
			regmask_set(&needs_sy, n->regs[0]);

		/* both tex/sfu appear to not always immediately consume
		 * their src register(s):
		 */
		if (is_tex(n) || is_sfu(n)) {
			for (i = 1; i < n->regs_count; i++) {
				reg = n->regs[i];
				if (reg_gpr(reg))
					regmask_set(&needs_ss_war, reg);
			}
		}

		if (is_input(n))
			last_input = n;
	}

	/* mark end-of-input on the last input instruction: */
	if (last_input)
		last_input->regs[0]->flags |= IR3_REG_EI;

	shader->instrs[shader->instrs_count++] = end;

	/* first instruction always syncs against anything outstanding: */
	shader->instrs[0]->flags |= IR3_INSTR_SS | IR3_INSTR_SY;
}
682
/* run register assignment on one block.  Returns 0 on success, -1 if
 * an impossible assignment was detected (ctx->error).
 */
static int block_ra(struct ir3_ra_ctx *ctx, struct ir3_block *block)
{
	struct ir3_instruction *n;

	/* for the root block, pre-assign outputs and inputs first: */
	if (!block->parent) {
		unsigned i, j;
		int base, off = output_base(ctx);

		base = alloc_block(ctx, NULL, block->noutputs + off);

		if (ctx->half_precision)
			base |= REG_HALF;

		for (i = 0; i < block->noutputs; i++)
			if (block->outputs[i] && !is_kill(block->outputs[i]))
				ra_assign(ctx, block->outputs[i], base + i + off);

		if (ctx->type == SHADER_FRAGMENT) {
			/* frag shader inputs are pinned starting at r0.x (see
			 * output_base()), full precision even in half shaders:
			 */
			i = 0;
			if (ctx->frag_face) {
				/* if we have frag_face, it gets hr0.x */
				ra_assign(ctx, block->inputs[i], REG_HALF | 0);
				i += 4;
			}
			for (j = 0; i < block->ninputs; i++, j++)
				if (block->inputs[i])
					ra_assign(ctx, block->inputs[i], (base & ~REG_HALF) + j);
		} else {
			for (i = 0; i < block->ninputs; i++)
				if (block->inputs[i])
					ir3_instr_ra(ctx, block->inputs[i]);
		}
	}

	/* then loop over instruction list and assign registers:
	 */
	n = block->head;
	while (n) {
		ir3_instr_ra(ctx, n);
		if (ctx->error)
			return -1;
		n = n->next;
	}

	legalize(ctx, block);

	return 0;
}
731
732 int ir3_block_ra(struct ir3_block *block, enum shader_t type,
733 bool half_precision, bool frag_coord, bool frag_face)
734 {
735 struct ir3_ra_ctx ctx = {
736 .block = block,
737 .type = type,
738 .half_precision = half_precision,
739 .frag_coord = frag_coord,
740 .frag_face = frag_face,
741 };
742 ir3_shader_clear_mark(block->shader);
743 return block_ra(&ctx, block);
744 }