/* freedreno/ir3: fix lockups with lame FRAG shaders
 * (mesa.git: src/gallium/drivers/freedreno/ir3/ir3_ra.c)
 */
1 /* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
2
3 /*
4 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 * SOFTWARE.
24 *
25 * Authors:
26 * Rob Clark <robclark@freedesktop.org>
27 */
28
29 #include "pipe/p_shader_tokens.h"
30 #include "util/u_math.h"
31
32 #include "ir3.h"
33 #include "ir3_visitor.h"
34
35 /*
36 * Register Assignment:
37 *
38 * NOTE: currently only works on a single basic block.. need to think
39 * about how multiple basic blocks are going to get scheduled. But
40 * I think I want to re-arrange how blocks work, ie. get rid of the
41 * block nesting thing..
42 *
 * NOTE: we could do register coalescing (eliminate moves) as part of
 * the RA step.. OTOH I think we need to do scheduling before register
 * assignment.  And removing a mov affects scheduling (unless we leave
 * a placeholder nop, which seems lame), so I'm not really sure how
 * practical it is to do both in a single stage.  But OTOH I'm not
 * really sure of a sane way for the CP stage to realize when it
 * cannot remove a mov due to multi-register constraints..
50 *
51 */
52
/* per-run state for register assignment on a block: */
struct ir3_ra_ctx {
	struct ir3_block *block;
	enum shader_t type;      /* vertex vs fragment shader */
	bool half_precision;     /* allocate outputs as half regs (hrN) */
	bool frag_coord;         /* frag shader reads frag coord (inputs start higher) */
	bool frag_face;          /* frag shader reads front-face (gets hr0.x) */
	bool has_samp;           /* set in legalize() when a tex instr survives */
	int cnt;
	int max_bary;            /* highest bary.f inloc seen, or -1 if none */
	bool error;              /* conflicting register assignment detected */
};
64
/* sorta ugly way to retrofit half-precision support.. rather than
 * passing extra param around, just OR in a high bit. All the low
 * value arithmetic (ie. +/- offset within a contiguous vec4, etc)
 * will continue to work as long as you don't underflow (and that
 * would go badly anyways).
 * (the bit is masked off again with ~REG_HALF before writing reg->num)
 */
#define REG_HALF 0x8000
72
/* calculated constraint for an instruction's dst register, expressed
 * relative to the contiguous range it must be allocated within:
 */
struct ir3_ra_assignment {
	int8_t off; /* offset of instruction dst within range */
	uint8_t num; /* number of components for the range */
};
77
78 static void ra_assign(struct ir3_ra_ctx *ctx,
79 struct ir3_instruction *assigner, int num);
80 static struct ir3_ra_assignment ra_calc(struct ir3_instruction *instr);
81
82 /*
83 * Register Allocation:
84 */
85
/* construct a temporary ir3_register (compound literal) with the given
 * register #, writemask and flags, mainly for regmask queries:
 */
#define REG(n, wm, f) (struct ir3_register){ \
		.flags = (f), \
		.num = (n), \
		.wrmask = TGSI_WRITEMASK_ ## wm, \
	}
91
92 /* check that the register exists, is a GPR and is not special (a0/p0) */
93 static struct ir3_register * reg_check(struct ir3_instruction *instr, unsigned n)
94 {
95 if ((n < instr->regs_count) && reg_gpr(instr->regs[n]))
96 return instr->regs[n];
97 return NULL;
98 }
99
100 static int output_base(struct ir3_ra_ctx *ctx)
101 {
102 /* ugg, for fragment shader we need to have input at r0.x
103 * (or at least if there is a way to configure it, I can't
104 * see how because the blob driver always uses r0.x (ie.
105 * all zeros)
106 */
107 if (ctx->type == SHADER_FRAGMENT) {
108 if (ctx->half_precision)
109 return ctx->frag_face ? 4 : 3;
110 return ctx->frag_coord ? 8 : 4;
111 }
112 return 0;
113 }
114
/* live means read before written */
static void compute_liveregs(struct ir3_ra_ctx *ctx,
		struct ir3_instruction *instr, regmask_t *liveregs)
{
	struct ir3_block *block = instr->block;
	regmask_t written;
	unsigned i, j;

	regmask_init(liveregs);
	regmask_init(&written);

	/* scan forward from the instruction *after* 'instr', collecting
	 * every already-assigned GPR that is read before it is rewritten:
	 */
	for (instr = instr->next; instr; instr = instr->next) {
		struct ir3_register *r;

		/* meta instructions don't generate any code: */
		if (is_meta(instr))
			continue;

		/* check first src's read: */
		for (j = 1; j < instr->regs_count; j++) {
			r = reg_check(instr, j);
			if (r)
				regmask_set_if_not(liveregs, r, &written);
		}

		/* then dst written (if assigned already): */
		if (instr->flags & IR3_INSTR_MARK) {
			r = reg_check(instr, 0);
			if (r)
				regmask_set(&written, r);
		}
	}

	/* be sure to account for output registers too: */
	for (i = 0; i < block->noutputs; i++) {
		struct ir3_register reg = REG(output_base(ctx) + i, X, 0);
		regmask_set_if_not(liveregs, &reg, &written);
	}
}
153
/* calculate registers that are clobbered before last use of 'assigner'.
 * This needs to be done backwards, although it could possibly be
 * combined into compute_liveregs().  (Ie. compute_liveregs() could
 * reverse the list, then do this part backwards reversing the list
 * again back to original order.)  Otoh, probably I should try to
 * construct a proper interference graph instead.
 *
 * Returns true if 'assigner' is still live at or after 'instr'.
 *
 * XXX this need to follow the same recursion path that is used for
 * to rename/assign registers (ie. ra_assign_src()).. this is a bit
 * ugly right now, maybe refactor into node iterator sort of things
 * that iterates nodes in the correct order?
 */
static bool compute_clobbers(struct ir3_ra_ctx *ctx,
		struct ir3_instruction *instr, struct ir3_instruction *assigner,
		regmask_t *liveregs)
{
	unsigned i;
	bool live = false, was_live = false;

	if (instr == NULL) {
		struct ir3_block *block = ctx->block;

		/* if at the end, check outputs: */
		for (i = 0; i < block->noutputs; i++)
			if (block->outputs[i] == assigner)
				return true;
		return false;
	}

	/* does this instruction consume 'assigner' as a src? */
	for (i = 1; i < instr->regs_count; i++) {
		struct ir3_register *reg = instr->regs[i];
		if ((reg->flags & IR3_REG_SSA) && (reg->instr == assigner)) {
			if (is_meta(instr)) {
				switch (instr->opc) {
				case OPC_META_INPUT:
					// TODO
					assert(0);
					break;
				case OPC_META_FO:
				case OPC_META_FI:
					/* fanin/fanout pass the value through, so the
					 * meta instr itself becomes the assigner for
					 * the remainder of the search:
					 */
					was_live |= compute_clobbers(ctx, instr->next,
							instr, liveregs);
					break;
				default:
					break;
				}
			}
			live = true;
			break;
		}
	}

	was_live |= compute_clobbers(ctx, instr->next, assigner, liveregs);

	/* if 'assigner' is still live past this point, any register that
	 * this (already-assigned, non-meta) instruction writes clobbers it:
	 */
	if (was_live && (instr->regs_count > 0) &&
			(instr->flags & IR3_INSTR_MARK) &&
			!is_meta(instr))
		regmask_set(liveregs, instr->regs[0]);

	return live || was_live;
}
215
216 static int find_available(regmask_t *liveregs, int size, bool half)
217 {
218 unsigned i;
219 unsigned f = half ? IR3_REG_HALF : 0;
220 for (i = 0; i < MAX_REG - size; i++) {
221 if (!regmask_get(liveregs, &REG(i, X, f))) {
222 unsigned start = i++;
223 for (; (i < MAX_REG) && ((i - start) < size); i++)
224 if (regmask_get(liveregs, &REG(i, X, f)))
225 break;
226 if ((i - start) >= size)
227 return start;
228 }
229 }
230 assert(0);
231 return -1;
232 }
233
/* allocate a contiguous range of 'size' registers for the dst of
 * 'instr' (or for the shader outputs when instr is NULL):
 */
static int alloc_block(struct ir3_ra_ctx *ctx,
		struct ir3_instruction *instr, int size)
{
	if (!instr) {
		/* special case, allocating shader outputs. At this
		 * point, nothing is allocated, just start the shader
		 * outputs at r0.x and let compute_liveregs() take
		 * care of the rest from here:
		 */
		return 0;
	} else {
		struct ir3_register *dst = instr->regs[0];
		regmask_t liveregs;

		compute_liveregs(ctx, instr, &liveregs);

		// XXX XXX XXX XXX XXX XXX XXX XXX XXX
		// XXX hack.. maybe ra_calc should give us a list of
		// instrs to compute_clobbers() on?
		if (is_meta(instr) && (instr->opc == OPC_META_INPUT) &&
				(instr->regs_count == 1)) {
			/* src-less shader input: compute clobbers for each
			 * member of the whole (vec4-aligned) input group:
			 */
			unsigned i, base = instr->regs[0]->num & ~0x3;
			for (i = 0; i < 4; i++) {
				struct ir3_instruction *in = NULL;
				if ((base + i) < ctx->block->ninputs)
					in = ctx->block->inputs[base + i];
				if (in)
					compute_clobbers(ctx, in->next, in, &liveregs);
			}
		} else
			// XXX XXX XXX XXX XXX XXX XXX XXX XXX
			compute_clobbers(ctx, instr->next, instr, &liveregs);

		return find_available(&liveregs, size,
				!!(dst->flags & IR3_REG_HALF));
	}
}
271
272 /*
273 * Constraint Calculation:
274 */
275
/* visitor used by ra_calc(), carrying the computed assignment result: */
struct ra_calc_visitor {
	struct ir3_visitor base;      /* must be first member (downcast below) */
	struct ir3_ra_assignment a;
};
280
/* downcast from the base visitor ptr ('base' is the first member): */
static inline struct ra_calc_visitor *ra_calc_visitor(struct ir3_visitor *v)
{
	return (struct ra_calc_visitor *)v;
}
285
286 /* calculate register assignment for the instruction. If the register
287 * written by this instruction is required to be part of a range, to
288 * handle other (input/output/sam/bary.f/etc) contiguous register range
289 * constraints, that is calculated handled here.
290 */
291 static void ra_calc_dst(struct ir3_visitor *v,
292 struct ir3_instruction *instr, struct ir3_register *reg)
293 {
294 struct ra_calc_visitor *c = ra_calc_visitor(v);
295 if (is_tex(instr)) {
296 c->a.off = 0;
297 c->a.num = 4;
298 } else {
299 c->a.off = 0;
300 c->a.num = 1;
301 }
302 }
303
/* dst of an input-load must land at its component offset within the
 * (up to) vec4 group of inputs it belongs to; figure out how many
 * components of the group are actually used and our offset within it:
 */
static void
ra_calc_dst_shader_input(struct ir3_visitor *v,
		struct ir3_instruction *instr, struct ir3_register *reg)
{
	struct ra_calc_visitor *c = ra_calc_visitor(v);
	struct ir3_block *block = instr->block;
	struct ir3_register *dst = instr->regs[0];
	unsigned base = dst->num & ~0x3;  /* start of the vec4 input group */
	unsigned i, num = 0;

	assert(!(dst->flags & IR3_REG_IA));

	/* check what input components we need: */
	for (i = 0; i < 4; i++) {
		unsigned idx = base + i;
		if ((idx < block->ninputs) && block->inputs[idx])
			num = i + 1;
	}

	c->a.off = dst->num - base;
	c->a.num = num;
}
326
/* as a src of a fanin (collect), our dst must sit at a fixed offset
 * (srcn) within the fanin's contiguous register range:
 */
static void ra_calc_src_fanin(struct ir3_visitor *v,
		struct ir3_instruction *instr, struct ir3_register *reg)
{
	struct ra_calc_visitor *c = ra_calc_visitor(v);
	unsigned srcn = ir3_instr_regno(instr, reg) - 1;
	c->a.off += srcn;
	c->a.num += srcn;
	/* range must cover at least all of the fanin's srcs: */
	c->a.num = MAX2(c->a.num, instr->regs_count - 1);
}
336
/* visitor callbacks for the constraint-calculation pass: */
static const struct ir3_visitor_funcs calc_visitor_funcs = {
	.instr = ir3_visit_instr,
	.dst_shader_input = ra_calc_dst_shader_input,
	.dst_fanout = ra_calc_dst,
	.dst_fanin = ra_calc_dst,
	.dst = ra_calc_dst,
	.src_fanout = ir3_visit_reg,
	.src_fanin = ra_calc_src_fanin,
	.src = ir3_visit_reg,
};
347
348 static struct ir3_ra_assignment ra_calc(struct ir3_instruction *assigner)
349 {
350 struct ra_calc_visitor v = {
351 .base.funcs = &calc_visitor_funcs,
352 };
353
354 ir3_visit_instr(&v.base, assigner);
355
356 return v.a;
357 }
358
359 /*
360 * Register Assignment:
361 */
362
/* visitor used by ra_assign(), carrying the register being assigned: */
struct ra_assign_visitor {
	struct ir3_visitor base;   /* must be first member (downcast below) */
	struct ir3_ra_ctx *ctx;
	int num;                   /* register # to assign (may have REG_HALF set) */
};
368
/* downcast from the base visitor ptr ('base' is the first member): */
static inline struct ra_assign_visitor *ra_assign_visitor(struct ir3_visitor *v)
{
	return (struct ra_assign_visitor *)v;
}
373
374 static type_t half_type(type_t type)
375 {
376 switch (type) {
377 case TYPE_F32: return TYPE_F16;
378 case TYPE_U32: return TYPE_U16;
379 case TYPE_S32: return TYPE_S16;
380 /* instructions may already be fixed up: */
381 case TYPE_F16:
382 case TYPE_U16:
383 case TYPE_S16:
384 return type;
385 default:
386 assert(0);
387 return ~0;
388 }
389 }
390
391 /* some instructions need fix-up if dst register is half precision: */
392 static void fixup_half_instr_dst(struct ir3_instruction *instr)
393 {
394 switch (instr->category) {
395 case 1: /* move instructions */
396 instr->cat1.dst_type = half_type(instr->cat1.dst_type);
397 break;
398 case 3:
399 switch (instr->opc) {
400 case OPC_MAD_F32:
401 instr->opc = OPC_MAD_F16;
402 break;
403 case OPC_SEL_B32:
404 instr->opc = OPC_SEL_B16;
405 break;
406 case OPC_SEL_S32:
407 instr->opc = OPC_SEL_S16;
408 break;
409 case OPC_SEL_F32:
410 instr->opc = OPC_SEL_F16;
411 break;
412 case OPC_SAD_S32:
413 instr->opc = OPC_SAD_S16;
414 break;
415 /* instructions may already be fixed up: */
416 case OPC_MAD_F16:
417 case OPC_SEL_B16:
418 case OPC_SEL_S16:
419 case OPC_SEL_F16:
420 case OPC_SAD_S16:
421 break;
422 default:
423 assert(0);
424 break;
425 }
426 break;
427 case 5:
428 instr->cat5.type = half_type(instr->cat5.type);
429 break;
430 }
431 }
432 /* some instructions need fix-up if src register is half precision: */
433 static void fixup_half_instr_src(struct ir3_instruction *instr)
434 {
435 switch (instr->category) {
436 case 1: /* move instructions */
437 instr->cat1.src_type = half_type(instr->cat1.src_type);
438 break;
439 }
440 }
441
/* actually bind register 'a->num' into 'reg', clearing the SSA flag
 * and patching up the instruction if the assignment is half precision:
 */
static void ra_assign_reg(struct ir3_visitor *v,
		struct ir3_instruction *instr, struct ir3_register *reg)
{
	struct ra_assign_visitor *a = ra_assign_visitor(v);

	/* kill has no real dst register to assign: */
	if (is_flow(instr) && (instr->opc == OPC_KILL))
		return;

	reg->flags &= ~IR3_REG_SSA;
	reg->num = a->num & ~REG_HALF;

	/* NOTE(review): if 'num' is an unsigned field this assert can never
	 * fire — presumably intended to catch underflow of the REG_HALF
	 * offset arithmetic; confirm the field type in ir3.h:
	 */
	assert(reg->num >= 0);

	if (a->num & REG_HALF) {
		reg->flags |= IR3_REG_HALF;
		/* if dst reg being assigned, patch up the instr: */
		if (reg == instr->regs[0])
			fixup_half_instr_dst(instr);
		else
			fixup_half_instr_src(instr);
	}
}
464
/* assigning the dst of one shader input also pins the other components
 * of its vec4 input group, since they are loaded contiguously:
 */
static void ra_assign_dst_shader_input(struct ir3_visitor *v,
		struct ir3_instruction *instr, struct ir3_register *reg)
{
	struct ra_assign_visitor *a = ra_assign_visitor(v);
	unsigned i, base = reg->num & ~0x3;
	int off = base - reg->num;   /* negative offset back to group start */

	ra_assign_reg(v, instr, reg);
	reg->flags |= IR3_REG_IA;

	/* trigger assignment of all our companion input components: */
	for (i = 0; i < 4; i++) {
		struct ir3_instruction *in = NULL;
		if ((base + i) < instr->block->ninputs)
			in = instr->block->inputs[base + i];
		if (in && is_meta(in) && (in->opc == OPC_META_INPUT))
			ra_assign(a->ctx, in, a->num + off + i);
	}
}
484
/* assigning a fanout (split) dst: the fanout extracts component fo.off
 * of its src, so the src value must sit at (num - fo.off); propagate
 * the assignment back to the src instruction if still in SSA form:
 */
static void ra_assign_dst_fanout(struct ir3_visitor *v,
		struct ir3_instruction *instr, struct ir3_register *reg)
{
	struct ra_assign_visitor *a = ra_assign_visitor(v);
	struct ir3_register *src = instr->regs[1];
	ra_assign_reg(v, instr, reg);
	if (src->flags & IR3_REG_SSA)
		ra_assign(a->ctx, src->instr, a->num - instr->fo.off);
}
494
/* assigning the src of a fanout (split): the fanout's own dst then
 * lands at (num + fo.off):
 */
static void ra_assign_src_fanout(struct ir3_visitor *v,
		struct ir3_instruction *instr, struct ir3_register *reg)
{
	struct ra_assign_visitor *a = ra_assign_visitor(v);
	ra_assign_reg(v, instr, reg);
	ra_assign(a->ctx, instr, a->num + instr->fo.off);
}
502
503
504 static void ra_assign_src_fanin(struct ir3_visitor *v,
505 struct ir3_instruction *instr, struct ir3_register *reg)
506 {
507 struct ra_assign_visitor *a = ra_assign_visitor(v);
508 unsigned j, srcn = ir3_instr_regno(instr, reg) - 1;
509 ra_assign_reg(v, instr, reg);
510 ra_assign(a->ctx, instr, a->num - srcn);
511 for (j = 1; j < instr->regs_count; j++) {
512 struct ir3_register *reg = instr->regs[j];
513 if (reg->flags & IR3_REG_SSA) /* could be renamed already */
514 ra_assign(a->ctx, reg->instr, a->num - srcn + j - 1);
515 }
516 }
517
/* visitor callbacks for the register-assignment pass: */
static const struct ir3_visitor_funcs assign_visitor_funcs = {
	.instr = ir3_visit_instr,
	.dst_shader_input = ra_assign_dst_shader_input,
	.dst_fanout = ra_assign_dst_fanout,
	.dst_fanin = ra_assign_reg,
	.dst = ra_assign_reg,
	.src_fanout = ra_assign_src_fanout,
	.src_fanin = ra_assign_src_fanin,
	.src = ra_assign_reg,
};
528
/* assign register 'num' to 'assigner' and recursively propagate the
 * assignment to related instructions via the visitor callbacks.  If a
 * previously-visited instruction would be assigned a *different*
 * register, ctx->error is raised so the caller can bail instead of
 * emitting a broken shader:
 */
static void ra_assign(struct ir3_ra_ctx *ctx,
		struct ir3_instruction *assigner, int num)
{
	struct ra_assign_visitor v = {
		.base.funcs = &assign_visitor_funcs,
		.ctx = ctx,
		.num = num,
	};

	/* if we've already visited this instruction, bail now: */
	if (ir3_instr_check_mark(assigner)) {
		debug_assert(assigner->regs[0]->num == (num & ~REG_HALF));
		/* same check again for release builds (debug_assert is a no-op): */
		if (assigner->regs[0]->num != (num & ~REG_HALF)) {
			/* impossible situation, should have been resolved
			 * at an earlier stage by inserting extra mov's:
			 */
			ctx->error = true;
		}
		return;
	}

	ir3_visit_instr(&v.base, assigner);
}
552
553 /*
554 *
555 */
556
/* choose and assign register(s) for the dst of a single instruction: */
static void ir3_instr_ra(struct ir3_ra_ctx *ctx,
		struct ir3_instruction *instr)
{
	struct ir3_register *dst;
	unsigned num;

	/* skip over nop's */
	if (instr->regs_count == 0)
		return;

	dst = instr->regs[0];

	/* if we've already visited this instruction, bail now: */
	if (instr->flags & IR3_INSTR_MARK)
		return;

	/* allocate register(s): */
	if (is_addr(instr)) {
		/* address-reg load: reuses the register # of its 2nd src —
		 * presumably already assigned; confirm is_addr() semantics
		 * against ir3.h:
		 */
		num = instr->regs[2]->num;
	} else if (reg_gpr(dst)) {
		/* normal GPR dst: compute range constraints, then search
		 * for a free contiguous range satisfying them:
		 */
		struct ir3_ra_assignment a;
		a = ra_calc(instr);
		num = alloc_block(ctx, instr, a.num) + a.off;
	} else if (dst->flags & IR3_REG_ADDR) {
		/* dst is the address register (a0, encoded as a half reg): */
		dst->flags &= ~IR3_REG_ADDR;
		num = regid(REG_A0, 0) | REG_HALF;
	} else {
		/* predicate register (p0).. etc */
		return;
	}

	ra_assign(ctx, instr, num);
}
590
/* flatten into shader: rebuild the flat instruction array from the
 * block's list, dropping meta instructions, merging nop's, and adding
 * the (ss)/(sy) sync flags and (ul)/(ei) markers the hw requires.
 */
// XXX this should probably be somewhere else:
static void legalize(struct ir3_ra_ctx *ctx, struct ir3_block *block)
{
	struct ir3_instruction *n;
	struct ir3 *shader = block->shader;
	struct ir3_instruction *end =
			ir3_instr_create(block, 0, OPC_END);
	struct ir3_instruction *last_input = NULL;
	struct ir3_instruction *last_rel = NULL;
	regmask_t needs_ss_war; /* write after read */
	regmask_t needs_ss;     /* regs pending an sfu result */
	regmask_t needs_sy;     /* regs pending a tex result */

	regmask_init(&needs_ss_war);
	regmask_init(&needs_ss);
	regmask_init(&needs_sy);

	/* restart the flat list; it is re-filled below: */
	shader->instrs_count = 0;

	for (n = block->head; n; n = n->next) {
		struct ir3_register *reg;
		unsigned i;

		/* meta instructions generate no code: */
		if (is_meta(n))
			continue;

		/* track the highest varying inloc consumed by bary.f: */
		if (is_input(n)) {
			struct ir3_register *inloc = n->regs[1];
			assert(inloc->flags & IR3_REG_IMMED);
			ctx->max_bary = MAX2(ctx->max_bary, inloc->iim_val);
		}

		/* check src reads against outstanding sync requirements: */
		for (i = 1; i < n->regs_count; i++) {
			reg = n->regs[i];

			if (reg_gpr(reg)) {

				/* TODO: we probably only need (ss) for alu
				 * instr consuming sfu result.. need to make
				 * some tests for both this and (sy)..
				 */
				if (regmask_get(&needs_ss, reg)) {
					n->flags |= IR3_INSTR_SS;
					regmask_init(&needs_ss);
				}

				if (regmask_get(&needs_sy, reg)) {
					n->flags |= IR3_INSTR_SY;
					regmask_init(&needs_sy);
				}
			}

			/* TODO: is it valid to have address reg loaded from a
			 * relative src (ie. mova a0, c<a0.x+4>)? If so, the
			 * last_rel check below should be moved ahead of this:
			 */
			if (reg->flags & IR3_REG_RELATIV)
				last_rel = n;
		}

		if (n->regs_count > 0) {
			reg = n->regs[0];
			if (regmask_get(&needs_ss_war, reg)) {
				n->flags |= IR3_INSTR_SS;
				regmask_init(&needs_ss_war); // ??? I assume?
			}

			/* writing a0 ends any outstanding relative access;
			 * mark the last user with (ul):
			 */
			if (last_rel && (reg->num == regid(REG_A0, 0))) {
				last_rel->flags |= IR3_INSTR_UL;
				last_rel = NULL;
			}
		}

		/* cat5+ does not have an (ss) bit, if needed we need to
		 * insert a nop to carry the sync flag.  Would be kinda
		 * clever if we were aware of this during scheduling, but
		 * this should be a pretty rare case:
		 */
		if ((n->flags & IR3_INSTR_SS) && (n->category >= 5)) {
			struct ir3_instruction *nop;
			nop = ir3_instr_create(block, 0, OPC_NOP);
			nop->flags |= IR3_INSTR_SS;
			n->flags &= ~IR3_INSTR_SS;
		}

		/* need to be able to set (ss) on first instruction: */
		if ((shader->instrs_count == 0) && (n->category >= 5))
			ir3_instr_create(block, 0, OPC_NOP);

		/* merge adjacent nop's via the (rptN) repeat count: */
		if (is_nop(n) && shader->instrs_count) {
			struct ir3_instruction *last =
					shader->instrs[shader->instrs_count-1];
			if (is_nop(last) && (last->repeat < 5)) {
				last->repeat++;
				last->flags |= n->flags;
				continue;
			}
		}

		shader->instrs[shader->instrs_count++] = n;

		if (is_sfu(n))
			regmask_set(&needs_ss, n->regs[0]);

		if (is_tex(n)) {
			/* this ends up being the # of samp instructions.. but that
			 * is ok, everything else only cares whether it is zero or
			 * not.  We do this here, rather than when we encounter a
			 * SAMP decl, because (especially in binning pass shader)
			 * the samp instruction(s) could get eliminated if the
			 * result is not used.
			 */
			ctx->has_samp = true;
			regmask_set(&needs_sy, n->regs[0]);
		}

		/* both tex/sfu appear to not always immediately consume
		 * their src register(s):
		 */
		if (is_tex(n) || is_sfu(n)) {
			for (i = 1; i < n->regs_count; i++) {
				reg = n->regs[i];
				if (reg_gpr(reg))
					regmask_set(&needs_ss_war, reg);
			}
		}

		if (is_input(n))
			last_input = n;
	}

	/* mark the final input with (ei) and final relative access with (ul): */
	if (last_input)
		last_input->regs[0]->flags |= IR3_REG_EI;

	if (last_rel)
		last_rel->flags |= IR3_INSTR_UL;

	shader->instrs[shader->instrs_count++] = end;

	/* NOTE(review): presumably syncs against anything left outstanding
	 * from a previously-executed shader — confirm:
	 */
	shader->instrs[0]->flags |= IR3_INSTR_SS | IR3_INSTR_SY;
}
733
/* run register assignment over one block; returns 0 on success, or
 * -1 if a conflicting assignment was detected (ctx->error):
 */
static int block_ra(struct ir3_ra_ctx *ctx, struct ir3_block *block)
{
	struct ir3_instruction *n;

	/* top-level block: pre-assign shader inputs/outputs first: */
	if (!block->parent) {
		unsigned i, j;
		int base, off = output_base(ctx);

		base = alloc_block(ctx, NULL, block->noutputs + off);

		if (ctx->half_precision)
			base |= REG_HALF;

		for (i = 0; i < block->noutputs; i++)
			if (block->outputs[i] && !is_kill(block->outputs[i]))
				ra_assign(ctx, block->outputs[i], base + i + off);

		if (ctx->type == SHADER_FRAGMENT) {
			/* frag shader inputs are pinned starting at r0.x
			 * (see output_base()):
			 */
			i = 0;
			if (ctx->frag_face) {
				/* if we have frag_face, it gets hr0.x */
				ra_assign(ctx, block->inputs[i], REG_HALF | 0);
				i += 4;
			}
			for (j = 0; i < block->ninputs; i++, j++)
				if (block->inputs[i])
					ra_assign(ctx, block->inputs[i], (base & ~REG_HALF) + j);
		} else {
			for (i = 0; i < block->ninputs; i++)
				if (block->inputs[i])
					ir3_instr_ra(ctx, block->inputs[i]);
		}
	}

	/* then loop over instruction list and assign registers:
	 */
	n = block->head;
	while (n) {
		ir3_instr_ra(ctx, n);
		if (ctx->error)
			return -1;
		n = n->next;
	}

	legalize(ctx, block);

	return 0;
}
782
/* perform register assignment (plus post-RA legalization) on a block.
 *
 * Returns 0 on success, -1 if a conflicting register assignment was
 * detected, in which case the result should not be used.
 *
 * Outparams: has_samp — whether any tex instr survived into the final
 * shader; max_bary — highest bary.f varying inloc used (-1 if none).
 */
int ir3_block_ra(struct ir3_block *block, enum shader_t type,
		bool half_precision, bool frag_coord, bool frag_face,
		bool *has_samp, int *max_bary)
{
	struct ir3_ra_ctx ctx = {
		.block = block,
		.type = type,
		.half_precision = half_precision,
		.frag_coord = frag_coord,
		.frag_face = frag_face,
		.max_bary = -1,
	};
	int ret;

	/* clear marks so ra_assign()/ir3_instr_ra() visit each
	 * instruction at most once:
	 */
	ir3_clear_mark(block->shader);
	ret = block_ra(&ctx, block);
	*has_samp = ctx.has_samp;
	*max_bary = ctx.max_bary;

	return ret;
}