freedreno/a3xx/compiler: half-precision output
[mesa.git] / src/gallium/drivers/freedreno/a3xx/ir3_ra.c
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */

/*
 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */
#include "pipe/p_shader_tokens.h"
#include "util/u_math.h"

#include "ir3.h"
#include "ir3_visitor.h"

/*
 * Register Assignment:
 *
 * NOTE: currently only works on a single basic block.. need to think
 * about how multiple basic blocks are going to get scheduled. But
 * I think I want to re-arrange how blocks work, ie. get rid of the
 * block nesting thing..
 *
 * NOTE: we could do register coalescing (eliminate moves) as part of
 * the RA step.. OTOH I think we need to do scheduling before register
 * assignment. And removing a mov affects scheduling (unless we leave
 * a placeholder nop, which seems lame), so I'm not really sure how
 * practical it is to do both in a single stage. But OTOH I'm not
 * really sure of a sane way for the CP stage to realize when it
 * cannot remove a mov due to multi-register constraints..
 *
 */

struct ir3_ra_ctx {
	struct ir3_block *block;
	enum shader_t type;
	bool half_precision;
	int cnt;
	bool error;
};

/* sorta ugly way to retrofit half-precision support.. rather than
 * passing an extra param around, just OR in a high bit. All the low
 * value arithmetic (ie. +/- offset within a contiguous vec4, etc)
 * will continue to work as long as you don't underflow (and that
 * would go badly anyways).
 */
#define REG_HALF 0x8000

struct ir3_ra_assignment {
	int8_t  off;       /* offset of instruction dst within range */
	uint8_t num;       /* number of components for the range */
};

static void ra_assign(struct ir3_ra_ctx *ctx,
		struct ir3_instruction *assigner, int num);
static struct ir3_ra_assignment ra_calc(struct ir3_instruction *instr);

/*
 * Register Allocation:
 */

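/* convenience for constructing a scratch ir3_register (as a compound
 * literal) to query/update the regmasks below; only .num and .wrmask
 * are used there:
 */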
#define REG(n, wm) (struct ir3_register){ \
		/*.flags  = ((so)->half_precision) ? IR3_REG_HALF : 0,*/ \
		.num    = (n), \
		.wrmask = TGSI_WRITEMASK_ ## wm, \
	}

/* check that the register exists, is a GPR and is not special (a0/p0) */
static struct ir3_register * reg_check(struct ir3_instruction *instr, unsigned n)
{
	if ((n < instr->regs_count) && reg_gpr(instr->regs[n]))
		return instr->regs[n];
	return NULL;
}

static int output_base(struct ir3_ra_ctx *ctx)
{
	/* ugg, for the fragment shader we need to have the input at r0.x
	 * (or at least I can't see a way to configure it otherwise, since
	 * the blob driver always uses r0.x, ie. all zeros).
	 */
	if ((ctx->type == SHADER_FRAGMENT) && !ctx->half_precision)
		return 2;
	return 0;
}

/* live means read before written */
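/* Scan forward from 'instr' to the end of the block, collecting GPRs that
 * are read before being (re)written (plus the block's outputs); these are
 * the registers we must not clobber with a new assignment:
 */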
static void compute_liveregs(struct ir3_ra_ctx *ctx,
		struct ir3_instruction *instr, regmask_t *liveregs)
{
	struct ir3_block *block = instr->block;
	regmask_t written;
	unsigned i, j;

	regmask_init(liveregs);
	regmask_init(&written);

	for (instr = instr->next; instr; instr = instr->next) {
		struct ir3_register *r;

		if (is_meta(instr))
			continue;

		/* check first src's read: */
		for (j = 1; j < instr->regs_count; j++) {
			r = reg_check(instr, j);
			if (r)
				regmask_set_if_not(liveregs, r, &written);
		}

		/* then dst written (if assigned already): */
		if (instr->flags & IR3_INSTR_MARK) {
			r = reg_check(instr, 0);
			if (r)
				regmask_set(&written, r);
		}
	}

	/* be sure to account for output registers too: */
	for (i = 0; i < block->noutputs; i++) {
		struct ir3_register reg = REG(output_base(ctx) + i, X);
		regmask_set_if_not(liveregs, &reg, &written);
	}
}

/* calculate registers that are clobbered before last use of 'assigner'.
 * This needs to be done backwards, although it could possibly be
 * combined into compute_liveregs(). (Ie. compute_liveregs() could
 * reverse the list, then do this part backwards reversing the list
 * again back to original order.) Otoh, probably I should try to
 * construct a proper interference graph instead.
 *
 * XXX this needs to follow the same recursion path that is used
 * to rename/assign registers (ie. ra_assign_src()).. this is a bit
 * ugly right now, maybe refactor into some node iterator sort of
 * thing that iterates nodes in the correct order?
 */
static bool compute_clobbers(struct ir3_ra_ctx *ctx,
		struct ir3_instruction *instr, struct ir3_instruction *assigner,
		regmask_t *liveregs)
{
	unsigned i;
	bool live = false, was_live = false;

	if (instr == NULL) {
		struct ir3_block *block = ctx->block;

		/* if at the end, check outputs: */
		for (i = 0; i < block->noutputs; i++)
			if (block->outputs[i] == assigner)
				return true;
		return false;
	}

	for (i = 1; i < instr->regs_count; i++) {
		struct ir3_register *reg = instr->regs[i];
		if ((reg->flags & IR3_REG_SSA) && (reg->instr == assigner)) {
			if (is_meta(instr)) {
				switch (instr->opc) {
				case OPC_META_INPUT:
					// TODO
					assert(0);
					break;
				case OPC_META_FO:
				case OPC_META_FI:
					was_live |= compute_clobbers(ctx, instr->next,
							instr, liveregs);
					break;
				default:
					break;
				}
			}
			live = true;
			break;
		}
	}

	was_live |= compute_clobbers(ctx, instr->next, assigner, liveregs);

	if (was_live && (instr->regs_count > 0) &&
			(instr->flags & IR3_INSTR_MARK) &&
			!is_meta(instr))
		regmask_set(liveregs, instr->regs[0]);

	return live || was_live;
}

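/* find the lowest run of 'size' consecutive registers that are not
 * currently marked live:
 */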
static int find_available(regmask_t *liveregs, int size)
{
	unsigned i;
	for (i = 0; i < MAX_REG - size; i++) {
		if (!regmask_get(liveregs, &REG(i, X))) {
			unsigned start = i++;
			for (; (i < MAX_REG) && ((i - start) < size); i++)
				if (regmask_get(liveregs, &REG(i, X)))
					break;
			if ((i - start) >= size)
				return start;
		}
	}
	assert(0);
	return -1;
}

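/* allocate a block of 'size' consecutive registers for 'instr's dst, by
 * computing what is live/clobbered across its lifetime. A NULL 'instr'
 * means we are allocating the shader outputs, which just start at r0.x:
 */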
static int alloc_block(struct ir3_ra_ctx *ctx,
		struct ir3_instruction *instr, int size)
{
	if (!instr) {
		/* special case, allocating shader outputs. At this
		 * point, nothing is allocated, just start the shader
		 * outputs at r0.x and let compute_liveregs() take
		 * care of the rest from here:
		 */
		return 0;
	} else {
		regmask_t liveregs;
		compute_liveregs(ctx, instr, &liveregs);

		// XXX XXX XXX XXX XXX XXX XXX XXX XXX
		// XXX hack.. maybe ra_calc should give us a list of
		// instrs to compute_clobbers() on?
		if (is_meta(instr) && (instr->opc == OPC_META_INPUT) &&
				(instr->regs_count == 1)) {
			unsigned i, base = instr->regs[0]->num & ~0x3;
			for (i = 0; i < 4; i++) {
				struct ir3_instruction *in = ctx->block->inputs[base + i];
				if (in)
					compute_clobbers(ctx, in->next, in, &liveregs);
			}
		} else
		// XXX XXX XXX XXX XXX XXX XXX XXX XXX
			compute_clobbers(ctx, instr->next, instr, &liveregs);
		return find_available(&liveregs, size);
	}
}

/*
 * Constraint Calculation:
 */

struct ra_calc_visitor {
	struct ir3_visitor base;
	struct ir3_ra_assignment a;
};

static inline struct ra_calc_visitor *ra_calc_visitor(struct ir3_visitor *v)
{
	return (struct ra_calc_visitor *)v;
}

/* calculate register assignment for the instruction. If the register
 * written by this instruction is required to be part of a range, to
 * handle other (input/output/sam/bary.f/etc) contiguous register range
 * constraints, that is calculated and handled here.
 */
static void ra_calc_dst(struct ir3_visitor *v,
		struct ir3_instruction *instr, struct ir3_register *reg)
{
	struct ra_calc_visitor *c = ra_calc_visitor(v);
	if (is_tex(instr)) {
		c->a.off = 0;
		c->a.num = 4;
	} else {
		c->a.off = 0;
		c->a.num = 1;
	}
}

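/* a shader input is part of an (up to) vec4 group of inputs which need
 * consecutive registers, so figure out how many components of the group
 * are actually used and where this dst sits within the group:
 */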
static void
ra_calc_dst_shader_input(struct ir3_visitor *v,
		struct ir3_instruction *instr, struct ir3_register *reg)
{
	struct ra_calc_visitor *c = ra_calc_visitor(v);
	struct ir3_block *block = instr->block;
	struct ir3_register *dst = instr->regs[0];
	unsigned base = dst->num & ~0x3;
	unsigned i, num = 0;

	assert(!(dst->flags & IR3_REG_IA));

	/* check what input components we need: */
	for (i = 0; i < 4; i++) {
		unsigned idx = base + i;
		if ((idx < block->ninputs) && block->inputs[idx])
			num = i + 1;
	}

	c->a.off = dst->num - base;
	c->a.num = num;
}

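/* the value feeds the Nth src of a fanin (meta collect), so it must land
 * N slots into a contiguous range big enough for all of the fanin srcs:
 */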
static void ra_calc_src_fanin(struct ir3_visitor *v,
		struct ir3_instruction *instr, struct ir3_register *reg)
{
	struct ra_calc_visitor *c = ra_calc_visitor(v);
	unsigned srcn = ir3_instr_regno(instr, reg) - 1;
	c->a.off += srcn;
	c->a.num += srcn;
	c->a.num = MAX2(c->a.num, instr->regs_count - 1);
}

static const struct ir3_visitor_funcs calc_visitor_funcs = {
		.instr = ir3_visit_instr,
		.dst_shader_input = ra_calc_dst_shader_input,
		.dst_fanout = ra_calc_dst,
		.dst_fanin = ra_calc_dst,
		.dst = ra_calc_dst,
		.src_fanout = ir3_visit_reg,
		.src_fanin = ra_calc_src_fanin,
		.src = ir3_visit_reg,
};

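/* run the calc visitor over 'assigner' to work out its range constraint,
 * ie. how many consecutive components are needed and where the dst falls
 * within that range:
 */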
static struct ir3_ra_assignment ra_calc(struct ir3_instruction *assigner)
{
	struct ra_calc_visitor v = {
			.base.funcs = &calc_visitor_funcs,
	};

	ir3_visit_instr(&v.base, assigner);

	return v.a;
}

/*
 * Register Assignment:
 */

struct ra_assign_visitor {
	struct ir3_visitor base;
	struct ir3_ra_ctx *ctx;
	int num;
};

static inline struct ra_assign_visitor *ra_assign_visitor(struct ir3_visitor *v)
{
	return (struct ra_assign_visitor *)v;
}

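/* convert a full precision type to the equivalent half precision type
 * (or pass a half precision type through unchanged):
 */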
static type_t half_type(type_t type)
{
	switch (type) {
	case TYPE_F32: return TYPE_F16;
	case TYPE_U32: return TYPE_U16;
	case TYPE_S32: return TYPE_S16;
	/* instructions may already be fixed up: */
	case TYPE_F16:
	case TYPE_U16:
	case TYPE_S16:
		return type;
	default:
		assert(0);
		return ~0;
	}
}

/* some instructions need fix-up if dst register is half precision: */
static void fixup_half_instr_dst(struct ir3_instruction *instr)
{
	switch (instr->category) {
	case 1: /* move instructions */
		instr->cat1.dst_type = half_type(instr->cat1.dst_type);
		break;
	case 3:
		switch (instr->opc) {
		case OPC_MAD_F32:
			instr->opc = OPC_MAD_F16;
			break;
		case OPC_SEL_B32:
			instr->opc = OPC_SEL_B16;
			break;
		case OPC_SEL_S32:
			instr->opc = OPC_SEL_S16;
			break;
		case OPC_SEL_F32:
			instr->opc = OPC_SEL_F16;
			break;
		case OPC_SAD_S32:
			instr->opc = OPC_SAD_S16;
			break;
		/* instructions may already be fixed up: */
		case OPC_MAD_F16:
		case OPC_SEL_B16:
		case OPC_SEL_S16:
		case OPC_SEL_F16:
		case OPC_SAD_S16:
			break;
		default:
			assert(0);
			break;
		}
		break;
	case 5:
		instr->cat5.type = half_type(instr->cat5.type);
		break;
	}
}

/* some instructions need fix-up if src register is half precision: */
static void fixup_half_instr_src(struct ir3_instruction *instr)
{
	switch (instr->category) {
	case 1: /* move instructions */
		instr->cat1.src_type = half_type(instr->cat1.src_type);
		break;
	}
}

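/* apply the assigned register number to 'reg': clear the SSA flag, and if
 * the REG_HALF bit was OR'd into the number, mark the register as half
 * precision and patch up the instruction to match:
 */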
static void ra_assign_reg(struct ir3_visitor *v,
		struct ir3_instruction *instr, struct ir3_register *reg)
{
	struct ra_assign_visitor *a = ra_assign_visitor(v);
	reg->flags &= ~IR3_REG_SSA;
	reg->num = a->num & ~REG_HALF;
	if (a->num & REG_HALF) {
		reg->flags |= IR3_REG_HALF;
		/* if dst reg being assigned, patch up the instr: */
		if (reg == instr->regs[0])
			fixup_half_instr_dst(instr);
		else
			fixup_half_instr_src(instr);
	}
}

static void ra_assign_dst_shader_input(struct ir3_visitor *v,
		struct ir3_instruction *instr, struct ir3_register *reg)
{
	struct ra_assign_visitor *a = ra_assign_visitor(v);
	unsigned i, base = reg->num & ~0x3;
	int off = base - reg->num;

	ra_assign_reg(v, instr, reg);
	reg->flags |= IR3_REG_IA;

	/* trigger assignment of all our companion input components: */
	for (i = 0; i < 4; i++) {
		struct ir3_instruction *in = instr->block->inputs[i+base];
		if (in && is_meta(in) && (in->opc == OPC_META_INPUT))
			ra_assign(a->ctx, in, a->num + off + i);
	}
}

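/* fanin/fanout are the meta collect/split instructions, so when one
 * register of the group is assigned the rest of the group is pinned;
 * propagate the assignment at the appropriate relative offsets:
 */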
static void ra_assign_dst_fanout(struct ir3_visitor *v,
		struct ir3_instruction *instr, struct ir3_register *reg)
{
	struct ra_assign_visitor *a = ra_assign_visitor(v);
	struct ir3_register *src = instr->regs[1];
	ra_assign_reg(v, instr, reg);
	if (src->flags & IR3_REG_SSA)
		ra_assign(a->ctx, src->instr, a->num - instr->fo.off);
}

static void ra_assign_src_fanout(struct ir3_visitor *v,
		struct ir3_instruction *instr, struct ir3_register *reg)
{
	struct ra_assign_visitor *a = ra_assign_visitor(v);
	ra_assign_reg(v, instr, reg);
	ra_assign(a->ctx, instr, a->num + instr->fo.off);
}

static void ra_assign_src_fanin(struct ir3_visitor *v,
		struct ir3_instruction *instr, struct ir3_register *reg)
{
	struct ra_assign_visitor *a = ra_assign_visitor(v);
	unsigned j, srcn = ir3_instr_regno(instr, reg) - 1;
	ra_assign_reg(v, instr, reg);
	ra_assign(a->ctx, instr, a->num - srcn);
	for (j = 1; j < instr->regs_count; j++) {
		struct ir3_register *reg = instr->regs[j];
		if (reg->flags & IR3_REG_SSA) /* could be renamed already */
			ra_assign(a->ctx, reg->instr, a->num - srcn + j - 1);
	}
}

static const struct ir3_visitor_funcs assign_visitor_funcs = {
		.instr = ir3_visit_instr,
		.dst_shader_input = ra_assign_dst_shader_input,
		.dst_fanout = ra_assign_dst_fanout,
		.dst_fanin = ra_assign_reg,
		.dst = ra_assign_reg,
		.src_fanout = ra_assign_src_fanout,
		.src_fanin = ra_assign_src_fanin,
		.src = ra_assign_reg,
};

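/* assign register 'num' (with REG_HALF possibly OR'd in) to 'assigner',
 * using the visitor to also rename everything constrained to sit at a
 * fixed offset from it (fanin/fanout groups, companion inputs, etc):
 */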
static void ra_assign(struct ir3_ra_ctx *ctx,
		struct ir3_instruction *assigner, int num)
{
	struct ra_assign_visitor v = {
			.base.funcs = &assign_visitor_funcs,
			.ctx = ctx,
			.num = num,
	};

	/* if we've already visited this instruction, bail now: */
	if (ir3_instr_check_mark(assigner)) {
		debug_assert(assigner->regs[0]->num == (num & ~REG_HALF));
		if (assigner->regs[0]->num != (num & ~REG_HALF)) {
			/* impossible situation, should have been resolved
			 * at an earlier stage by inserting extra mov's:
			 */
			ctx->error = true;
		}
		return;
	}

	ir3_visit_instr(&v.base, assigner);
}

/*
 *
 */

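/* allocate and assign register(s) for an instruction's dst: compute the
 * range constraint, find a free block of registers, and assign it (and
 * anything tied to it):
 */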
static void ir3_instr_ra(struct ir3_ra_ctx *ctx,
		struct ir3_instruction *instr)
{
	struct ir3_ra_assignment a;
	unsigned num;

	/* skip over nop's */
	if (instr->regs_count == 0)
		return;

	/* skip writes to a0, p0, etc */
	if (!reg_gpr(instr->regs[0]))
		return;

	/* if we've already visited this instruction, bail now: */
	if (instr->flags & IR3_INSTR_MARK)
		return;

	/* allocate register(s): */
	a = ra_calc(instr);
	num = alloc_block(ctx, instr, a.num) + a.off;

	ra_assign(ctx, instr, num);
}

/* flatten into shader: */
// XXX this should probably be somewhere else:
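// Copies the non-meta instructions into the shader's flat instruction
// array, folding/inserting nops as needed, and sets the (ss)/(sy) sync
// flags on instructions that consume an sfu/tex result (or that would
// overwrite one of their src registers, ie. a WAR hazard).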
static void legalize(struct ir3_ra_ctx *ctx, struct ir3_block *block)
{
	struct ir3_instruction *n;
	struct ir3_shader *shader = block->shader;
	struct ir3_instruction *end =
			ir3_instr_create(block, 0, OPC_END);
	struct ir3_instruction *last_input = NULL;
	regmask_t needs_ss_war;
	regmask_t needs_ss;
	regmask_t needs_sy;

	regmask_init(&needs_ss_war);
	regmask_init(&needs_ss);
	regmask_init(&needs_sy);

	shader->instrs_count = 0;

	for (n = block->head; n; n = n->next) {
		struct ir3_register *reg;
		unsigned i;

		if (is_meta(n))
			continue;

		for (i = 1; i < n->regs_count; i++) {
			reg = n->regs[i];

			if (reg_gpr(reg)) {

				/* TODO: we probably only need (ss) for alu
				 * instr consuming sfu result.. need to make
				 * some tests for both this and (sy)..
				 */
				if (regmask_get(&needs_ss, reg)) {
					n->flags |= IR3_INSTR_SS;
					regmask_init(&needs_ss);
				}

				if (regmask_get(&needs_sy, reg)) {
					n->flags |= IR3_INSTR_SY;
					regmask_init(&needs_sy);
				}
			}
		}

		if (n->regs_count > 0) {
			reg = n->regs[0];
			if (regmask_get(&needs_ss_war, reg)) {
				n->flags |= IR3_INSTR_SS;
				regmask_init(&needs_ss_war); // ??? I assume?
			}
		}

		/* cat5+ does not have an (ss) bit, so if needed we insert a
		 * nop to carry the sync flag.  Would be kinda clever if we
		 * were aware of this during scheduling, but this should be
		 * a pretty rare case:
		 */
		if ((n->flags & IR3_INSTR_SS) && (n->category >= 5)) {
			struct ir3_instruction *nop;
			nop = ir3_instr_create(block, 0, OPC_NOP);
			nop->flags |= IR3_INSTR_SS;
			n->flags &= ~IR3_INSTR_SS;
		}

		/* need to be able to set (ss) on first instruction: */
		if ((shader->instrs_count == 0) && (n->category >= 5))
			ir3_instr_create(block, 0, OPC_NOP);

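		/* fold a run of nops into the previous nop's repeat count
		 * (capped at 5), keeping any accumulated sync flags:
		 */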
		if (is_nop(n) && shader->instrs_count) {
			struct ir3_instruction *last =
					shader->instrs[shader->instrs_count-1];
			if (is_nop(last) && (last->repeat < 5)) {
				last->repeat++;
				last->flags |= n->flags;
				continue;
			}
		}

		shader->instrs[shader->instrs_count++] = n;

		if (is_sfu(n))
			regmask_set(&needs_ss, n->regs[0]);

		if (is_tex(n))
			regmask_set(&needs_sy, n->regs[0]);

		/* both tex/sfu appear to not always immediately consume
		 * their src register(s):
		 */
		if (is_tex(n) || is_sfu(n)) {
			for (i = 1; i < n->regs_count; i++) {
				reg = n->regs[i];
				if (reg_gpr(reg))
					regmask_set(&needs_ss_war, reg);
			}
		}

		if (is_input(n))
			last_input = n;
	}

	if (last_input)
		last_input->regs[0]->flags |= IR3_REG_EI;

	shader->instrs[shader->instrs_count++] = end;

	shader->instrs[0]->flags |= IR3_INSTR_SS | IR3_INSTR_SY;
}

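/* assign registers for a block: the root block's outputs (and, for the
 * fragment shader, inputs) get their fixed positions first, then the
 * remaining instructions are assigned in order, and finally the block
 * is legalized into the shader's instruction array:
 */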
static int block_ra(struct ir3_ra_ctx *ctx, struct ir3_block *block)
{
	struct ir3_instruction *n;

	if (!block->parent) {
		unsigned i;
		int base, off = output_base(ctx);

		base = alloc_block(ctx, NULL, block->noutputs + off);

		if (ctx->half_precision)
			base |= REG_HALF;

		for (i = 0; i < block->noutputs; i++)
			if (block->outputs[i])
				ra_assign(ctx, block->outputs[i], base + i + off);

		if (ctx->type == SHADER_FRAGMENT) {
			for (i = 0; i < block->ninputs; i++)
				if (block->inputs[i])
					ra_assign(ctx, block->inputs[i], (base & ~REG_HALF) + i);
		} else {
			for (i = 0; i < block->ninputs; i++)
				if (block->inputs[i])
					ir3_instr_ra(ctx, block->inputs[i]);
		}
	}

	/* then loop over instruction list and assign registers: */
	n = block->head;
	while (n) {
		ir3_instr_ra(ctx, n);
		if (ctx->error)
			return -1;
		n = n->next;
	}

	legalize(ctx, block);

	return 0;
}

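/* entry point: clear the instruction marks and run register assignment
 * (and legalization) over the block:
 */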
int ir3_block_ra(struct ir3_block *block, enum shader_t type,
		bool half_precision)
{
	struct ir3_ra_ctx ctx = {
			.block = block,
			.type = type,
			.half_precision = half_precision,
	};
	ir3_shader_clear_mark(block->shader);
	return block_ra(&ctx, block);
}