lima/ppir: simplify select op lowering and scheduling
[mesa.git] / src / gallium / drivers / lima / ir / pp / codegen.c
1 /*
2 * Copyright (c) 2017 Lima Project
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sub license,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the
12 * next paragraph) shall be included in all copies or substantial portions
13 * of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 *
23 */
24
25 #include "util/ralloc.h"
26 #include "util/u_half.h"
27 #include "util/bitscan.h"
28
29 #include "ppir.h"
30 #include "codegen.h"
31 #include "lima_context.h"
32
/* Pack a four-entry swizzle into a bitfield using two bits per entry.
 *
 * Entry i is offset by `shift`, wrapped to the range 0..3, and stored at bit
 * position (i + dest_shift) * 2 of the result.
 */
static unsigned encode_swizzle(uint8_t *swizzle, int shift, int dest_shift)
{
   unsigned packed = 0;
   int lane = 0;

   while (lane < 4) {
      unsigned selector = (swizzle[lane] + shift) & 0x3;

      packed |= selector << ((lane + dest_shift) * 2);
      lane++;
   }

   return packed;
}
40
41 static int get_scl_reg_index(ppir_src *src, int component)
42 {
43 int ret = ppir_target_get_src_reg_index(src);
44 ret += src->swizzle[component];
45 return ret;
46 }
47
48 static void ppir_codegen_encode_varying(ppir_node *node, void *code)
49 {
50 ppir_codegen_field_varying *f = code;
51 ppir_load_node *load = ppir_node_to_load(node);
52 ppir_dest *dest = &load->dest;
53 int index = ppir_target_get_dest_reg_index(dest);
54 int num_components = load->num_components;
55
56 if (num_components) {
57 assert(node->op == ppir_op_load_varying ||
58 node->op == ppir_op_load_coords ||
59 node->op == ppir_op_load_fragcoord ||
60 node->op == ppir_op_load_pointcoord ||
61 node->op == ppir_op_load_frontface);
62
63 f->imm.dest = index >> 2;
64 f->imm.mask = dest->write_mask << (index & 0x3);
65
66 int alignment = num_components == 3 ? 3 : num_components - 1;
67 f->imm.alignment = alignment;
68 f->imm.offset_vector = 0xf;
69
70 if (alignment == 3)
71 f->imm.index = load->index >> 2;
72 else
73 f->imm.index = load->index >> alignment;
74
75 switch (node->op) {
76 case ppir_op_load_fragcoord:
77 f->imm.source_type = 2;
78 f->imm.perspective = 3;
79 break;
80 case ppir_op_load_pointcoord:
81 f->imm.source_type = 3;
82 break;
83 case ppir_op_load_frontface:
84 f->imm.source_type = 3;
85 f->imm.perspective = 1;
86 break;
87 default:
88 break;
89 }
90 }
91 else {
92 assert(node->op == ppir_op_load_coords);
93
94 f->reg.dest = index >> 2;
95 f->reg.mask = dest->write_mask << (index & 0x3);
96
97 f->reg.source_type = 1;
98
99 ppir_src *src = &load->src;
100 index = ppir_target_get_src_reg_index(src);
101 f->reg.source = index >> 2;
102 f->reg.negate = src->negate;
103 f->reg.absolute = src->absolute;
104 f->reg.swizzle = encode_swizzle(src->swizzle, index & 0x3, 0);
105 }
106 }
107
108 static void ppir_codegen_encode_texld(ppir_node *node, void *code)
109 {
110 ppir_codegen_field_sampler *f = code;
111 ppir_load_texture_node *ldtex = ppir_node_to_load_texture(node);
112
113 f->index = ldtex->sampler;
114 f->lod_bias_en = 0;
115 f->type = ppir_codegen_sampler_type_2d;
116 f->offset_en = 0;
117 f->unknown_2 = 0x39001;
118 }
119
120 static void ppir_codegen_encode_uniform(ppir_node *node, void *code)
121 {
122 ppir_codegen_field_uniform *f = code;
123 ppir_load_node *load = ppir_node_to_load(node);
124
125 switch (node->op) {
126 case ppir_op_load_uniform:
127 f->source = ppir_codegen_uniform_src_uniform;
128 break;
129 case ppir_op_load_temp:
130 f->source = ppir_codegen_uniform_src_temporary;
131 break;
132 default:
133 assert(0);
134 }
135
136 int num_components = load->num_components;
137 int alignment = num_components == 4 ? 2 : num_components - 1;
138
139 f->alignment = alignment;
140
141 /* TODO: uniform can be also combined like varying */
142 f->index = load->index << (2 - alignment);
143 }
144
/* Convert a shift amount in the range [-3, 3] into an op code: non-negative
 * shifts map to themselves, negative shifts map to shift + 8.
 */
static unsigned shift_to_op(int shift)
{
   assert(-3 <= shift && shift <= 3);

   if (shift >= 0)
      return shift;

   return shift + 8;
}
150
151 static void ppir_codegen_encode_vec_mul(ppir_node *node, void *code)
152 {
153 ppir_codegen_field_vec4_mul *f = code;
154 ppir_alu_node *alu = ppir_node_to_alu(node);
155
156 ppir_dest *dest = &alu->dest;
157 int dest_shift = 0;
158 if (dest->type != ppir_target_pipeline) {
159 int index = ppir_target_get_dest_reg_index(dest);
160 dest_shift = index & 0x3;
161 f->dest = index >> 2;
162 f->mask = dest->write_mask << dest_shift;
163 }
164 f->dest_modifier = dest->modifier;
165
166 switch (node->op) {
167 case ppir_op_mul:
168 f->op = shift_to_op(alu->shift);
169 break;
170 case ppir_op_mov:
171 f->op = ppir_codegen_vec4_mul_op_mov;
172 break;
173 case ppir_op_max:
174 f->op = ppir_codegen_vec4_mul_op_max;
175 break;
176 case ppir_op_min:
177 f->op = ppir_codegen_vec4_mul_op_min;
178 break;
179 case ppir_op_and:
180 f->op = ppir_codegen_vec4_mul_op_and;
181 break;
182 case ppir_op_or:
183 f->op = ppir_codegen_vec4_mul_op_or;
184 break;
185 case ppir_op_xor:
186 f->op = ppir_codegen_vec4_mul_op_xor;
187 break;
188 case ppir_op_gt:
189 f->op = ppir_codegen_vec4_mul_op_gt;
190 break;
191 case ppir_op_ge:
192 f->op = ppir_codegen_vec4_mul_op_ge;
193 break;
194 case ppir_op_eq:
195 f->op = ppir_codegen_vec4_mul_op_eq;
196 break;
197 case ppir_op_ne:
198 f->op = ppir_codegen_vec4_mul_op_ne;
199 break;
200 case ppir_op_not:
201 f->op = ppir_codegen_vec4_mul_op_not;
202 break;
203 default:
204 break;
205 }
206
207 ppir_src *src = alu->src;
208 int index = ppir_target_get_src_reg_index(src);
209 f->arg0_source = index >> 2;
210 f->arg0_swizzle = encode_swizzle(src->swizzle, index & 0x3, dest_shift);
211 f->arg0_absolute = src->absolute;
212 f->arg0_negate = src->negate;
213
214 if (alu->num_src == 2) {
215 src = alu->src + 1;
216 index = ppir_target_get_src_reg_index(src);
217 f->arg1_source = index >> 2;
218 f->arg1_swizzle = encode_swizzle(src->swizzle, index & 0x3, dest_shift);
219 f->arg1_absolute = src->absolute;
220 f->arg1_negate = src->negate;
221 }
222 }
223
224 static void ppir_codegen_encode_scl_mul(ppir_node *node, void *code)
225 {
226 ppir_codegen_field_float_mul *f = code;
227 ppir_alu_node *alu = ppir_node_to_alu(node);
228
229 ppir_dest *dest = &alu->dest;
230 int dest_component = ffs(dest->write_mask) - 1;
231 assert(dest_component >= 0);
232
233 if (dest->type != ppir_target_pipeline) {
234 f->dest = ppir_target_get_dest_reg_index(dest) + dest_component;
235 f->output_en = true;
236 }
237 f->dest_modifier = dest->modifier;
238
239 switch (node->op) {
240 case ppir_op_mul:
241 f->op = shift_to_op(alu->shift);
242 break;
243 case ppir_op_mov:
244 f->op = ppir_codegen_float_mul_op_mov;
245 break;
246 case ppir_op_sel_cond:
247 f->op = ppir_codegen_float_mul_op_mov;
248 break;
249 case ppir_op_max:
250 f->op = ppir_codegen_float_mul_op_max;
251 break;
252 case ppir_op_min:
253 f->op = ppir_codegen_float_mul_op_min;
254 break;
255 case ppir_op_and:
256 f->op = ppir_codegen_float_mul_op_and;
257 break;
258 case ppir_op_or:
259 f->op = ppir_codegen_float_mul_op_or;
260 break;
261 case ppir_op_xor:
262 f->op = ppir_codegen_float_mul_op_xor;
263 break;
264 case ppir_op_gt:
265 f->op = ppir_codegen_float_mul_op_gt;
266 break;
267 case ppir_op_ge:
268 f->op = ppir_codegen_float_mul_op_ge;
269 break;
270 case ppir_op_eq:
271 f->op = ppir_codegen_float_mul_op_eq;
272 break;
273 case ppir_op_ne:
274 f->op = ppir_codegen_float_mul_op_ne;
275 break;
276 case ppir_op_not:
277 f->op = ppir_codegen_float_mul_op_not;
278 break;
279 default:
280 break;
281 }
282
283 ppir_src *src = alu->src;
284 f->arg0_source = get_scl_reg_index(src, dest_component);
285 f->arg0_absolute = src->absolute;
286 f->arg0_negate = src->negate;
287
288 if (alu->num_src == 2) {
289 src = alu->src + 1;
290 f->arg1_source = get_scl_reg_index(src, dest_component);
291 f->arg1_absolute = src->absolute;
292 f->arg1_negate = src->negate;
293 }
294 }
295
296 static void ppir_codegen_encode_vec_add(ppir_node *node, void *code)
297 {
298 ppir_codegen_field_vec4_acc *f = code;
299 ppir_alu_node *alu = ppir_node_to_alu(node);
300
301 ppir_dest *dest = &alu->dest;
302 int index = ppir_target_get_dest_reg_index(dest);
303 int dest_shift = index & 0x3;
304 f->dest = index >> 2;
305 f->mask = dest->write_mask << dest_shift;
306 f->dest_modifier = dest->modifier;
307
308 switch (node->op) {
309 case ppir_op_add:
310 f->op = ppir_codegen_vec4_acc_op_add;
311 break;
312 case ppir_op_mov:
313 f->op = ppir_codegen_vec4_acc_op_mov;
314 break;
315 case ppir_op_sum3:
316 f->op = ppir_codegen_vec4_acc_op_sum3;
317 dest_shift = 0;
318 break;
319 case ppir_op_sum4:
320 f->op = ppir_codegen_vec4_acc_op_sum4;
321 dest_shift = 0;
322 break;
323 case ppir_op_floor:
324 f->op = ppir_codegen_vec4_acc_op_floor;
325 break;
326 case ppir_op_ceil:
327 f->op = ppir_codegen_vec4_acc_op_ceil;
328 break;
329 case ppir_op_fract:
330 f->op = ppir_codegen_vec4_acc_op_fract;
331 break;
332 case ppir_op_gt:
333 f->op = ppir_codegen_vec4_acc_op_gt;
334 break;
335 case ppir_op_ge:
336 f->op = ppir_codegen_vec4_acc_op_ge;
337 break;
338 case ppir_op_eq:
339 f->op = ppir_codegen_vec4_acc_op_eq;
340 break;
341 case ppir_op_ne:
342 f->op = ppir_codegen_vec4_acc_op_ne;
343 break;
344 case ppir_op_select:
345 f->op = ppir_codegen_vec4_acc_op_sel;
346 break;
347 case ppir_op_max:
348 f->op = ppir_codegen_vec4_acc_op_max;
349 break;
350 case ppir_op_min:
351 f->op = ppir_codegen_vec4_acc_op_min;
352 break;
353 default:
354 break;
355 }
356
357 ppir_src *src = node->op == ppir_op_select ? alu->src + 1 : alu->src;
358 index = ppir_target_get_src_reg_index(src);
359
360 if (src->type == ppir_target_pipeline &&
361 src->pipeline == ppir_pipeline_reg_vmul)
362 f->mul_in = true;
363 else
364 f->arg0_source = index >> 2;
365
366 f->arg0_swizzle = encode_swizzle(src->swizzle, index & 0x3, dest_shift);
367 f->arg0_absolute = src->absolute;
368 f->arg0_negate = src->negate;
369
370 if (++src < alu->src + alu->num_src) {
371 index = ppir_target_get_src_reg_index(src);
372 f->arg1_source = index >> 2;
373 f->arg1_swizzle = encode_swizzle(src->swizzle, index & 0x3, dest_shift);
374 f->arg1_absolute = src->absolute;
375 f->arg1_negate = src->negate;
376 }
377 }
378
379 static void ppir_codegen_encode_scl_add(ppir_node *node, void *code)
380 {
381 ppir_codegen_field_float_acc *f = code;
382 ppir_alu_node *alu = ppir_node_to_alu(node);
383
384 ppir_dest *dest = &alu->dest;
385 int dest_component = ffs(dest->write_mask) - 1;
386 assert(dest_component >= 0);
387
388 f->dest = ppir_target_get_dest_reg_index(dest) + dest_component;
389 f->output_en = true;
390 f->dest_modifier = dest->modifier;
391
392 switch (node->op) {
393 case ppir_op_add:
394 f->op = shift_to_op(alu->shift);
395 break;
396 case ppir_op_mov:
397 f->op = ppir_codegen_float_acc_op_mov;
398 break;
399 case ppir_op_max:
400 f->op = ppir_codegen_float_acc_op_max;
401 break;
402 case ppir_op_min:
403 f->op = ppir_codegen_float_acc_op_min;
404 break;
405 case ppir_op_floor:
406 f->op = ppir_codegen_float_acc_op_floor;
407 break;
408 case ppir_op_ceil:
409 f->op = ppir_codegen_float_acc_op_ceil;
410 break;
411 case ppir_op_fract:
412 f->op = ppir_codegen_float_acc_op_fract;
413 break;
414 case ppir_op_gt:
415 f->op = ppir_codegen_float_acc_op_gt;
416 break;
417 case ppir_op_ge:
418 f->op = ppir_codegen_float_acc_op_ge;
419 break;
420 case ppir_op_eq:
421 f->op = ppir_codegen_float_acc_op_eq;
422 break;
423 case ppir_op_ne:
424 f->op = ppir_codegen_float_acc_op_ne;
425 break;
426 case ppir_op_select:
427 f->op = ppir_codegen_float_acc_op_sel;
428 break;
429 default:
430 break;
431 }
432
433 ppir_src *src = node->op == ppir_op_select ? alu->src + 1: alu->src;
434 if (src->type == ppir_target_pipeline &&
435 src->pipeline == ppir_pipeline_reg_fmul)
436 f->mul_in = true;
437 else
438 f->arg0_source = get_scl_reg_index(src, dest_component);
439 f->arg0_absolute = src->absolute;
440 f->arg0_negate = src->negate;
441
442 if (++src < alu->src + alu->num_src) {
443 f->arg1_source = get_scl_reg_index(src, dest_component);
444 f->arg1_absolute = src->absolute;
445 f->arg1_negate = src->negate;
446 }
447 }
448
449 static void ppir_codegen_encode_combine(ppir_node *node, void *code)
450 {
451 ppir_codegen_field_combine *f = code;
452 ppir_alu_node *alu = ppir_node_to_alu(node);
453
454 switch (node->op) {
455 case ppir_op_rsqrt:
456 case ppir_op_log2:
457 case ppir_op_exp2:
458 case ppir_op_rcp:
459 case ppir_op_sqrt:
460 case ppir_op_sin:
461 case ppir_op_cos:
462 {
463 f->scalar.dest_vec = false;
464 f->scalar.arg1_en = false;
465
466 ppir_dest *dest = &alu->dest;
467 int dest_component = ffs(dest->write_mask) - 1;
468 assert(dest_component >= 0);
469 f->scalar.dest = ppir_target_get_dest_reg_index(dest) + dest_component;
470 f->scalar.dest_modifier = dest->modifier;
471
472 ppir_src *src = alu->src;
473 f->scalar.arg0_src = get_scl_reg_index(src, dest_component);
474 f->scalar.arg0_absolute = src->absolute;
475 f->scalar.arg0_negate = src->negate;
476
477 switch (node->op) {
478 case ppir_op_rsqrt:
479 f->scalar.op = ppir_codegen_combine_scalar_op_rsqrt;
480 break;
481 case ppir_op_log2:
482 f->scalar.op = ppir_codegen_combine_scalar_op_log2;
483 break;
484 case ppir_op_exp2:
485 f->scalar.op = ppir_codegen_combine_scalar_op_exp2;
486 break;
487 case ppir_op_rcp:
488 f->scalar.op = ppir_codegen_combine_scalar_op_rcp;
489 break;
490 case ppir_op_sqrt:
491 f->scalar.op = ppir_codegen_combine_scalar_op_sqrt;
492 break;
493 case ppir_op_sin:
494 f->scalar.op = ppir_codegen_combine_scalar_op_sin;
495 break;
496 case ppir_op_cos:
497 f->scalar.op = ppir_codegen_combine_scalar_op_cos;
498 break;
499 default:
500 break;
501 }
502 }
503 default:
504 break;
505 }
506 }
507
508 static void ppir_codegen_encode_store_temp(ppir_node *node, void *code)
509 {
510 assert(node->op == ppir_op_store_temp);
511
512 ppir_codegen_field_temp_write *f = code;
513 ppir_store_node *snode = ppir_node_to_store(node);
514 int num_components = snode->num_components;
515
516 f->temp_write.dest = 0x03; // 11 - temporary
517 f->temp_write.source = snode->src.reg->index;
518
519 int alignment = num_components == 4 ? 2 : num_components - 1;
520 f->temp_write.alignment = alignment;
521 f->temp_write.index = snode->index << (2 - alignment);
522
523 f->temp_write.offset_reg = snode->index >> 2;
524 }
525
526 static void ppir_codegen_encode_const(ppir_const *constant, uint16_t *code)
527 {
528 for (int i = 0; i < constant->num; i++)
529 code[i] = util_float_to_half(constant->value[i].f);
530 }
531
532 static void ppir_codegen_encode_discard(ppir_node *node, void *code)
533 {
534 ppir_codegen_field_branch *b = code;
535 assert(node->op == ppir_op_discard);
536
537 b->discard.word0 = PPIR_CODEGEN_DISCARD_WORD0;
538 b->discard.word1 = PPIR_CODEGEN_DISCARD_WORD1;
539 b->discard.word2 = PPIR_CODEGEN_DISCARD_WORD2;
540 }
541
542 static void ppir_codegen_encode_branch(ppir_node *node, void *code)
543 {
544 ppir_codegen_field_branch *b = code;
545 ppir_branch_node *branch;
546 ppir_instr *target_instr;
547 if (node->op == ppir_op_discard) {
548 ppir_codegen_encode_discard(node, code);
549 return;
550 }
551
552 assert(node->op == ppir_op_branch);
553 branch = ppir_node_to_branch(node);
554
555 b->branch.unknown_0 = 0x0;
556 b->branch.arg0_source = get_scl_reg_index(&branch->src[0], 0);
557 b->branch.arg1_source = get_scl_reg_index(&branch->src[1], 0);
558 b->branch.cond_gt = branch->cond_gt;
559 b->branch.cond_eq = branch->cond_eq;
560 b->branch.cond_lt = branch->cond_lt;
561 b->branch.unknown_1 = 0x0;
562
563 target_instr = list_first_entry(&branch->target->instr_list, ppir_instr, list);
564 b->branch.target = target_instr->offset - node->instr->offset;
565 b->branch.next_count = target_instr->encode_size;
566 }
567
568 typedef void (*ppir_codegen_instr_slot_encode_func)(ppir_node *, void *);
569
570 static const ppir_codegen_instr_slot_encode_func
571 ppir_codegen_encode_slot[PPIR_INSTR_SLOT_NUM] = {
572 [PPIR_INSTR_SLOT_VARYING] = ppir_codegen_encode_varying,
573 [PPIR_INSTR_SLOT_TEXLD] = ppir_codegen_encode_texld,
574 [PPIR_INSTR_SLOT_UNIFORM] = ppir_codegen_encode_uniform,
575 [PPIR_INSTR_SLOT_ALU_VEC_MUL] = ppir_codegen_encode_vec_mul,
576 [PPIR_INSTR_SLOT_ALU_SCL_MUL] = ppir_codegen_encode_scl_mul,
577 [PPIR_INSTR_SLOT_ALU_VEC_ADD] = ppir_codegen_encode_vec_add,
578 [PPIR_INSTR_SLOT_ALU_SCL_ADD] = ppir_codegen_encode_scl_add,
579 [PPIR_INSTR_SLOT_ALU_COMBINE] = ppir_codegen_encode_combine,
580 [PPIR_INSTR_SLOT_STORE_TEMP] = ppir_codegen_encode_store_temp,
581 [PPIR_INSTR_SLOT_BRANCH] = ppir_codegen_encode_branch,
582 };
583
/* Encoded size in bits of each slot's field, indexed by slot number (the same
 * index used for instr->slots[] and ppir_codegen_encode_slot[]).
 */
static const int ppir_codegen_field_size[] = {
   34, 62, 41, 43, 30,
   44, 31, 30, 41, 73,
};
587
/* Convert a size in bits to the number of 32-bit words needed to hold it,
 * rounding up.
 */
static inline int align_to_word(int size)
{
   int rounded_up = size + 0x1f;

   return rounded_up >> 5;
}
592
593 static int get_instr_encode_size(ppir_instr *instr)
594 {
595 int size = 0;
596
597 for (int i = 0; i < PPIR_INSTR_SLOT_NUM; i++) {
598 if (instr->slots[i])
599 size += ppir_codegen_field_size[i];
600 }
601
602 for (int i = 0; i < 2; i++) {
603 if (instr->constant[i].num)
604 size += 64;
605 }
606
607 return align_to_word(size) + 1;
608 }
609
/* OR `src_size` bits from `src` into `dst`, starting at bit `dst_offset`.
 *
 * When the destination offset is word aligned the data is copied with
 * memcpy() in whole 32-bit words. Otherwise each source word is split in two:
 * its low part is OR-ed into the upper bits of the current destination word
 * and its high part into the lower bits of the next one. Because the
 * unaligned path only ORs, it relies on the destination bits being zero.
 */
static void bitcopy(void *dst, int dst_offset, void *src, int src_size)
{
   uint32_t *out = dst;
   uint32_t *in = src;
   int low = dst_offset & 0x1f;

   out += (dst_offset >> 5);

   if (!low) {
      memcpy(out, in, align_to_word(src_size) * 4);
      return;
   }

   int high = 32 - low;
   int copied = 0;

   for (;;) {
      /* Low part of the source word goes to the top of this word. */
      *out |= *in << low;
      out++;

      copied += high;
      if (copied >= src_size)
         break;

      /* Remaining high part goes to the bottom of the next word. */
      *out |= *in >> high;
      in++;

      copied += low;
      if (copied >= src_size)
         break;
   }
}
639
640 static int encode_instr(ppir_instr *instr, void *code, void *last_code)
641 {
642 int size = 0;
643 ppir_codegen_ctrl *ctrl = code;
644
645 for (int i = 0; i < PPIR_INSTR_SLOT_NUM; i++) {
646 if (instr->slots[i]) {
647 /* max field size (73), align to dword */
648 uint8_t output[12] = {0};
649
650 ppir_codegen_encode_slot[i](instr->slots[i], output);
651 bitcopy(ctrl + 1, size, output, ppir_codegen_field_size[i]);
652
653 size += ppir_codegen_field_size[i];
654 ctrl->fields |= 1 << i;
655 }
656 }
657
658 if (instr->slots[PPIR_INSTR_SLOT_TEXLD])
659 ctrl->sync = true;
660
661 for (int i = 0; i < 2; i++) {
662 if (instr->constant[i].num) {
663 uint16_t output[4] = {0};
664
665 ppir_codegen_encode_const(instr->constant + i, output);
666 bitcopy(ctrl + 1, size, output, instr->constant[i].num * 16);
667
668 size += 64;
669 ctrl->fields |= 1 << (ppir_codegen_field_shift_vec4_const_0 + i);
670 }
671 }
672
673 size = align_to_word(size) + 1;
674
675 ctrl->count = size;
676 if (instr->is_end)
677 ctrl->stop = true;
678
679 if (last_code) {
680 ppir_codegen_ctrl *last_ctrl = last_code;
681 last_ctrl->next_count = size;
682 last_ctrl->prefetch = true;
683 }
684
685 return size;
686 }
687
688 static void ppir_codegen_print_prog(ppir_compiler *comp)
689 {
690 uint32_t *prog = comp->prog->shader;
691 unsigned offset = 0;
692
693 printf("========ppir codegen========\n");
694 list_for_each_entry(ppir_block, block, &comp->block_list, list) {
695 list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
696 printf("%03d (@%6d): ", instr->index, instr->offset);
697 int n = prog[0] & 0x1f;
698 for (int i = 0; i < n; i++) {
699 if (i && i % 6 == 0)
700 printf("\n ");
701 printf("%08x ", prog[i]);
702 }
703 printf("\n");
704 ppir_disassemble_instr(prog, offset);
705 prog += n;
706 offset += n;
707 }
708 }
709 printf("-----------------------\n");
710 }
711
712 bool ppir_codegen_prog(ppir_compiler *comp)
713 {
714 int size = 0;
715 list_for_each_entry(ppir_block, block, &comp->block_list, list) {
716 list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
717 instr->offset = size;
718 instr->encode_size = get_instr_encode_size(instr);
719 size += instr->encode_size;
720 }
721 }
722
723 uint32_t *prog = rzalloc_size(comp->prog, size * sizeof(uint32_t));
724 if (!prog)
725 return false;
726
727 uint32_t *code = prog, *last_code = NULL;
728 list_for_each_entry(ppir_block, block, &comp->block_list, list) {
729 list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
730 int offset = encode_instr(instr, code, last_code);
731 last_code = code;
732 code += offset;
733 }
734 }
735
736 comp->prog->shader = prog;
737 comp->prog->shader_size = size * sizeof(uint32_t);
738
739 if (lima_debug & LIMA_DEBUG_PP)
740 ppir_codegen_print_prog(comp);
741
742 return true;
743 }