lima/ppir: turn store_color into ALU node
[mesa.git] / src / gallium / drivers / lima / ir / pp / codegen.c
1 /*
2 * Copyright (c) 2017 Lima Project
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sub license,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the
12 * next paragraph) shall be included in all copies or substantial portions
13 * of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 *
23 */
24
25 #include "util/ralloc.h"
26 #include "util/u_half.h"
27 #include "util/bitscan.h"
28
29 #include "ppir.h"
30 #include "codegen.h"
31 #include "lima_context.h"
32
/* Pack a four-entry swizzle into consecutive 2-bit selectors.
 *
 * Entry i contributes ((swizzle[i] + shift) & 3), placed at bit position
 * (i + dest_shift) * 2 of the returned value.
 */
static unsigned encode_swizzle(uint8_t *swizzle, int shift, int dest_shift)
{
   unsigned packed = 0;
   int bit_pos = dest_shift * 2;

   for (int comp = 0; comp < 4; comp++, bit_pos += 2) {
      int selector = (swizzle[comp] + shift) & 0x3;
      packed |= selector << bit_pos;
   }

   return packed;
}
40
41 static int get_scl_reg_index(ppir_src *src, int component)
42 {
43 int ret = ppir_target_get_src_reg_index(src);
44 ret += src->swizzle[component];
45 return ret;
46 }
47
48 static void ppir_codegen_encode_varying(ppir_node *node, void *code)
49 {
50 ppir_codegen_field_varying *f = code;
51 ppir_load_node *load = ppir_node_to_load(node);
52 ppir_dest *dest = &load->dest;
53 int index = ppir_target_get_dest_reg_index(dest);
54 int num_components = load->num_components;
55
56 if (num_components) {
57 assert(node->op == ppir_op_load_varying ||
58 node->op == ppir_op_load_coords ||
59 node->op == ppir_op_load_fragcoord ||
60 node->op == ppir_op_load_pointcoord ||
61 node->op == ppir_op_load_frontface);
62
63 f->imm.dest = index >> 2;
64 f->imm.mask = dest->write_mask << (index & 0x3);
65
66 int alignment = num_components == 3 ? 3 : num_components - 1;
67 f->imm.alignment = alignment;
68 f->imm.offset_vector = 0xf;
69
70 if (alignment == 3)
71 f->imm.index = load->index >> 2;
72 else
73 f->imm.index = load->index >> alignment;
74
75 switch (node->op) {
76 case ppir_op_load_fragcoord:
77 f->imm.source_type = 2;
78 f->imm.perspective = 3;
79 break;
80 case ppir_op_load_pointcoord:
81 f->imm.source_type = 3;
82 break;
83 case ppir_op_load_frontface:
84 f->imm.source_type = 3;
85 f->imm.perspective = 1;
86 break;
87 default:
88 break;
89 }
90 }
91 else {
92 assert(node->op == ppir_op_load_coords);
93
94 f->reg.dest = index >> 2;
95 f->reg.mask = dest->write_mask << (index & 0x3);
96
97 f->reg.source_type = 1;
98
99 ppir_src *src = &load->src;
100 index = ppir_target_get_src_reg_index(src);
101 f->reg.source = index >> 2;
102 f->reg.negate = src->negate;
103 f->reg.absolute = src->absolute;
104 f->reg.swizzle = encode_swizzle(src->swizzle, index & 0x3, 0);
105 }
106 }
107
108 static void ppir_codegen_encode_texld(ppir_node *node, void *code)
109 {
110 ppir_codegen_field_sampler *f = code;
111 ppir_load_texture_node *ldtex = ppir_node_to_load_texture(node);
112
113 f->index = ldtex->sampler;
114 f->lod_bias_en = 0;
115 f->type = ppir_codegen_sampler_type_2d;
116 f->offset_en = 0;
117 f->unknown_2 = 0x39001;
118 }
119
120 static void ppir_codegen_encode_uniform(ppir_node *node, void *code)
121 {
122 ppir_codegen_field_uniform *f = code;
123 ppir_load_node *load = ppir_node_to_load(node);
124
125 switch (node->op) {
126 case ppir_op_load_uniform:
127 f->source = ppir_codegen_uniform_src_uniform;
128 break;
129 case ppir_op_load_temp:
130 f->source = ppir_codegen_uniform_src_temporary;
131 break;
132 default:
133 assert(0);
134 }
135
136 int num_components = load->num_components;
137 int alignment = num_components == 4 ? 2 : num_components - 1;
138
139 f->alignment = alignment;
140
141 /* TODO: uniform can be also combined like varying */
142 f->index = load->index << (2 - alignment);
143 }
144
/* Map a shift in [-3, 3] to its op encoding: non-negative shifts map to
 * themselves, negative shifts map to shift + 8.
 */
static unsigned shift_to_op(int shift)
{
   assert(shift >= -3 && shift <= 3);

   if (shift < 0)
      return shift + 8;

   return shift;
}
150
151 static void ppir_codegen_encode_vec_mul(ppir_node *node, void *code)
152 {
153 ppir_codegen_field_vec4_mul *f = code;
154 ppir_alu_node *alu = ppir_node_to_alu(node);
155
156 ppir_dest *dest = &alu->dest;
157 int dest_shift = 0;
158 if (dest->type != ppir_target_pipeline) {
159 int index = ppir_target_get_dest_reg_index(dest);
160 dest_shift = index & 0x3;
161 f->dest = index >> 2;
162 f->mask = dest->write_mask << dest_shift;
163 }
164 f->dest_modifier = dest->modifier;
165
166 switch (node->op) {
167 case ppir_op_mul:
168 f->op = shift_to_op(alu->shift);
169 break;
170 case ppir_op_mov:
171 case ppir_op_store_color:
172 f->op = ppir_codegen_vec4_mul_op_mov;
173 break;
174 case ppir_op_max:
175 f->op = ppir_codegen_vec4_mul_op_max;
176 break;
177 case ppir_op_min:
178 f->op = ppir_codegen_vec4_mul_op_min;
179 break;
180 case ppir_op_and:
181 f->op = ppir_codegen_vec4_mul_op_and;
182 break;
183 case ppir_op_or:
184 f->op = ppir_codegen_vec4_mul_op_or;
185 break;
186 case ppir_op_xor:
187 f->op = ppir_codegen_vec4_mul_op_xor;
188 break;
189 case ppir_op_gt:
190 f->op = ppir_codegen_vec4_mul_op_gt;
191 break;
192 case ppir_op_ge:
193 f->op = ppir_codegen_vec4_mul_op_ge;
194 break;
195 case ppir_op_eq:
196 f->op = ppir_codegen_vec4_mul_op_eq;
197 break;
198 case ppir_op_ne:
199 f->op = ppir_codegen_vec4_mul_op_ne;
200 break;
201 case ppir_op_not:
202 f->op = ppir_codegen_vec4_mul_op_not;
203 break;
204 default:
205 break;
206 }
207
208 ppir_src *src = alu->src;
209 int index = ppir_target_get_src_reg_index(src);
210 f->arg0_source = index >> 2;
211 f->arg0_swizzle = encode_swizzle(src->swizzle, index & 0x3, dest_shift);
212 f->arg0_absolute = src->absolute;
213 f->arg0_negate = src->negate;
214
215 if (alu->num_src == 2) {
216 src = alu->src + 1;
217 index = ppir_target_get_src_reg_index(src);
218 f->arg1_source = index >> 2;
219 f->arg1_swizzle = encode_swizzle(src->swizzle, index & 0x3, dest_shift);
220 f->arg1_absolute = src->absolute;
221 f->arg1_negate = src->negate;
222 }
223 }
224
225 static void ppir_codegen_encode_scl_mul(ppir_node *node, void *code)
226 {
227 ppir_codegen_field_float_mul *f = code;
228 ppir_alu_node *alu = ppir_node_to_alu(node);
229
230 ppir_dest *dest = &alu->dest;
231 int dest_component = ffs(dest->write_mask) - 1;
232 assert(dest_component >= 0);
233
234 if (dest->type != ppir_target_pipeline) {
235 f->dest = ppir_target_get_dest_reg_index(dest) + dest_component;
236 f->output_en = true;
237 }
238 f->dest_modifier = dest->modifier;
239
240 switch (node->op) {
241 case ppir_op_mul:
242 f->op = shift_to_op(alu->shift);
243 break;
244 case ppir_op_mov:
245 f->op = ppir_codegen_float_mul_op_mov;
246 break;
247 case ppir_op_sel_cond:
248 f->op = ppir_codegen_float_mul_op_mov;
249 break;
250 case ppir_op_max:
251 f->op = ppir_codegen_float_mul_op_max;
252 break;
253 case ppir_op_min:
254 f->op = ppir_codegen_float_mul_op_min;
255 break;
256 case ppir_op_and:
257 f->op = ppir_codegen_float_mul_op_and;
258 break;
259 case ppir_op_or:
260 f->op = ppir_codegen_float_mul_op_or;
261 break;
262 case ppir_op_xor:
263 f->op = ppir_codegen_float_mul_op_xor;
264 break;
265 case ppir_op_gt:
266 f->op = ppir_codegen_float_mul_op_gt;
267 break;
268 case ppir_op_ge:
269 f->op = ppir_codegen_float_mul_op_ge;
270 break;
271 case ppir_op_eq:
272 f->op = ppir_codegen_float_mul_op_eq;
273 break;
274 case ppir_op_ne:
275 f->op = ppir_codegen_float_mul_op_ne;
276 break;
277 case ppir_op_not:
278 f->op = ppir_codegen_float_mul_op_not;
279 break;
280 default:
281 break;
282 }
283
284 ppir_src *src = alu->src;
285 f->arg0_source = get_scl_reg_index(src, dest_component);
286 f->arg0_absolute = src->absolute;
287 f->arg0_negate = src->negate;
288
289 if (alu->num_src == 2) {
290 src = alu->src + 1;
291 f->arg1_source = get_scl_reg_index(src, dest_component);
292 f->arg1_absolute = src->absolute;
293 f->arg1_negate = src->negate;
294 }
295 }
296
297 static void ppir_codegen_encode_vec_add(ppir_node *node, void *code)
298 {
299 ppir_codegen_field_vec4_acc *f = code;
300 ppir_alu_node *alu = ppir_node_to_alu(node);
301
302 ppir_dest *dest = &alu->dest;
303 int index = ppir_target_get_dest_reg_index(dest);
304 int dest_shift = index & 0x3;
305 f->dest = index >> 2;
306 f->mask = dest->write_mask << dest_shift;
307 f->dest_modifier = dest->modifier;
308
309 switch (node->op) {
310 case ppir_op_add:
311 f->op = ppir_codegen_vec4_acc_op_add;
312 break;
313 case ppir_op_mov:
314 case ppir_op_store_color:
315 f->op = ppir_codegen_vec4_acc_op_mov;
316 break;
317 case ppir_op_sum3:
318 f->op = ppir_codegen_vec4_acc_op_sum3;
319 dest_shift = 0;
320 break;
321 case ppir_op_sum4:
322 f->op = ppir_codegen_vec4_acc_op_sum4;
323 dest_shift = 0;
324 break;
325 case ppir_op_floor:
326 f->op = ppir_codegen_vec4_acc_op_floor;
327 break;
328 case ppir_op_ceil:
329 f->op = ppir_codegen_vec4_acc_op_ceil;
330 break;
331 case ppir_op_fract:
332 f->op = ppir_codegen_vec4_acc_op_fract;
333 break;
334 case ppir_op_gt:
335 f->op = ppir_codegen_vec4_acc_op_gt;
336 break;
337 case ppir_op_ge:
338 f->op = ppir_codegen_vec4_acc_op_ge;
339 break;
340 case ppir_op_eq:
341 f->op = ppir_codegen_vec4_acc_op_eq;
342 break;
343 case ppir_op_ne:
344 f->op = ppir_codegen_vec4_acc_op_ne;
345 break;
346 case ppir_op_select:
347 f->op = ppir_codegen_vec4_acc_op_sel;
348 break;
349 case ppir_op_max:
350 f->op = ppir_codegen_vec4_acc_op_max;
351 break;
352 case ppir_op_min:
353 f->op = ppir_codegen_vec4_acc_op_min;
354 break;
355 case ppir_op_ddx:
356 f->op = ppir_codegen_vec4_acc_op_dFdx;
357 break;
358 case ppir_op_ddy:
359 f->op = ppir_codegen_vec4_acc_op_dFdy;
360 break;
361 default:
362 break;
363 }
364
365 ppir_src *src = node->op == ppir_op_select ? alu->src + 1 : alu->src;
366 index = ppir_target_get_src_reg_index(src);
367
368 if (src->type == ppir_target_pipeline &&
369 src->pipeline == ppir_pipeline_reg_vmul)
370 f->mul_in = true;
371 else
372 f->arg0_source = index >> 2;
373
374 f->arg0_swizzle = encode_swizzle(src->swizzle, index & 0x3, dest_shift);
375 f->arg0_absolute = src->absolute;
376 f->arg0_negate = src->negate;
377
378 if (++src < alu->src + alu->num_src) {
379 index = ppir_target_get_src_reg_index(src);
380 f->arg1_source = index >> 2;
381 f->arg1_swizzle = encode_swizzle(src->swizzle, index & 0x3, dest_shift);
382 f->arg1_absolute = src->absolute;
383 f->arg1_negate = src->negate;
384 }
385 }
386
387 static void ppir_codegen_encode_scl_add(ppir_node *node, void *code)
388 {
389 ppir_codegen_field_float_acc *f = code;
390 ppir_alu_node *alu = ppir_node_to_alu(node);
391
392 ppir_dest *dest = &alu->dest;
393 int dest_component = ffs(dest->write_mask) - 1;
394 assert(dest_component >= 0);
395
396 f->dest = ppir_target_get_dest_reg_index(dest) + dest_component;
397 f->output_en = true;
398 f->dest_modifier = dest->modifier;
399
400 switch (node->op) {
401 case ppir_op_add:
402 f->op = shift_to_op(alu->shift);
403 break;
404 case ppir_op_mov:
405 f->op = ppir_codegen_float_acc_op_mov;
406 break;
407 case ppir_op_max:
408 f->op = ppir_codegen_float_acc_op_max;
409 break;
410 case ppir_op_min:
411 f->op = ppir_codegen_float_acc_op_min;
412 break;
413 case ppir_op_floor:
414 f->op = ppir_codegen_float_acc_op_floor;
415 break;
416 case ppir_op_ceil:
417 f->op = ppir_codegen_float_acc_op_ceil;
418 break;
419 case ppir_op_fract:
420 f->op = ppir_codegen_float_acc_op_fract;
421 break;
422 case ppir_op_gt:
423 f->op = ppir_codegen_float_acc_op_gt;
424 break;
425 case ppir_op_ge:
426 f->op = ppir_codegen_float_acc_op_ge;
427 break;
428 case ppir_op_eq:
429 f->op = ppir_codegen_float_acc_op_eq;
430 break;
431 case ppir_op_ne:
432 f->op = ppir_codegen_float_acc_op_ne;
433 break;
434 case ppir_op_select:
435 f->op = ppir_codegen_float_acc_op_sel;
436 break;
437 case ppir_op_ddx:
438 f->op = ppir_codegen_float_acc_op_dFdx;
439 break;
440 case ppir_op_ddy:
441 f->op = ppir_codegen_float_acc_op_dFdy;
442 break;
443 default:
444 break;
445 }
446
447 ppir_src *src = node->op == ppir_op_select ? alu->src + 1: alu->src;
448 if (src->type == ppir_target_pipeline &&
449 src->pipeline == ppir_pipeline_reg_fmul)
450 f->mul_in = true;
451 else
452 f->arg0_source = get_scl_reg_index(src, dest_component);
453 f->arg0_absolute = src->absolute;
454 f->arg0_negate = src->negate;
455
456 if (++src < alu->src + alu->num_src) {
457 f->arg1_source = get_scl_reg_index(src, dest_component);
458 f->arg1_absolute = src->absolute;
459 f->arg1_negate = src->negate;
460 }
461 }
462
463 static void ppir_codegen_encode_combine(ppir_node *node, void *code)
464 {
465 ppir_codegen_field_combine *f = code;
466 ppir_alu_node *alu = ppir_node_to_alu(node);
467
468 switch (node->op) {
469 case ppir_op_rsqrt:
470 case ppir_op_log2:
471 case ppir_op_exp2:
472 case ppir_op_rcp:
473 case ppir_op_sqrt:
474 case ppir_op_sin:
475 case ppir_op_cos:
476 {
477 f->scalar.dest_vec = false;
478 f->scalar.arg1_en = false;
479
480 ppir_dest *dest = &alu->dest;
481 int dest_component = ffs(dest->write_mask) - 1;
482 assert(dest_component >= 0);
483 f->scalar.dest = ppir_target_get_dest_reg_index(dest) + dest_component;
484 f->scalar.dest_modifier = dest->modifier;
485
486 ppir_src *src = alu->src;
487 f->scalar.arg0_src = get_scl_reg_index(src, dest_component);
488 f->scalar.arg0_absolute = src->absolute;
489 f->scalar.arg0_negate = src->negate;
490
491 switch (node->op) {
492 case ppir_op_rsqrt:
493 f->scalar.op = ppir_codegen_combine_scalar_op_rsqrt;
494 break;
495 case ppir_op_log2:
496 f->scalar.op = ppir_codegen_combine_scalar_op_log2;
497 break;
498 case ppir_op_exp2:
499 f->scalar.op = ppir_codegen_combine_scalar_op_exp2;
500 break;
501 case ppir_op_rcp:
502 f->scalar.op = ppir_codegen_combine_scalar_op_rcp;
503 break;
504 case ppir_op_sqrt:
505 f->scalar.op = ppir_codegen_combine_scalar_op_sqrt;
506 break;
507 case ppir_op_sin:
508 f->scalar.op = ppir_codegen_combine_scalar_op_sin;
509 break;
510 case ppir_op_cos:
511 f->scalar.op = ppir_codegen_combine_scalar_op_cos;
512 break;
513 default:
514 break;
515 }
516 }
517 default:
518 break;
519 }
520 }
521
522 static void ppir_codegen_encode_store_temp(ppir_node *node, void *code)
523 {
524 assert(node->op == ppir_op_store_temp);
525
526 ppir_codegen_field_temp_write *f = code;
527 ppir_store_node *snode = ppir_node_to_store(node);
528 int num_components = snode->num_components;
529
530 f->temp_write.dest = 0x03; // 11 - temporary
531 f->temp_write.source = snode->src.reg->index;
532
533 int alignment = num_components == 4 ? 2 : num_components - 1;
534 f->temp_write.alignment = alignment;
535 f->temp_write.index = snode->index << (2 - alignment);
536
537 f->temp_write.offset_reg = snode->index >> 2;
538 }
539
540 static void ppir_codegen_encode_const(ppir_const *constant, uint16_t *code)
541 {
542 for (int i = 0; i < constant->num; i++)
543 code[i] = util_float_to_half(constant->value[i].f);
544 }
545
546 static void ppir_codegen_encode_discard(ppir_node *node, void *code)
547 {
548 ppir_codegen_field_branch *b = code;
549 assert(node->op == ppir_op_discard);
550
551 b->discard.word0 = PPIR_CODEGEN_DISCARD_WORD0;
552 b->discard.word1 = PPIR_CODEGEN_DISCARD_WORD1;
553 b->discard.word2 = PPIR_CODEGEN_DISCARD_WORD2;
554 }
555
556 static void ppir_codegen_encode_branch(ppir_node *node, void *code)
557 {
558 ppir_codegen_field_branch *b = code;
559 ppir_branch_node *branch;
560 ppir_instr *target_instr;
561 if (node->op == ppir_op_discard) {
562 ppir_codegen_encode_discard(node, code);
563 return;
564 }
565
566 assert(node->op == ppir_op_branch);
567 branch = ppir_node_to_branch(node);
568
569 b->branch.unknown_0 = 0x0;
570 b->branch.unknown_1 = 0x0;
571
572 if (branch->num_src == 2) {
573 b->branch.arg0_source = get_scl_reg_index(&branch->src[0], 0);
574 b->branch.arg1_source = get_scl_reg_index(&branch->src[1], 0);
575 b->branch.cond_gt = branch->cond_gt;
576 b->branch.cond_eq = branch->cond_eq;
577 b->branch.cond_lt = branch->cond_lt;
578 } else if (branch->num_src == 0) {
579 /* Unconditional branch */
580 b->branch.arg0_source = 0;
581 b->branch.arg1_source = 0;
582 b->branch.cond_gt = true;
583 b->branch.cond_eq = true;
584 b->branch.cond_lt = true;
585 } else {
586 assert(false);
587 }
588
589 target_instr = list_first_entry(&branch->target->instr_list, ppir_instr, list);
590 b->branch.target = target_instr->offset - node->instr->offset;
591 b->branch.next_count = target_instr->encode_size;
592 }
593
594 typedef void (*ppir_codegen_instr_slot_encode_func)(ppir_node *, void *);
595
596 static const ppir_codegen_instr_slot_encode_func
597 ppir_codegen_encode_slot[PPIR_INSTR_SLOT_NUM] = {
598 [PPIR_INSTR_SLOT_VARYING] = ppir_codegen_encode_varying,
599 [PPIR_INSTR_SLOT_TEXLD] = ppir_codegen_encode_texld,
600 [PPIR_INSTR_SLOT_UNIFORM] = ppir_codegen_encode_uniform,
601 [PPIR_INSTR_SLOT_ALU_VEC_MUL] = ppir_codegen_encode_vec_mul,
602 [PPIR_INSTR_SLOT_ALU_SCL_MUL] = ppir_codegen_encode_scl_mul,
603 [PPIR_INSTR_SLOT_ALU_VEC_ADD] = ppir_codegen_encode_vec_add,
604 [PPIR_INSTR_SLOT_ALU_SCL_ADD] = ppir_codegen_encode_scl_add,
605 [PPIR_INSTR_SLOT_ALU_COMBINE] = ppir_codegen_encode_combine,
606 [PPIR_INSTR_SLOT_STORE_TEMP] = ppir_codegen_encode_store_temp,
607 [PPIR_INSTR_SLOT_BRANCH] = ppir_codegen_encode_branch,
608 };
609
/* Encoded size in bits of each slot's field, indexed by slot number
 * (the same index used for instr->slots[]).
 */
static const int ppir_codegen_field_size[] = {
   34, /* slot 0 */
   62, /* slot 1 */
   41, /* slot 2 */
   43, /* slot 3 */
   30, /* slot 4 */
   44, /* slot 5 */
   31, /* slot 6 */
   30, /* slot 7 */
   41, /* slot 8 */
   73, /* slot 9 */
};
613
/* Number of 32-bit words needed to hold `size` bits (rounded up). */
static inline int align_to_word(int size)
{
   int rounded_up = size + 31;

   return rounded_up >> 5;
}
618
619 static int get_instr_encode_size(ppir_instr *instr)
620 {
621 int size = 0;
622
623 for (int i = 0; i < PPIR_INSTR_SLOT_NUM; i++) {
624 if (instr->slots[i])
625 size += ppir_codegen_field_size[i];
626 }
627
628 for (int i = 0; i < 2; i++) {
629 if (instr->constant[i].num)
630 size += 64;
631 }
632
633 return align_to_word(size) + 1;
634 }
635
/* Copy `src_size` bits from `src` into `dst`, starting at bit `dst_offset`.
 *
 * dst        - destination, accessed as an array of 32-bit words.
 * dst_offset - bit position in dst where the copy starts.
 * src        - source bits; may be a byte buffer with any alignment.
 * src_size   - number of bits to copy.
 *
 * When dst_offset is word-aligned the source is copied with memcpy, rounded
 * up to whole 32-bit words.  Otherwise the source words are shifted and
 * OR-ed into the destination, so the destination bits being written must
 * already be zero.
 *
 * Source words are loaded with memcpy rather than through a uint32_t
 * pointer: callers pass uint8_t/uint16_t stack buffers, which are not
 * guaranteed to be 4-byte aligned and must not be read as uint32_t objects.
 */
static void bitcopy(void *dst, int dst_offset, void *src, int src_size)
{
   const int bit_off = dst_offset & 0x1f;
   uint32_t *out = (uint32_t *)dst + (dst_offset >> 5);
   const unsigned char *in = src;

   if (!bit_off) {
      /* Word-aligned destination: copy whole words. */
      memcpy(out, in, ((src_size + 0x1f) >> 5) * 4);
      return;
   }

   /* Bits of each source word that fit in the current destination word. */
   const int rem = 32 - bit_off;
   int copied = 0;

   for (;;) {
      uint32_t word;
      memcpy(&word, in, sizeof(word));

      /* Low part of the source word fills the rest of this dst word. */
      *out |= word << bit_off;
      out++;

      copied += rem;
      if (copied >= src_size)
         break;

      /* High part of the source word starts the next dst word. */
      *out |= word >> rem;
      in += sizeof(word);

      copied += bit_off;
      if (copied >= src_size)
         break;
   }
}
665
666 static int encode_instr(ppir_instr *instr, void *code, void *last_code)
667 {
668 int size = 0;
669 ppir_codegen_ctrl *ctrl = code;
670
671 for (int i = 0; i < PPIR_INSTR_SLOT_NUM; i++) {
672 if (instr->slots[i]) {
673 /* max field size (73), align to dword */
674 uint8_t output[12] = {0};
675
676 ppir_codegen_encode_slot[i](instr->slots[i], output);
677 bitcopy(ctrl + 1, size, output, ppir_codegen_field_size[i]);
678
679 size += ppir_codegen_field_size[i];
680 ctrl->fields |= 1 << i;
681 }
682 }
683
684 if (instr->slots[PPIR_INSTR_SLOT_TEXLD])
685 ctrl->sync = true;
686
687 if (instr->slots[PPIR_INSTR_SLOT_ALU_VEC_ADD]) {
688 ppir_node *node = instr->slots[PPIR_INSTR_SLOT_ALU_VEC_ADD];
689 if (node->op == ppir_op_ddx || node->op == ppir_op_ddy)
690 ctrl->sync = true;
691 }
692
693 if (instr->slots[PPIR_INSTR_SLOT_ALU_SCL_ADD]) {
694 ppir_node *node = instr->slots[PPIR_INSTR_SLOT_ALU_SCL_ADD];
695 if (node->op == ppir_op_ddx || node->op == ppir_op_ddy)
696 ctrl->sync = true;
697 }
698
699 for (int i = 0; i < 2; i++) {
700 if (instr->constant[i].num) {
701 uint16_t output[4] = {0};
702
703 ppir_codegen_encode_const(instr->constant + i, output);
704 bitcopy(ctrl + 1, size, output, instr->constant[i].num * 16);
705
706 size += 64;
707 ctrl->fields |= 1 << (ppir_codegen_field_shift_vec4_const_0 + i);
708 }
709 }
710
711 size = align_to_word(size) + 1;
712
713 ctrl->count = size;
714 if (instr->is_end)
715 ctrl->stop = true;
716
717 if (last_code) {
718 ppir_codegen_ctrl *last_ctrl = last_code;
719 last_ctrl->next_count = size;
720 last_ctrl->prefetch = true;
721 }
722
723 return size;
724 }
725
726 static void ppir_codegen_print_prog(ppir_compiler *comp)
727 {
728 uint32_t *prog = comp->prog->shader;
729 unsigned offset = 0;
730
731 printf("========ppir codegen========\n");
732 list_for_each_entry(ppir_block, block, &comp->block_list, list) {
733 list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
734 printf("%03d (@%6d): ", instr->index, instr->offset);
735 int n = prog[0] & 0x1f;
736 for (int i = 0; i < n; i++) {
737 if (i && i % 6 == 0)
738 printf("\n ");
739 printf("%08x ", prog[i]);
740 }
741 printf("\n");
742 ppir_disassemble_instr(prog, offset);
743 prog += n;
744 offset += n;
745 }
746 }
747 printf("-----------------------\n");
748 }
749
750 bool ppir_codegen_prog(ppir_compiler *comp)
751 {
752 int size = 0;
753 list_for_each_entry(ppir_block, block, &comp->block_list, list) {
754 list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
755 instr->offset = size;
756 instr->encode_size = get_instr_encode_size(instr);
757 size += instr->encode_size;
758 }
759 }
760
761 uint32_t *prog = rzalloc_size(comp->prog, size * sizeof(uint32_t));
762 if (!prog)
763 return false;
764
765 uint32_t *code = prog, *last_code = NULL;
766 list_for_each_entry(ppir_block, block, &comp->block_list, list) {
767 list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
768 int offset = encode_instr(instr, code, last_code);
769 last_code = code;
770 code += offset;
771 }
772 }
773
774 comp->prog->shader = prog;
775 comp->prog->shader_size = size * sizeof(uint32_t);
776
777 if (lima_debug & LIMA_DEBUG_PP)
778 ppir_codegen_print_prog(comp);
779
780 return true;
781 }