lima/ppir: Add fddx and fddy
[mesa.git] / src / gallium / drivers / lima / ir / pp / codegen.c
1 /*
2 * Copyright (c) 2017 Lima Project
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sub license,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the
12 * next paragraph) shall be included in all copies or substantial portions
13 * of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 *
23 */
24
25 #include "util/ralloc.h"
26 #include "util/u_half.h"
27 #include "util/bitscan.h"
28
29 #include "ppir.h"
30 #include "codegen.h"
31 #include "lima_context.h"
32
/*
 * Pack a four-entry swizzle into a bit field, two bits per lane.
 *
 * Each swizzle entry is offset by `shift` and wrapped to the range 0..3;
 * the selector for lane i is placed at bit position (i + dest_shift) * 2.
 */
static unsigned encode_swizzle(uint8_t *swizzle, int shift, int dest_shift)
{
   unsigned packed = 0;

   for (int lane = 0; lane < 4; lane++) {
      int selector = (swizzle[lane] + shift) & 0x3;
      int bit_pos = (lane + dest_shift) * 2;

      packed |= selector << bit_pos;
   }

   return packed;
}
40
41 static int get_scl_reg_index(ppir_src *src, int component)
42 {
43 int ret = ppir_target_get_src_reg_index(src);
44 ret += src->swizzle[component];
45 return ret;
46 }
47
48 static void ppir_codegen_encode_varying(ppir_node *node, void *code)
49 {
50 ppir_codegen_field_varying *f = code;
51 ppir_load_node *load = ppir_node_to_load(node);
52 ppir_dest *dest = &load->dest;
53 int index = ppir_target_get_dest_reg_index(dest);
54 int num_components = load->num_components;
55
56 if (num_components) {
57 assert(node->op == ppir_op_load_varying ||
58 node->op == ppir_op_load_coords ||
59 node->op == ppir_op_load_fragcoord ||
60 node->op == ppir_op_load_pointcoord ||
61 node->op == ppir_op_load_frontface);
62
63 f->imm.dest = index >> 2;
64 f->imm.mask = dest->write_mask << (index & 0x3);
65
66 int alignment = num_components == 3 ? 3 : num_components - 1;
67 f->imm.alignment = alignment;
68 f->imm.offset_vector = 0xf;
69
70 if (alignment == 3)
71 f->imm.index = load->index >> 2;
72 else
73 f->imm.index = load->index >> alignment;
74
75 switch (node->op) {
76 case ppir_op_load_fragcoord:
77 f->imm.source_type = 2;
78 f->imm.perspective = 3;
79 break;
80 case ppir_op_load_pointcoord:
81 f->imm.source_type = 3;
82 break;
83 case ppir_op_load_frontface:
84 f->imm.source_type = 3;
85 f->imm.perspective = 1;
86 break;
87 default:
88 break;
89 }
90 }
91 else {
92 assert(node->op == ppir_op_load_coords);
93
94 f->reg.dest = index >> 2;
95 f->reg.mask = dest->write_mask << (index & 0x3);
96
97 f->reg.source_type = 1;
98
99 ppir_src *src = &load->src;
100 index = ppir_target_get_src_reg_index(src);
101 f->reg.source = index >> 2;
102 f->reg.negate = src->negate;
103 f->reg.absolute = src->absolute;
104 f->reg.swizzle = encode_swizzle(src->swizzle, index & 0x3, 0);
105 }
106 }
107
108 static void ppir_codegen_encode_texld(ppir_node *node, void *code)
109 {
110 ppir_codegen_field_sampler *f = code;
111 ppir_load_texture_node *ldtex = ppir_node_to_load_texture(node);
112
113 f->index = ldtex->sampler;
114 f->lod_bias_en = 0;
115 f->type = ppir_codegen_sampler_type_2d;
116 f->offset_en = 0;
117 f->unknown_2 = 0x39001;
118 }
119
120 static void ppir_codegen_encode_uniform(ppir_node *node, void *code)
121 {
122 ppir_codegen_field_uniform *f = code;
123 ppir_load_node *load = ppir_node_to_load(node);
124
125 switch (node->op) {
126 case ppir_op_load_uniform:
127 f->source = ppir_codegen_uniform_src_uniform;
128 break;
129 case ppir_op_load_temp:
130 f->source = ppir_codegen_uniform_src_temporary;
131 break;
132 default:
133 assert(0);
134 }
135
136 int num_components = load->num_components;
137 int alignment = num_components == 4 ? 2 : num_components - 1;
138
139 f->alignment = alignment;
140
141 /* TODO: uniform can be also combined like varying */
142 f->index = load->index << (2 - alignment);
143 }
144
/*
 * Map a shift amount in [-3, 3] to its opcode value: non-negative shifts
 * map to themselves, negative shifts map to shift + 8 (5, 6, 7).
 */
static unsigned shift_to_op(int shift)
{
   assert(shift >= -3 && shift <= 3);

   if (shift < 0)
      return shift + 8;

   return shift;
}
150
151 static void ppir_codegen_encode_vec_mul(ppir_node *node, void *code)
152 {
153 ppir_codegen_field_vec4_mul *f = code;
154 ppir_alu_node *alu = ppir_node_to_alu(node);
155
156 ppir_dest *dest = &alu->dest;
157 int dest_shift = 0;
158 if (dest->type != ppir_target_pipeline) {
159 int index = ppir_target_get_dest_reg_index(dest);
160 dest_shift = index & 0x3;
161 f->dest = index >> 2;
162 f->mask = dest->write_mask << dest_shift;
163 }
164 f->dest_modifier = dest->modifier;
165
166 switch (node->op) {
167 case ppir_op_mul:
168 f->op = shift_to_op(alu->shift);
169 break;
170 case ppir_op_mov:
171 f->op = ppir_codegen_vec4_mul_op_mov;
172 break;
173 case ppir_op_max:
174 f->op = ppir_codegen_vec4_mul_op_max;
175 break;
176 case ppir_op_min:
177 f->op = ppir_codegen_vec4_mul_op_min;
178 break;
179 case ppir_op_and:
180 f->op = ppir_codegen_vec4_mul_op_and;
181 break;
182 case ppir_op_or:
183 f->op = ppir_codegen_vec4_mul_op_or;
184 break;
185 case ppir_op_xor:
186 f->op = ppir_codegen_vec4_mul_op_xor;
187 break;
188 case ppir_op_gt:
189 f->op = ppir_codegen_vec4_mul_op_gt;
190 break;
191 case ppir_op_ge:
192 f->op = ppir_codegen_vec4_mul_op_ge;
193 break;
194 case ppir_op_eq:
195 f->op = ppir_codegen_vec4_mul_op_eq;
196 break;
197 case ppir_op_ne:
198 f->op = ppir_codegen_vec4_mul_op_ne;
199 break;
200 case ppir_op_not:
201 f->op = ppir_codegen_vec4_mul_op_not;
202 break;
203 default:
204 break;
205 }
206
207 ppir_src *src = alu->src;
208 int index = ppir_target_get_src_reg_index(src);
209 f->arg0_source = index >> 2;
210 f->arg0_swizzle = encode_swizzle(src->swizzle, index & 0x3, dest_shift);
211 f->arg0_absolute = src->absolute;
212 f->arg0_negate = src->negate;
213
214 if (alu->num_src == 2) {
215 src = alu->src + 1;
216 index = ppir_target_get_src_reg_index(src);
217 f->arg1_source = index >> 2;
218 f->arg1_swizzle = encode_swizzle(src->swizzle, index & 0x3, dest_shift);
219 f->arg1_absolute = src->absolute;
220 f->arg1_negate = src->negate;
221 }
222 }
223
224 static void ppir_codegen_encode_scl_mul(ppir_node *node, void *code)
225 {
226 ppir_codegen_field_float_mul *f = code;
227 ppir_alu_node *alu = ppir_node_to_alu(node);
228
229 ppir_dest *dest = &alu->dest;
230 int dest_component = ffs(dest->write_mask) - 1;
231 assert(dest_component >= 0);
232
233 if (dest->type != ppir_target_pipeline) {
234 f->dest = ppir_target_get_dest_reg_index(dest) + dest_component;
235 f->output_en = true;
236 }
237 f->dest_modifier = dest->modifier;
238
239 switch (node->op) {
240 case ppir_op_mul:
241 f->op = shift_to_op(alu->shift);
242 break;
243 case ppir_op_mov:
244 f->op = ppir_codegen_float_mul_op_mov;
245 break;
246 case ppir_op_sel_cond:
247 f->op = ppir_codegen_float_mul_op_mov;
248 break;
249 case ppir_op_max:
250 f->op = ppir_codegen_float_mul_op_max;
251 break;
252 case ppir_op_min:
253 f->op = ppir_codegen_float_mul_op_min;
254 break;
255 case ppir_op_and:
256 f->op = ppir_codegen_float_mul_op_and;
257 break;
258 case ppir_op_or:
259 f->op = ppir_codegen_float_mul_op_or;
260 break;
261 case ppir_op_xor:
262 f->op = ppir_codegen_float_mul_op_xor;
263 break;
264 case ppir_op_gt:
265 f->op = ppir_codegen_float_mul_op_gt;
266 break;
267 case ppir_op_ge:
268 f->op = ppir_codegen_float_mul_op_ge;
269 break;
270 case ppir_op_eq:
271 f->op = ppir_codegen_float_mul_op_eq;
272 break;
273 case ppir_op_ne:
274 f->op = ppir_codegen_float_mul_op_ne;
275 break;
276 case ppir_op_not:
277 f->op = ppir_codegen_float_mul_op_not;
278 break;
279 default:
280 break;
281 }
282
283 ppir_src *src = alu->src;
284 f->arg0_source = get_scl_reg_index(src, dest_component);
285 f->arg0_absolute = src->absolute;
286 f->arg0_negate = src->negate;
287
288 if (alu->num_src == 2) {
289 src = alu->src + 1;
290 f->arg1_source = get_scl_reg_index(src, dest_component);
291 f->arg1_absolute = src->absolute;
292 f->arg1_negate = src->negate;
293 }
294 }
295
296 static void ppir_codegen_encode_vec_add(ppir_node *node, void *code)
297 {
298 ppir_codegen_field_vec4_acc *f = code;
299 ppir_alu_node *alu = ppir_node_to_alu(node);
300
301 ppir_dest *dest = &alu->dest;
302 int index = ppir_target_get_dest_reg_index(dest);
303 int dest_shift = index & 0x3;
304 f->dest = index >> 2;
305 f->mask = dest->write_mask << dest_shift;
306 f->dest_modifier = dest->modifier;
307
308 switch (node->op) {
309 case ppir_op_add:
310 f->op = ppir_codegen_vec4_acc_op_add;
311 break;
312 case ppir_op_mov:
313 f->op = ppir_codegen_vec4_acc_op_mov;
314 break;
315 case ppir_op_sum3:
316 f->op = ppir_codegen_vec4_acc_op_sum3;
317 dest_shift = 0;
318 break;
319 case ppir_op_sum4:
320 f->op = ppir_codegen_vec4_acc_op_sum4;
321 dest_shift = 0;
322 break;
323 case ppir_op_floor:
324 f->op = ppir_codegen_vec4_acc_op_floor;
325 break;
326 case ppir_op_ceil:
327 f->op = ppir_codegen_vec4_acc_op_ceil;
328 break;
329 case ppir_op_fract:
330 f->op = ppir_codegen_vec4_acc_op_fract;
331 break;
332 case ppir_op_gt:
333 f->op = ppir_codegen_vec4_acc_op_gt;
334 break;
335 case ppir_op_ge:
336 f->op = ppir_codegen_vec4_acc_op_ge;
337 break;
338 case ppir_op_eq:
339 f->op = ppir_codegen_vec4_acc_op_eq;
340 break;
341 case ppir_op_ne:
342 f->op = ppir_codegen_vec4_acc_op_ne;
343 break;
344 case ppir_op_select:
345 f->op = ppir_codegen_vec4_acc_op_sel;
346 break;
347 case ppir_op_max:
348 f->op = ppir_codegen_vec4_acc_op_max;
349 break;
350 case ppir_op_min:
351 f->op = ppir_codegen_vec4_acc_op_min;
352 break;
353 case ppir_op_ddx:
354 f->op = ppir_codegen_vec4_acc_op_dFdx;
355 break;
356 case ppir_op_ddy:
357 f->op = ppir_codegen_vec4_acc_op_dFdy;
358 break;
359 default:
360 break;
361 }
362
363 ppir_src *src = node->op == ppir_op_select ? alu->src + 1 : alu->src;
364 index = ppir_target_get_src_reg_index(src);
365
366 if (src->type == ppir_target_pipeline &&
367 src->pipeline == ppir_pipeline_reg_vmul)
368 f->mul_in = true;
369 else
370 f->arg0_source = index >> 2;
371
372 f->arg0_swizzle = encode_swizzle(src->swizzle, index & 0x3, dest_shift);
373 f->arg0_absolute = src->absolute;
374 f->arg0_negate = src->negate;
375
376 if (++src < alu->src + alu->num_src) {
377 index = ppir_target_get_src_reg_index(src);
378 f->arg1_source = index >> 2;
379 f->arg1_swizzle = encode_swizzle(src->swizzle, index & 0x3, dest_shift);
380 f->arg1_absolute = src->absolute;
381 f->arg1_negate = src->negate;
382 }
383 }
384
385 static void ppir_codegen_encode_scl_add(ppir_node *node, void *code)
386 {
387 ppir_codegen_field_float_acc *f = code;
388 ppir_alu_node *alu = ppir_node_to_alu(node);
389
390 ppir_dest *dest = &alu->dest;
391 int dest_component = ffs(dest->write_mask) - 1;
392 assert(dest_component >= 0);
393
394 f->dest = ppir_target_get_dest_reg_index(dest) + dest_component;
395 f->output_en = true;
396 f->dest_modifier = dest->modifier;
397
398 switch (node->op) {
399 case ppir_op_add:
400 f->op = shift_to_op(alu->shift);
401 break;
402 case ppir_op_mov:
403 f->op = ppir_codegen_float_acc_op_mov;
404 break;
405 case ppir_op_max:
406 f->op = ppir_codegen_float_acc_op_max;
407 break;
408 case ppir_op_min:
409 f->op = ppir_codegen_float_acc_op_min;
410 break;
411 case ppir_op_floor:
412 f->op = ppir_codegen_float_acc_op_floor;
413 break;
414 case ppir_op_ceil:
415 f->op = ppir_codegen_float_acc_op_ceil;
416 break;
417 case ppir_op_fract:
418 f->op = ppir_codegen_float_acc_op_fract;
419 break;
420 case ppir_op_gt:
421 f->op = ppir_codegen_float_acc_op_gt;
422 break;
423 case ppir_op_ge:
424 f->op = ppir_codegen_float_acc_op_ge;
425 break;
426 case ppir_op_eq:
427 f->op = ppir_codegen_float_acc_op_eq;
428 break;
429 case ppir_op_ne:
430 f->op = ppir_codegen_float_acc_op_ne;
431 break;
432 case ppir_op_select:
433 f->op = ppir_codegen_float_acc_op_sel;
434 break;
435 case ppir_op_ddx:
436 f->op = ppir_codegen_float_acc_op_dFdx;
437 break;
438 case ppir_op_ddy:
439 f->op = ppir_codegen_float_acc_op_dFdy;
440 break;
441 default:
442 break;
443 }
444
445 ppir_src *src = node->op == ppir_op_select ? alu->src + 1: alu->src;
446 if (src->type == ppir_target_pipeline &&
447 src->pipeline == ppir_pipeline_reg_fmul)
448 f->mul_in = true;
449 else
450 f->arg0_source = get_scl_reg_index(src, dest_component);
451 f->arg0_absolute = src->absolute;
452 f->arg0_negate = src->negate;
453
454 if (++src < alu->src + alu->num_src) {
455 f->arg1_source = get_scl_reg_index(src, dest_component);
456 f->arg1_absolute = src->absolute;
457 f->arg1_negate = src->negate;
458 }
459 }
460
461 static void ppir_codegen_encode_combine(ppir_node *node, void *code)
462 {
463 ppir_codegen_field_combine *f = code;
464 ppir_alu_node *alu = ppir_node_to_alu(node);
465
466 switch (node->op) {
467 case ppir_op_rsqrt:
468 case ppir_op_log2:
469 case ppir_op_exp2:
470 case ppir_op_rcp:
471 case ppir_op_sqrt:
472 case ppir_op_sin:
473 case ppir_op_cos:
474 {
475 f->scalar.dest_vec = false;
476 f->scalar.arg1_en = false;
477
478 ppir_dest *dest = &alu->dest;
479 int dest_component = ffs(dest->write_mask) - 1;
480 assert(dest_component >= 0);
481 f->scalar.dest = ppir_target_get_dest_reg_index(dest) + dest_component;
482 f->scalar.dest_modifier = dest->modifier;
483
484 ppir_src *src = alu->src;
485 f->scalar.arg0_src = get_scl_reg_index(src, dest_component);
486 f->scalar.arg0_absolute = src->absolute;
487 f->scalar.arg0_negate = src->negate;
488
489 switch (node->op) {
490 case ppir_op_rsqrt:
491 f->scalar.op = ppir_codegen_combine_scalar_op_rsqrt;
492 break;
493 case ppir_op_log2:
494 f->scalar.op = ppir_codegen_combine_scalar_op_log2;
495 break;
496 case ppir_op_exp2:
497 f->scalar.op = ppir_codegen_combine_scalar_op_exp2;
498 break;
499 case ppir_op_rcp:
500 f->scalar.op = ppir_codegen_combine_scalar_op_rcp;
501 break;
502 case ppir_op_sqrt:
503 f->scalar.op = ppir_codegen_combine_scalar_op_sqrt;
504 break;
505 case ppir_op_sin:
506 f->scalar.op = ppir_codegen_combine_scalar_op_sin;
507 break;
508 case ppir_op_cos:
509 f->scalar.op = ppir_codegen_combine_scalar_op_cos;
510 break;
511 default:
512 break;
513 }
514 }
515 default:
516 break;
517 }
518 }
519
520 static void ppir_codegen_encode_store_temp(ppir_node *node, void *code)
521 {
522 assert(node->op == ppir_op_store_temp);
523
524 ppir_codegen_field_temp_write *f = code;
525 ppir_store_node *snode = ppir_node_to_store(node);
526 int num_components = snode->num_components;
527
528 f->temp_write.dest = 0x03; // 11 - temporary
529 f->temp_write.source = snode->src.reg->index;
530
531 int alignment = num_components == 4 ? 2 : num_components - 1;
532 f->temp_write.alignment = alignment;
533 f->temp_write.index = snode->index << (2 - alignment);
534
535 f->temp_write.offset_reg = snode->index >> 2;
536 }
537
538 static void ppir_codegen_encode_const(ppir_const *constant, uint16_t *code)
539 {
540 for (int i = 0; i < constant->num; i++)
541 code[i] = util_float_to_half(constant->value[i].f);
542 }
543
544 static void ppir_codegen_encode_discard(ppir_node *node, void *code)
545 {
546 ppir_codegen_field_branch *b = code;
547 assert(node->op == ppir_op_discard);
548
549 b->discard.word0 = PPIR_CODEGEN_DISCARD_WORD0;
550 b->discard.word1 = PPIR_CODEGEN_DISCARD_WORD1;
551 b->discard.word2 = PPIR_CODEGEN_DISCARD_WORD2;
552 }
553
554 static void ppir_codegen_encode_branch(ppir_node *node, void *code)
555 {
556 ppir_codegen_field_branch *b = code;
557 ppir_branch_node *branch;
558 ppir_instr *target_instr;
559 if (node->op == ppir_op_discard) {
560 ppir_codegen_encode_discard(node, code);
561 return;
562 }
563
564 assert(node->op == ppir_op_branch);
565 branch = ppir_node_to_branch(node);
566
567 b->branch.unknown_0 = 0x0;
568 b->branch.arg0_source = get_scl_reg_index(&branch->src[0], 0);
569 b->branch.arg1_source = get_scl_reg_index(&branch->src[1], 0);
570 b->branch.cond_gt = branch->cond_gt;
571 b->branch.cond_eq = branch->cond_eq;
572 b->branch.cond_lt = branch->cond_lt;
573 b->branch.unknown_1 = 0x0;
574
575 target_instr = list_first_entry(&branch->target->instr_list, ppir_instr, list);
576 b->branch.target = target_instr->offset - node->instr->offset;
577 b->branch.next_count = target_instr->encode_size;
578 }
579
580 typedef void (*ppir_codegen_instr_slot_encode_func)(ppir_node *, void *);
581
582 static const ppir_codegen_instr_slot_encode_func
583 ppir_codegen_encode_slot[PPIR_INSTR_SLOT_NUM] = {
584 [PPIR_INSTR_SLOT_VARYING] = ppir_codegen_encode_varying,
585 [PPIR_INSTR_SLOT_TEXLD] = ppir_codegen_encode_texld,
586 [PPIR_INSTR_SLOT_UNIFORM] = ppir_codegen_encode_uniform,
587 [PPIR_INSTR_SLOT_ALU_VEC_MUL] = ppir_codegen_encode_vec_mul,
588 [PPIR_INSTR_SLOT_ALU_SCL_MUL] = ppir_codegen_encode_scl_mul,
589 [PPIR_INSTR_SLOT_ALU_VEC_ADD] = ppir_codegen_encode_vec_add,
590 [PPIR_INSTR_SLOT_ALU_SCL_ADD] = ppir_codegen_encode_scl_add,
591 [PPIR_INSTR_SLOT_ALU_COMBINE] = ppir_codegen_encode_combine,
592 [PPIR_INSTR_SLOT_STORE_TEMP] = ppir_codegen_encode_store_temp,
593 [PPIR_INSTR_SLOT_BRANCH] = ppir_codegen_encode_branch,
594 };
595
/* Encoded size in bits of each slot's field, indexed by slot number. */
static const int ppir_codegen_field_size[] = {
   34, 62, 41, 43, 30,
   44, 31, 30, 41, 73,
};
599
/* Number of 32-bit words needed to hold `size` bits (rounds up). */
static inline int align_to_word(int size)
{
   const int rounded = size + 0x1f;

   return rounded >> 5;
}
604
605 static int get_instr_encode_size(ppir_instr *instr)
606 {
607 int size = 0;
608
609 for (int i = 0; i < PPIR_INSTR_SLOT_NUM; i++) {
610 if (instr->slots[i])
611 size += ppir_codegen_field_size[i];
612 }
613
614 for (int i = 0; i < 2; i++) {
615 if (instr->constant[i].num)
616 size += 64;
617 }
618
619 return align_to_word(size) + 1;
620 }
621
/*
 * OR `src_size` bits from `src` into `dst`, starting at bit `dst_offset`.
 *
 * Both buffers are accessed as arrays of 32-bit words. When the
 * destination offset is word-aligned, the source is copied with memcpy
 * (rounded up to whole words). Otherwise each source word is split into a
 * low part that completes the current destination word and a high part
 * that starts the next one.
 */
static void bitcopy(void *dst, int dst_offset, void *src, int src_size)
{
   const int lo_shift = dst_offset & 0x1f;
   uint32_t *out = dst;
   uint32_t *in = src;

   out += (dst_offset >> 5);

   if (!lo_shift) {
      memcpy(out, in, align_to_word(src_size) * 4);
      return;
   }

   const int hi_shift = 32 - lo_shift;
   int copied = 0;

   for (;;) {
      /* Low bits of the source word fill the rest of this dst word. */
      *out |= *in << lo_shift;
      out++;

      copied += hi_shift;
      if (copied >= src_size)
         break;

      /* Remaining high bits of the source word begin the next dst word. */
      *out |= *in >> hi_shift;
      in++;

      copied += lo_shift;
      if (copied >= src_size)
         break;
   }
}
651
652 static int encode_instr(ppir_instr *instr, void *code, void *last_code)
653 {
654 int size = 0;
655 ppir_codegen_ctrl *ctrl = code;
656
657 for (int i = 0; i < PPIR_INSTR_SLOT_NUM; i++) {
658 if (instr->slots[i]) {
659 /* max field size (73), align to dword */
660 uint8_t output[12] = {0};
661
662 ppir_codegen_encode_slot[i](instr->slots[i], output);
663 bitcopy(ctrl + 1, size, output, ppir_codegen_field_size[i]);
664
665 size += ppir_codegen_field_size[i];
666 ctrl->fields |= 1 << i;
667 }
668 }
669
670 if (instr->slots[PPIR_INSTR_SLOT_TEXLD])
671 ctrl->sync = true;
672
673 if (instr->slots[PPIR_INSTR_SLOT_ALU_VEC_ADD]) {
674 ppir_node *node = instr->slots[PPIR_INSTR_SLOT_ALU_VEC_ADD];
675 if (node->op == ppir_op_ddx || node->op == ppir_op_ddy)
676 ctrl->sync = true;
677 }
678
679 if (instr->slots[PPIR_INSTR_SLOT_ALU_SCL_ADD]) {
680 ppir_node *node = instr->slots[PPIR_INSTR_SLOT_ALU_SCL_ADD];
681 if (node->op == ppir_op_ddx || node->op == ppir_op_ddy)
682 ctrl->sync = true;
683 }
684
685 for (int i = 0; i < 2; i++) {
686 if (instr->constant[i].num) {
687 uint16_t output[4] = {0};
688
689 ppir_codegen_encode_const(instr->constant + i, output);
690 bitcopy(ctrl + 1, size, output, instr->constant[i].num * 16);
691
692 size += 64;
693 ctrl->fields |= 1 << (ppir_codegen_field_shift_vec4_const_0 + i);
694 }
695 }
696
697 size = align_to_word(size) + 1;
698
699 ctrl->count = size;
700 if (instr->is_end)
701 ctrl->stop = true;
702
703 if (last_code) {
704 ppir_codegen_ctrl *last_ctrl = last_code;
705 last_ctrl->next_count = size;
706 last_ctrl->prefetch = true;
707 }
708
709 return size;
710 }
711
712 static void ppir_codegen_print_prog(ppir_compiler *comp)
713 {
714 uint32_t *prog = comp->prog->shader;
715 unsigned offset = 0;
716
717 printf("========ppir codegen========\n");
718 list_for_each_entry(ppir_block, block, &comp->block_list, list) {
719 list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
720 printf("%03d (@%6d): ", instr->index, instr->offset);
721 int n = prog[0] & 0x1f;
722 for (int i = 0; i < n; i++) {
723 if (i && i % 6 == 0)
724 printf("\n ");
725 printf("%08x ", prog[i]);
726 }
727 printf("\n");
728 ppir_disassemble_instr(prog, offset);
729 prog += n;
730 offset += n;
731 }
732 }
733 printf("-----------------------\n");
734 }
735
736 bool ppir_codegen_prog(ppir_compiler *comp)
737 {
738 int size = 0;
739 list_for_each_entry(ppir_block, block, &comp->block_list, list) {
740 list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
741 instr->offset = size;
742 instr->encode_size = get_instr_encode_size(instr);
743 size += instr->encode_size;
744 }
745 }
746
747 uint32_t *prog = rzalloc_size(comp->prog, size * sizeof(uint32_t));
748 if (!prog)
749 return false;
750
751 uint32_t *code = prog, *last_code = NULL;
752 list_for_each_entry(ppir_block, block, &comp->block_list, list) {
753 list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
754 int offset = encode_instr(instr, code, last_code);
755 last_code = code;
756 code += offset;
757 }
758 }
759
760 comp->prog->shader = prog;
761 comp->prog->shader_size = size * sizeof(uint32_t);
762
763 if (lima_debug & LIMA_DEBUG_PP)
764 ppir_codegen_print_prog(comp);
765
766 return true;
767 }