lima/ppir: fix lod bias src
[mesa.git] / src / gallium / drivers / lima / ir / pp / codegen.c
1 /*
2 * Copyright (c) 2017 Lima Project
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sub license,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the
12 * next paragraph) shall be included in all copies or substantial portions
13 * of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 *
23 */
24
25 #include "util/ralloc.h"
26 #include "util/u_half.h"
27 #include "util/bitscan.h"
28
29 #include "ppir.h"
30 #include "codegen.h"
31 #include "lima_context.h"
32
/* Pack a 4-entry swizzle into consecutive 2-bit fields.
 *
 * Every selector is offset by `shift` and wrapped to two bits; entry i
 * lands in 2-bit slot (i + dest_shift) of the returned word.
 */
static unsigned encode_swizzle(uint8_t *swizzle, int shift, int dest_shift)
{
   unsigned packed = 0;
   int bit_pos = dest_shift * 2;

   for (int comp = 0; comp < 4; comp++, bit_pos += 2) {
      int selector = (swizzle[comp] + shift) & 0x3;
      packed |= selector << bit_pos;
   }

   return packed;
}
40
41 static int get_scl_reg_index(ppir_src *src, int component)
42 {
43 int ret = ppir_target_get_src_reg_index(src);
44 ret += src->swizzle[component];
45 return ret;
46 }
47
48 static void ppir_codegen_encode_varying(ppir_node *node, void *code)
49 {
50 ppir_codegen_field_varying *f = code;
51 ppir_load_node *load = ppir_node_to_load(node);
52 ppir_dest *dest = &load->dest;
53 int index = ppir_target_get_dest_reg_index(dest);
54 int num_components = load->num_components;
55
56 if (node->op != ppir_op_load_coords_reg) {
57 assert(node->op == ppir_op_load_varying ||
58 node->op == ppir_op_load_coords ||
59 node->op == ppir_op_load_fragcoord ||
60 node->op == ppir_op_load_pointcoord ||
61 node->op == ppir_op_load_frontface);
62
63 f->imm.dest = index >> 2;
64 f->imm.mask = dest->write_mask << (index & 0x3);
65
66 int alignment = num_components == 3 ? 3 : num_components - 1;
67 f->imm.alignment = alignment;
68
69 if (load->num_src) {
70 index = ppir_target_get_src_reg_index(&load->src);
71 f->imm.offset_vector = index >> 2;
72 f->imm.offset_scalar = index & 0x3;
73 } else
74 f->imm.offset_vector = 0xf;
75
76 if (alignment == 3)
77 f->imm.index = load->index >> 2;
78 else
79 f->imm.index = load->index >> alignment;
80
81 switch (node->op) {
82 case ppir_op_load_fragcoord:
83 f->imm.source_type = 2;
84 f->imm.perspective = 3;
85 break;
86 case ppir_op_load_pointcoord:
87 f->imm.source_type = 3;
88 break;
89 case ppir_op_load_frontface:
90 f->imm.source_type = 3;
91 f->imm.perspective = 1;
92 break;
93 case ppir_op_load_coords:
94 /* num_components == 3 implies cubemap as we don't support 3D textures */
95 f->imm.source_type = num_components == 3 ? 2 : 0;
96 break;
97 default:
98 break;
99 }
100 }
101 else { /* node->op == ppir_op_load_coords_reg */
102 f->reg.dest = index >> 2;
103 f->reg.mask = dest->write_mask << (index & 0x3);
104
105 if (load->num_src) {
106 /* num_components == 3 implies cubemap as we don't support 3D textures */
107 if (num_components == 3) {
108 f->reg.source_type = 2;
109 f->reg.perspective = 1;
110 } else {
111 f->reg.source_type = 1;
112 }
113 ppir_src *src = &load->src;
114 index = ppir_target_get_src_reg_index(src);
115 f->reg.source = index >> 2;
116 f->reg.negate = src->negate;
117 f->reg.absolute = src->absolute;
118 f->reg.swizzle = encode_swizzle(src->swizzle, index & 0x3, 0);
119 }
120 }
121 }
122
123 static void ppir_codegen_encode_texld(ppir_node *node, void *code)
124 {
125 ppir_codegen_field_sampler *f = code;
126 ppir_load_texture_node *ldtex = ppir_node_to_load_texture(node);
127
128 f->index = ldtex->sampler;
129
130 f->lod_bias_en = ldtex->lod_bias_en;
131 f->explicit_lod = ldtex->explicit_lod;
132 if (ldtex->lod_bias_en)
133 ppir_target_get_src_reg_index(&ldtex->src[1]);
134
135 switch (ldtex->sampler_dim) {
136 case GLSL_SAMPLER_DIM_2D:
137 case GLSL_SAMPLER_DIM_RECT:
138 case GLSL_SAMPLER_DIM_EXTERNAL:
139 f->type = ppir_codegen_sampler_type_2d;
140 break;
141 case GLSL_SAMPLER_DIM_CUBE:
142 f->type = ppir_codegen_sampler_type_cube;
143 break;
144 default:
145 break;
146 }
147
148 f->offset_en = 0;
149 f->unknown_2 = 0x39001;
150 }
151
152 static void ppir_codegen_encode_uniform(ppir_node *node, void *code)
153 {
154 ppir_codegen_field_uniform *f = code;
155 ppir_load_node *load = ppir_node_to_load(node);
156
157 switch (node->op) {
158 case ppir_op_load_uniform:
159 f->source = ppir_codegen_uniform_src_uniform;
160 break;
161 case ppir_op_load_temp:
162 f->source = ppir_codegen_uniform_src_temporary;
163 break;
164 default:
165 assert(0);
166 }
167
168 /* Uniforms are always aligned to vec4 boundary */
169 f->alignment = 2;
170 f->index = load->index;
171
172 if (load->num_src) {
173 f->offset_en = 1;
174 f->offset_reg = ppir_target_get_src_reg_index(&load->src);
175 }
176 }
177
/* Map a shift in [-3, 3] to its op value: non-negative shifts map to
 * themselves, negative shifts map to shift + 8.
 */
static unsigned shift_to_op(int shift)
{
   assert(shift >= -3 && shift <= 3);

   if (shift >= 0)
      return shift;

   return shift + 8;
}
183
184 static void ppir_codegen_encode_vec_mul(ppir_node *node, void *code)
185 {
186 ppir_codegen_field_vec4_mul *f = code;
187 ppir_alu_node *alu = ppir_node_to_alu(node);
188
189 ppir_dest *dest = &alu->dest;
190 int dest_shift = 0;
191 if (dest->type != ppir_target_pipeline) {
192 int index = ppir_target_get_dest_reg_index(dest);
193 dest_shift = index & 0x3;
194 f->dest = index >> 2;
195 f->mask = dest->write_mask << dest_shift;
196 }
197 f->dest_modifier = dest->modifier;
198
199 switch (node->op) {
200 case ppir_op_mul:
201 f->op = shift_to_op(alu->shift);
202 break;
203 case ppir_op_mov:
204 case ppir_op_store_color:
205 f->op = ppir_codegen_vec4_mul_op_mov;
206 break;
207 case ppir_op_max:
208 f->op = ppir_codegen_vec4_mul_op_max;
209 break;
210 case ppir_op_min:
211 f->op = ppir_codegen_vec4_mul_op_min;
212 break;
213 case ppir_op_and:
214 f->op = ppir_codegen_vec4_mul_op_and;
215 break;
216 case ppir_op_or:
217 f->op = ppir_codegen_vec4_mul_op_or;
218 break;
219 case ppir_op_xor:
220 f->op = ppir_codegen_vec4_mul_op_xor;
221 break;
222 case ppir_op_gt:
223 f->op = ppir_codegen_vec4_mul_op_gt;
224 break;
225 case ppir_op_ge:
226 f->op = ppir_codegen_vec4_mul_op_ge;
227 break;
228 case ppir_op_eq:
229 f->op = ppir_codegen_vec4_mul_op_eq;
230 break;
231 case ppir_op_ne:
232 f->op = ppir_codegen_vec4_mul_op_ne;
233 break;
234 case ppir_op_not:
235 f->op = ppir_codegen_vec4_mul_op_not;
236 break;
237 default:
238 break;
239 }
240
241 ppir_src *src = alu->src;
242 int index = ppir_target_get_src_reg_index(src);
243 f->arg0_source = index >> 2;
244 f->arg0_swizzle = encode_swizzle(src->swizzle, index & 0x3, dest_shift);
245 f->arg0_absolute = src->absolute;
246 f->arg0_negate = src->negate;
247
248 if (alu->num_src == 2) {
249 src = alu->src + 1;
250 index = ppir_target_get_src_reg_index(src);
251 f->arg1_source = index >> 2;
252 f->arg1_swizzle = encode_swizzle(src->swizzle, index & 0x3, dest_shift);
253 f->arg1_absolute = src->absolute;
254 f->arg1_negate = src->negate;
255 }
256 }
257
258 static void ppir_codegen_encode_scl_mul(ppir_node *node, void *code)
259 {
260 ppir_codegen_field_float_mul *f = code;
261 ppir_alu_node *alu = ppir_node_to_alu(node);
262
263 ppir_dest *dest = &alu->dest;
264 int dest_component = ffs(dest->write_mask) - 1;
265 assert(dest_component >= 0);
266
267 if (dest->type != ppir_target_pipeline) {
268 f->dest = ppir_target_get_dest_reg_index(dest) + dest_component;
269 f->output_en = true;
270 }
271 f->dest_modifier = dest->modifier;
272
273 switch (node->op) {
274 case ppir_op_mul:
275 f->op = shift_to_op(alu->shift);
276 break;
277 case ppir_op_mov:
278 f->op = ppir_codegen_float_mul_op_mov;
279 break;
280 case ppir_op_sel_cond:
281 f->op = ppir_codegen_float_mul_op_mov;
282 break;
283 case ppir_op_max:
284 f->op = ppir_codegen_float_mul_op_max;
285 break;
286 case ppir_op_min:
287 f->op = ppir_codegen_float_mul_op_min;
288 break;
289 case ppir_op_and:
290 f->op = ppir_codegen_float_mul_op_and;
291 break;
292 case ppir_op_or:
293 f->op = ppir_codegen_float_mul_op_or;
294 break;
295 case ppir_op_xor:
296 f->op = ppir_codegen_float_mul_op_xor;
297 break;
298 case ppir_op_gt:
299 f->op = ppir_codegen_float_mul_op_gt;
300 break;
301 case ppir_op_ge:
302 f->op = ppir_codegen_float_mul_op_ge;
303 break;
304 case ppir_op_eq:
305 f->op = ppir_codegen_float_mul_op_eq;
306 break;
307 case ppir_op_ne:
308 f->op = ppir_codegen_float_mul_op_ne;
309 break;
310 case ppir_op_not:
311 f->op = ppir_codegen_float_mul_op_not;
312 break;
313 default:
314 break;
315 }
316
317 ppir_src *src = alu->src;
318 f->arg0_source = get_scl_reg_index(src, dest_component);
319 f->arg0_absolute = src->absolute;
320 f->arg0_negate = src->negate;
321
322 if (alu->num_src == 2) {
323 src = alu->src + 1;
324 f->arg1_source = get_scl_reg_index(src, dest_component);
325 f->arg1_absolute = src->absolute;
326 f->arg1_negate = src->negate;
327 }
328 }
329
330 static void ppir_codegen_encode_vec_add(ppir_node *node, void *code)
331 {
332 ppir_codegen_field_vec4_acc *f = code;
333 ppir_alu_node *alu = ppir_node_to_alu(node);
334
335 ppir_dest *dest = &alu->dest;
336 int index = ppir_target_get_dest_reg_index(dest);
337 int dest_shift = index & 0x3;
338 f->dest = index >> 2;
339 f->mask = dest->write_mask << dest_shift;
340 f->dest_modifier = dest->modifier;
341
342 switch (node->op) {
343 case ppir_op_add:
344 f->op = ppir_codegen_vec4_acc_op_add;
345 break;
346 case ppir_op_mov:
347 case ppir_op_store_color:
348 f->op = ppir_codegen_vec4_acc_op_mov;
349 break;
350 case ppir_op_sum3:
351 f->op = ppir_codegen_vec4_acc_op_sum3;
352 dest_shift = 0;
353 break;
354 case ppir_op_sum4:
355 f->op = ppir_codegen_vec4_acc_op_sum4;
356 dest_shift = 0;
357 break;
358 case ppir_op_floor:
359 f->op = ppir_codegen_vec4_acc_op_floor;
360 break;
361 case ppir_op_ceil:
362 f->op = ppir_codegen_vec4_acc_op_ceil;
363 break;
364 case ppir_op_fract:
365 f->op = ppir_codegen_vec4_acc_op_fract;
366 break;
367 case ppir_op_gt:
368 f->op = ppir_codegen_vec4_acc_op_gt;
369 break;
370 case ppir_op_ge:
371 f->op = ppir_codegen_vec4_acc_op_ge;
372 break;
373 case ppir_op_eq:
374 f->op = ppir_codegen_vec4_acc_op_eq;
375 break;
376 case ppir_op_ne:
377 f->op = ppir_codegen_vec4_acc_op_ne;
378 break;
379 case ppir_op_select:
380 f->op = ppir_codegen_vec4_acc_op_sel;
381 break;
382 case ppir_op_max:
383 f->op = ppir_codegen_vec4_acc_op_max;
384 break;
385 case ppir_op_min:
386 f->op = ppir_codegen_vec4_acc_op_min;
387 break;
388 case ppir_op_ddx:
389 f->op = ppir_codegen_vec4_acc_op_dFdx;
390 break;
391 case ppir_op_ddy:
392 f->op = ppir_codegen_vec4_acc_op_dFdy;
393 break;
394 default:
395 break;
396 }
397
398 ppir_src *src = node->op == ppir_op_select ? alu->src + 1 : alu->src;
399 index = ppir_target_get_src_reg_index(src);
400
401 if (src->type == ppir_target_pipeline &&
402 src->pipeline == ppir_pipeline_reg_vmul)
403 f->mul_in = true;
404 else
405 f->arg0_source = index >> 2;
406
407 f->arg0_swizzle = encode_swizzle(src->swizzle, index & 0x3, dest_shift);
408 f->arg0_absolute = src->absolute;
409 f->arg0_negate = src->negate;
410
411 if (++src < alu->src + alu->num_src) {
412 index = ppir_target_get_src_reg_index(src);
413 f->arg1_source = index >> 2;
414 f->arg1_swizzle = encode_swizzle(src->swizzle, index & 0x3, dest_shift);
415 f->arg1_absolute = src->absolute;
416 f->arg1_negate = src->negate;
417 }
418 }
419
420 static void ppir_codegen_encode_scl_add(ppir_node *node, void *code)
421 {
422 ppir_codegen_field_float_acc *f = code;
423 ppir_alu_node *alu = ppir_node_to_alu(node);
424
425 ppir_dest *dest = &alu->dest;
426 int dest_component = ffs(dest->write_mask) - 1;
427 assert(dest_component >= 0);
428
429 f->dest = ppir_target_get_dest_reg_index(dest) + dest_component;
430 f->output_en = true;
431 f->dest_modifier = dest->modifier;
432
433 switch (node->op) {
434 case ppir_op_add:
435 f->op = shift_to_op(alu->shift);
436 break;
437 case ppir_op_mov:
438 f->op = ppir_codegen_float_acc_op_mov;
439 break;
440 case ppir_op_max:
441 f->op = ppir_codegen_float_acc_op_max;
442 break;
443 case ppir_op_min:
444 f->op = ppir_codegen_float_acc_op_min;
445 break;
446 case ppir_op_floor:
447 f->op = ppir_codegen_float_acc_op_floor;
448 break;
449 case ppir_op_ceil:
450 f->op = ppir_codegen_float_acc_op_ceil;
451 break;
452 case ppir_op_fract:
453 f->op = ppir_codegen_float_acc_op_fract;
454 break;
455 case ppir_op_gt:
456 f->op = ppir_codegen_float_acc_op_gt;
457 break;
458 case ppir_op_ge:
459 f->op = ppir_codegen_float_acc_op_ge;
460 break;
461 case ppir_op_eq:
462 f->op = ppir_codegen_float_acc_op_eq;
463 break;
464 case ppir_op_ne:
465 f->op = ppir_codegen_float_acc_op_ne;
466 break;
467 case ppir_op_select:
468 f->op = ppir_codegen_float_acc_op_sel;
469 break;
470 case ppir_op_ddx:
471 f->op = ppir_codegen_float_acc_op_dFdx;
472 break;
473 case ppir_op_ddy:
474 f->op = ppir_codegen_float_acc_op_dFdy;
475 break;
476 default:
477 break;
478 }
479
480 ppir_src *src = node->op == ppir_op_select ? alu->src + 1: alu->src;
481 if (src->type == ppir_target_pipeline &&
482 src->pipeline == ppir_pipeline_reg_fmul)
483 f->mul_in = true;
484 else
485 f->arg0_source = get_scl_reg_index(src, dest_component);
486 f->arg0_absolute = src->absolute;
487 f->arg0_negate = src->negate;
488
489 if (++src < alu->src + alu->num_src) {
490 f->arg1_source = get_scl_reg_index(src, dest_component);
491 f->arg1_absolute = src->absolute;
492 f->arg1_negate = src->negate;
493 }
494 }
495
496 static void ppir_codegen_encode_combine(ppir_node *node, void *code)
497 {
498 ppir_codegen_field_combine *f = code;
499 ppir_alu_node *alu = ppir_node_to_alu(node);
500
501 switch (node->op) {
502 case ppir_op_rsqrt:
503 case ppir_op_log2:
504 case ppir_op_exp2:
505 case ppir_op_rcp:
506 case ppir_op_sqrt:
507 case ppir_op_sin:
508 case ppir_op_cos:
509 {
510 f->scalar.dest_vec = false;
511 f->scalar.arg1_en = false;
512
513 ppir_dest *dest = &alu->dest;
514 int dest_component = ffs(dest->write_mask) - 1;
515 assert(dest_component >= 0);
516 f->scalar.dest = ppir_target_get_dest_reg_index(dest) + dest_component;
517 f->scalar.dest_modifier = dest->modifier;
518
519 ppir_src *src = alu->src;
520 f->scalar.arg0_src = get_scl_reg_index(src, dest_component);
521 f->scalar.arg0_absolute = src->absolute;
522 f->scalar.arg0_negate = src->negate;
523
524 switch (node->op) {
525 case ppir_op_rsqrt:
526 f->scalar.op = ppir_codegen_combine_scalar_op_rsqrt;
527 break;
528 case ppir_op_log2:
529 f->scalar.op = ppir_codegen_combine_scalar_op_log2;
530 break;
531 case ppir_op_exp2:
532 f->scalar.op = ppir_codegen_combine_scalar_op_exp2;
533 break;
534 case ppir_op_rcp:
535 f->scalar.op = ppir_codegen_combine_scalar_op_rcp;
536 break;
537 case ppir_op_sqrt:
538 f->scalar.op = ppir_codegen_combine_scalar_op_sqrt;
539 break;
540 case ppir_op_sin:
541 f->scalar.op = ppir_codegen_combine_scalar_op_sin;
542 break;
543 case ppir_op_cos:
544 f->scalar.op = ppir_codegen_combine_scalar_op_cos;
545 break;
546 default:
547 break;
548 }
549 }
550 default:
551 break;
552 }
553 }
554
555 static void ppir_codegen_encode_store_temp(ppir_node *node, void *code)
556 {
557 assert(node->op == ppir_op_store_temp);
558
559 ppir_codegen_field_temp_write *f = code;
560 ppir_store_node *snode = ppir_node_to_store(node);
561 int num_components = snode->num_components;
562
563 f->temp_write.dest = 0x03; // 11 - temporary
564 f->temp_write.source = snode->src.reg->index;
565
566 int alignment = num_components == 4 ? 2 : num_components - 1;
567 f->temp_write.alignment = alignment;
568 f->temp_write.index = snode->index << (2 - alignment);
569
570 f->temp_write.offset_reg = snode->index >> 2;
571 }
572
573 static void ppir_codegen_encode_const(ppir_const *constant, uint16_t *code)
574 {
575 for (int i = 0; i < constant->num; i++)
576 code[i] = util_float_to_half(constant->value[i].f);
577 }
578
579 static void ppir_codegen_encode_discard(ppir_node *node, void *code)
580 {
581 ppir_codegen_field_branch *b = code;
582 assert(node->op == ppir_op_discard);
583
584 b->discard.word0 = PPIR_CODEGEN_DISCARD_WORD0;
585 b->discard.word1 = PPIR_CODEGEN_DISCARD_WORD1;
586 b->discard.word2 = PPIR_CODEGEN_DISCARD_WORD2;
587 }
588
589 static void ppir_codegen_encode_branch(ppir_node *node, void *code)
590 {
591 ppir_codegen_field_branch *b = code;
592 ppir_branch_node *branch;
593 ppir_instr *target_instr;
594 ppir_block *target;
595 if (node->op == ppir_op_discard) {
596 ppir_codegen_encode_discard(node, code);
597 return;
598 }
599
600 assert(node->op == ppir_op_branch);
601 branch = ppir_node_to_branch(node);
602
603 b->branch.unknown_0 = 0x0;
604 b->branch.unknown_1 = 0x0;
605
606 if (branch->num_src == 2) {
607 b->branch.arg0_source = get_scl_reg_index(&branch->src[0], 0);
608 b->branch.arg1_source = get_scl_reg_index(&branch->src[1], 0);
609 b->branch.cond_gt = branch->cond_gt;
610 b->branch.cond_eq = branch->cond_eq;
611 b->branch.cond_lt = branch->cond_lt;
612 } else if (branch->num_src == 0) {
613 /* Unconditional branch */
614 b->branch.arg0_source = 0;
615 b->branch.arg1_source = 0;
616 b->branch.cond_gt = true;
617 b->branch.cond_eq = true;
618 b->branch.cond_lt = true;
619 } else {
620 assert(false);
621 }
622
623 target = branch->target;
624 while (list_is_empty(&target->instr_list)) {
625 if (!target->list.next)
626 break;
627 target = LIST_ENTRY(ppir_block, target->list.next, list);
628 }
629
630 assert(!list_is_empty(&target->instr_list));
631
632 target_instr = list_first_entry(&target->instr_list, ppir_instr, list);
633 b->branch.target = target_instr->offset - node->instr->offset;
634 b->branch.next_count = target_instr->encode_size;
635 }
636
637 typedef void (*ppir_codegen_instr_slot_encode_func)(ppir_node *, void *);
638
639 static const ppir_codegen_instr_slot_encode_func
640 ppir_codegen_encode_slot[PPIR_INSTR_SLOT_NUM] = {
641 [PPIR_INSTR_SLOT_VARYING] = ppir_codegen_encode_varying,
642 [PPIR_INSTR_SLOT_TEXLD] = ppir_codegen_encode_texld,
643 [PPIR_INSTR_SLOT_UNIFORM] = ppir_codegen_encode_uniform,
644 [PPIR_INSTR_SLOT_ALU_VEC_MUL] = ppir_codegen_encode_vec_mul,
645 [PPIR_INSTR_SLOT_ALU_SCL_MUL] = ppir_codegen_encode_scl_mul,
646 [PPIR_INSTR_SLOT_ALU_VEC_ADD] = ppir_codegen_encode_vec_add,
647 [PPIR_INSTR_SLOT_ALU_SCL_ADD] = ppir_codegen_encode_scl_add,
648 [PPIR_INSTR_SLOT_ALU_COMBINE] = ppir_codegen_encode_combine,
649 [PPIR_INSTR_SLOT_STORE_TEMP] = ppir_codegen_encode_store_temp,
650 [PPIR_INSTR_SLOT_BRANCH] = ppir_codegen_encode_branch,
651 };
652
/* Encoded size in bits of each slot's field, indexed by slot number. */
static const int ppir_codegen_field_size[] = {
   34,
   62,
   41,
   43,
   30,
   44,
   31,
   30,
   41,
   73,
};
656
/* Number of 32-bit words needed to hold `size` bits (rounds up). */
static inline int align_to_word(int size)
{
   const int word_shift = 5;
   const int round_up = (1 << word_shift) - 1;

   return (size + round_up) >> word_shift;
}
661
662 static int get_instr_encode_size(ppir_instr *instr)
663 {
664 int size = 0;
665
666 for (int i = 0; i < PPIR_INSTR_SLOT_NUM; i++) {
667 if (instr->slots[i])
668 size += ppir_codegen_field_size[i];
669 }
670
671 for (int i = 0; i < 2; i++) {
672 if (instr->constant[i].num)
673 size += 64;
674 }
675
676 return align_to_word(size) + 1;
677 }
678
/* OR `src_size` bits from `src` into `dst`, starting at bit `dst_offset`.
 *
 * Both buffers are accessed as 32-bit words. When the destination bit
 * offset is word-aligned the data is copied with memcpy (whole words);
 * otherwise each source word is split across two destination words and
 * OR-ed in.
 */
static void bitcopy(void *dst, int dst_offset, void *src, int src_size)
{
   uint32_t *out = dst;
   uint32_t *in = src;
   int low_shift = dst_offset & 0x1f;

   out += dst_offset >> 5;

   if (!low_shift) {
      memcpy(out, in, align_to_word(src_size) * 4);
      return;
   }

   int high_shift = 32 - low_shift;
   int copied = 0;

   for (;;) {
      /* Low part of the source word fills the rest of this dest word. */
      *out |= *in << low_shift;
      out++;

      copied += high_shift;
      if (copied >= src_size)
         break;

      /* Remaining high part spills into the next dest word. */
      *out |= *in >> high_shift;
      in++;

      copied += low_shift;
      if (copied >= src_size)
         break;
   }
}
708
709 static int encode_instr(ppir_instr *instr, void *code, void *last_code)
710 {
711 int size = 0;
712 ppir_codegen_ctrl *ctrl = code;
713
714 for (int i = 0; i < PPIR_INSTR_SLOT_NUM; i++) {
715 if (instr->slots[i]) {
716 /* max field size (73), align to dword */
717 uint8_t output[12] = {0};
718
719 ppir_codegen_encode_slot[i](instr->slots[i], output);
720 bitcopy(ctrl + 1, size, output, ppir_codegen_field_size[i]);
721
722 size += ppir_codegen_field_size[i];
723 ctrl->fields |= 1 << i;
724 }
725 }
726
727 if (instr->slots[PPIR_INSTR_SLOT_TEXLD])
728 ctrl->sync = true;
729
730 if (instr->slots[PPIR_INSTR_SLOT_ALU_VEC_ADD]) {
731 ppir_node *node = instr->slots[PPIR_INSTR_SLOT_ALU_VEC_ADD];
732 if (node->op == ppir_op_ddx || node->op == ppir_op_ddy)
733 ctrl->sync = true;
734 }
735
736 if (instr->slots[PPIR_INSTR_SLOT_ALU_SCL_ADD]) {
737 ppir_node *node = instr->slots[PPIR_INSTR_SLOT_ALU_SCL_ADD];
738 if (node->op == ppir_op_ddx || node->op == ppir_op_ddy)
739 ctrl->sync = true;
740 }
741
742 for (int i = 0; i < 2; i++) {
743 if (instr->constant[i].num) {
744 uint16_t output[4] = {0};
745
746 ppir_codegen_encode_const(instr->constant + i, output);
747 bitcopy(ctrl + 1, size, output, instr->constant[i].num * 16);
748
749 size += 64;
750 ctrl->fields |= 1 << (ppir_codegen_field_shift_vec4_const_0 + i);
751 }
752 }
753
754 size = align_to_word(size) + 1;
755
756 ctrl->count = size;
757 if (instr->is_end)
758 ctrl->stop = true;
759
760 if (last_code) {
761 ppir_codegen_ctrl *last_ctrl = last_code;
762 last_ctrl->next_count = size;
763 last_ctrl->prefetch = true;
764 }
765
766 return size;
767 }
768
769 static void ppir_codegen_print_prog(ppir_compiler *comp)
770 {
771 uint32_t *prog = comp->prog->shader;
772 unsigned offset = 0;
773
774 printf("========ppir codegen========\n");
775 list_for_each_entry(ppir_block, block, &comp->block_list, list) {
776 list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
777 printf("%03d (@%6d): ", instr->index, instr->offset);
778 int n = prog[0] & 0x1f;
779 for (int i = 0; i < n; i++) {
780 if (i && i % 6 == 0)
781 printf("\n ");
782 printf("%08x ", prog[i]);
783 }
784 printf("\n");
785 ppir_disassemble_instr(prog, offset);
786 prog += n;
787 offset += n;
788 }
789 }
790 printf("-----------------------\n");
791 }
792
793 bool ppir_codegen_prog(ppir_compiler *comp)
794 {
795 int size = 0;
796 list_for_each_entry(ppir_block, block, &comp->block_list, list) {
797 list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
798 instr->offset = size;
799 instr->encode_size = get_instr_encode_size(instr);
800 size += instr->encode_size;
801 }
802 }
803
804 uint32_t *prog = rzalloc_size(comp->prog, size * sizeof(uint32_t));
805 if (!prog)
806 return false;
807
808 uint32_t *code = prog, *last_code = NULL;
809 list_for_each_entry(ppir_block, block, &comp->block_list, list) {
810 list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
811 int offset = encode_instr(instr, code, last_code);
812 last_code = code;
813 code += offset;
814 }
815 }
816
817 comp->prog->shader = prog;
818 comp->prog->shader_size = size * sizeof(uint32_t);
819
820 if (lima_debug & LIMA_DEBUG_PP)
821 ppir_codegen_print_prog(comp);
822
823 return true;
824 }