9941b27d61480f83f0b353cad5a80b4eccbdfec6
[mesa.git] / src / panfrost / bifrost / bi_pack.c
1 /*
2 * Copyright (C) 2020 Collabora, Ltd.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24 #include "compiler.h"
25
/* Copies the raw bytes of a packed struct into a zeroed 64-bit word and
 * returns that word from the *calling* function, so it must be used as the
 * final statement of a function with an integer return type. The struct must
 * not be larger than 64 bits, otherwise the copy would overflow the
 * temporary. The do/while wrapper makes the macro behave as one statement and
 * the suffixed local name cannot shadow the argument. */

#define RETURN_PACKED(str) do { \
        uint64_t packed_bits_ = 0; \
        assert(sizeof(str) <= sizeof(packed_bits_)); \
        memcpy(&packed_bits_, &(str), sizeof(str)); \
        return packed_bits_; \
} while (0)
31
32 /* This file contains the final passes of the compiler. Running after
33 * scheduling and RA, the IR is now finalized, so we need to emit it to actual
34 * bits on the wire (as well as fixup branches) */
35
36 static uint64_t
37 bi_pack_header(bi_clause *clause, bi_clause *next, bool is_fragment)
38 {
39 struct bifrost_header header = {
40 .back_to_back = clause->back_to_back,
41 .no_end_of_shader = (next != NULL),
42 .elide_writes = is_fragment,
43 .branch_cond = clause->branch_conditional,
44 .datareg_writebarrier = clause->data_register_write_barrier,
45 .datareg = clause->data_register,
46 .scoreboard_deps = clause->dependencies,
47 .scoreboard_index = clause->scoreboard_id,
48 .clause_type = clause->clause_type,
49 .next_clause_type = next ? next->clause_type : 0,
50 };
51
52 uint64_t u = 0;
53 memcpy(&u, &header, sizeof(header));
54 return u;
55 }
56
/* Port assignment state for one bundle */

struct bi_registers {
        /* Register number held by each of the four ports */
        unsigned port[4];

        /* Whether read ports 0 and 1 are in use; either may be disabled */
        bool enabled[2];

        /* Does FMA write a register? If only one unit writes, it uses port
         * 2; if both do, ADD/FMA use ports 2/3 respectively */
        bool write_fma;

        /* Does ADD write a register? */
        bool write_add;

        /* Is port 3 being used for a read? */
        bool read_port3;

        /* Packed uniform/constant field for the bundle */
        uint8_t uniform_constant;

        /* Whether writes are actually for the last instruction */
        bool first_instruction;
};
79
80 /* The uniform/constant slot allows loading a contiguous 64-bit immediate or
81 * pushed uniform per bundle. Figure out which one we need in the bundle (the
82 * scheduler needs to ensure we only have one type per bundle), validate
83 * everything, and rewrite away the register/uniform indices to use 3-bit
84 * sources directly. */
85
86 static unsigned
87 bi_lookup_constant(bi_clause *clause, uint64_t cons)
88 {
89 for (unsigned i = 0; i < clause->constant_count; ++i) {
90 /* Only check top 60-bits since that's what's actually embedded
91 * in the clause, the bottom 4-bits are bundle-inline */
92
93 if ((cons >> 4) == (clause->constants[i] >> 4))
94 return i;
95 }
96
97 unreachable("Invalid constant accessed");
98 }
99
/* Maps a constant-table index (0..5) to its selector code, already shifted
 * into the upper nibble of the uniform/constant field. */

static unsigned
bi_constant_field(unsigned idx)
{
        static const unsigned selector_for_index[6] = {
                4, 5, 6, 7, 2, 3
        };

        assert(idx < 6);

        unsigned selector = selector_for_index[idx];
        return selector << 4;
}
111
112 static bool
113 bi_assign_uniform_constant_single(
114 struct bi_registers *regs,
115 bi_clause *clause,
116 bi_instruction *ins, bool assigned, bool fast_zero)
117 {
118 if (!ins)
119 return assigned;
120
121 bi_foreach_src(ins, s) {
122 if (s == 0 && (ins->type == BI_LOAD_VAR_ADDRESS || ins->type == BI_LOAD_ATTR)) continue;
123
124 if (ins->src[s] & BIR_INDEX_CONSTANT) {
125 /* TODO: lo/hi matching? */
126 uint64_t cons = ins->constant.u64;
127 unsigned idx = bi_lookup_constant(clause, cons);
128 unsigned f = bi_constant_field(idx) | (cons & 0xF);
129
130 if (assigned && regs->uniform_constant != f)
131 unreachable("Mismatched uniform/const field: imm");
132
133 regs->uniform_constant = f;
134 ins->src[s] = BIR_INDEX_PASS | BIFROST_SRC_CONST_LO;
135 assigned = true;
136 } else if (ins->src[s] & BIR_INDEX_ZERO && (ins->type == BI_LOAD_UNIFORM || ins->type == BI_LOAD_VAR)) {
137 /* XXX: HACK UNTIL WE HAVE HI MATCHING DUE TO OVERFLOW XXX */
138 ins->src[s] = BIR_INDEX_PASS | BIFROST_SRC_CONST_HI;
139 } else if (ins->src[s] & BIR_INDEX_ZERO && !fast_zero) {
140 /* FMAs have a fast zero port, ADD needs to use the
141 * uniform/const port's special 0 mode handled here */
142 unsigned f = 0;
143
144 if (assigned && regs->uniform_constant != f)
145 unreachable("Mismatched uniform/const field: 0");
146
147 regs->uniform_constant = f;
148 ins->src[s] = BIR_INDEX_PASS | BIFROST_SRC_CONST_LO;
149 assigned = true;
150 } else if (s & BIR_INDEX_UNIFORM) {
151 unreachable("Push uniforms not implemented yet");
152 }
153 }
154
155 return assigned;
156 }
157
158 static void
159 bi_assign_uniform_constant(
160 bi_clause *clause,
161 struct bi_registers *regs,
162 bi_bundle bundle)
163 {
164 bool assigned =
165 bi_assign_uniform_constant_single(regs, clause, bundle.fma, false, true);
166
167 bi_assign_uniform_constant_single(regs, clause, bundle.add, assigned, false);
168 }
169
170 /* Assigns a port for reading, before anything is written */
171
172 static void
173 bi_assign_port_read(struct bi_registers *regs, unsigned src)
174 {
175 /* We only assign for registers */
176 if (!(src & BIR_INDEX_REGISTER))
177 return;
178
179 unsigned reg = src & ~BIR_INDEX_REGISTER;
180
181 /* Check if we already assigned the port */
182 for (unsigned i = 0; i <= 1; ++i) {
183 if (regs->port[i] == reg && regs->enabled[i])
184 return;
185 }
186
187 if (regs->port[3] == reg && regs->read_port3)
188 return;
189
190 /* Assign it now */
191
192 for (unsigned i = 0; i <= 1; ++i) {
193 if (!regs->enabled[i]) {
194 regs->port[i] = reg;
195 regs->enabled[i] = true;
196 return;
197 }
198 }
199
200 if (!regs->read_port3) {
201 regs->port[3] = reg;
202 regs->read_port3 = true;
203 }
204 }
205
206 static struct bi_registers
207 bi_assign_ports(bi_bundle now, bi_bundle prev)
208 {
209 struct bi_registers regs = { 0 };
210
211 /* We assign ports for the main register mechanism. Special ops
212 * use the data registers, which has its own mechanism entirely
213 * and thus gets skipped over here. */
214
215 unsigned read_dreg = now.add &&
216 bi_class_props[now.add->type] & BI_DATA_REG_SRC;
217
218 unsigned write_dreg = prev.add &&
219 bi_class_props[prev.add->type] & BI_DATA_REG_DEST;
220
221 /* First, assign reads */
222
223 if (now.fma)
224 bi_foreach_src(now.fma, src)
225 bi_assign_port_read(&regs, now.fma->src[src]);
226
227 if (now.add) {
228 bi_foreach_src(now.add, src) {
229 if (!(src == 0 && read_dreg))
230 bi_assign_port_read(&regs, now.add->src[src]);
231 }
232 }
233
234 /* Next, assign writes */
235
236 if (prev.fma && prev.fma->dest & BIR_INDEX_REGISTER) {
237 regs.port[2] = prev.fma->dest & ~BIR_INDEX_REGISTER;
238 regs.write_fma = true;
239 }
240
241 if (prev.add && prev.add->dest & BIR_INDEX_REGISTER && !write_dreg) {
242 unsigned r = prev.add->dest & ~BIR_INDEX_REGISTER;
243
244 if (regs.write_fma) {
245 /* Scheduler constraint: cannot read 3 and write 2 */
246 assert(!regs.read_port3);
247 regs.port[3] = r;
248 } else {
249 regs.port[2] = r;
250 }
251
252 regs.write_add = true;
253 }
254
255 /* Finally, ensure port 1 > port 0 for the 63-x trick to function */
256
257 if (regs.enabled[0] && regs.enabled[1] && regs.port[1] < regs.port[0]) {
258 unsigned temp = regs.port[0];
259 regs.port[0] = regs.port[1];
260 regs.port[1] = temp;
261 }
262
263 return regs;
264 }
265
266 /* Determines the register control field, ignoring the first? flag */
267
268 static enum bifrost_reg_control
269 bi_pack_register_ctrl_lo(struct bi_registers r)
270 {
271 if (r.write_fma) {
272 if (r.write_add) {
273 assert(!r.read_port3);
274 return BIFROST_WRITE_ADD_P2_FMA_P3;
275 } else {
276 if (r.read_port3)
277 return BIFROST_WRITE_FMA_P2_READ_P3;
278 else
279 return BIFROST_WRITE_FMA_P2;
280 }
281 } else if (r.write_add) {
282 if (r.read_port3)
283 return BIFROST_WRITE_ADD_P2_READ_P3;
284 else
285 return BIFROST_WRITE_ADD_P2;
286 } else if (r.read_port3)
287 return BIFROST_READ_P3;
288 else
289 return BIFROST_REG_NONE;
290 }
291
292 /* Ditto but account for the first? flag this time */
293
294 static enum bifrost_reg_control
295 bi_pack_register_ctrl(struct bi_registers r)
296 {
297 enum bifrost_reg_control ctrl = bi_pack_register_ctrl_lo(r);
298
299 if (r.first_instruction) {
300 if (ctrl == BIFROST_REG_NONE)
301 ctrl = BIFROST_FIRST_NONE;
302 else
303 ctrl |= BIFROST_FIRST_NONE;
304 }
305
306 return ctrl;
307 }
308
309 static uint64_t
310 bi_pack_registers(struct bi_registers regs)
311 {
312 enum bifrost_reg_control ctrl = bi_pack_register_ctrl(regs);
313 struct bifrost_regs s;
314 uint64_t packed = 0;
315
316 if (regs.enabled[1]) {
317 /* Gotta save that bit!~ Required by the 63-x trick */
318 assert(regs.port[1] > regs.port[0]);
319 assert(regs.enabled[0]);
320
321 /* Do the 63-x trick, see docs/disasm */
322 if (regs.port[0] > 31) {
323 regs.port[0] = 63 - regs.port[0];
324 regs.port[1] = 63 - regs.port[1];
325 }
326
327 assert(regs.port[0] <= 31);
328 assert(regs.port[1] <= 63);
329
330 s.ctrl = ctrl;
331 s.reg1 = regs.port[1];
332 s.reg0 = regs.port[0];
333 } else {
334 /* Port 1 disabled, so set to zero and use port 1 for ctrl */
335 s.reg1 = ctrl << 2;
336
337 if (regs.enabled[0]) {
338 /* Bit 0 upper bit of port 0 */
339 s.reg1 |= (regs.port[0] >> 5);
340
341 /* Rest of port 0 in usual spot */
342 s.reg0 = (regs.port[0] & 0b11111);
343 } else {
344 /* Bit 1 set if port 0 also disabled */
345 s.reg1 |= (1 << 1);
346 }
347 }
348
349 s.reg3 = regs.port[3];
350 s.reg2 = regs.port[2];
351 s.uniform_const = regs.uniform_constant;
352
353 memcpy(&packed, &s, sizeof(s));
354 return packed;
355 }
356
357 static void
358 bi_set_data_register(bi_clause *clause, unsigned idx)
359 {
360 assert(idx & BIR_INDEX_REGISTER);
361 unsigned reg = idx & ~BIR_INDEX_REGISTER;
362 assert(reg <= 63);
363 clause->data_register = reg;
364 }
365
366 static void
367 bi_read_data_register(bi_clause *clause, bi_instruction *ins)
368 {
369 bi_set_data_register(clause, ins->src[0]);
370 }
371
372 static void
373 bi_write_data_register(bi_clause *clause, bi_instruction *ins)
374 {
375 bi_set_data_register(clause, ins->dest);
376 }
377
378 static enum bifrost_packed_src
379 bi_get_src_reg_port(struct bi_registers *regs, unsigned src)
380 {
381 unsigned reg = src & ~BIR_INDEX_REGISTER;
382
383 if (regs->port[0] == reg && regs->enabled[0])
384 return BIFROST_SRC_PORT0;
385 else if (regs->port[1] == reg && regs->enabled[1])
386 return BIFROST_SRC_PORT1;
387 else if (regs->port[3] == reg && regs->read_port3)
388 return BIFROST_SRC_PORT3;
389 else
390 unreachable("Tried to access register with no port");
391 }
392
393 static enum bifrost_packed_src
394 bi_get_src(bi_instruction *ins, struct bi_registers *regs, unsigned s, bool is_fma)
395 {
396 unsigned src = ins->src[s];
397
398 if (src & BIR_INDEX_REGISTER)
399 return bi_get_src_reg_port(regs, src);
400 else if (src & BIR_INDEX_ZERO && is_fma)
401 return BIFROST_SRC_STAGE;
402 else if (src & BIR_INDEX_PASS)
403 return src & ~BIR_INDEX_PASS;
404 else
405 unreachable("Unknown src");
406 }
407
408 static unsigned
409 bi_pack_fma_fma(bi_instruction *ins, struct bi_registers *regs)
410 {
411 /* (-a)(-b) = ab, so we only need one negate bit */
412 bool negate_mul = ins->src_neg[0] ^ ins->src_neg[1];
413
414 struct bifrost_fma_fma pack = {
415 .src0 = bi_get_src(ins, regs, 0, true),
416 .src1 = bi_get_src(ins, regs, 1, true),
417 .src2 = bi_get_src(ins, regs, 2, true),
418 .src0_abs = ins->src_abs[0],
419 .src1_abs = ins->src_abs[1],
420 .src2_abs = ins->src_abs[2],
421 .src0_neg = negate_mul,
422 .src2_neg = ins->src_neg[2],
423 .op = BIFROST_FMA_OP_FMA
424 };
425
426 RETURN_PACKED(pack);
427 }
428
429 static unsigned
430 bi_pack_fma_add(bi_instruction *ins, struct bi_registers *regs)
431 {
432 /* TODO: fadd16 packing is a bit different */
433 assert(ins->dest_type == nir_type_float32);
434
435 struct bifrost_fma_add pack = {
436 .src0 = bi_get_src(ins, regs, 0, true),
437 .src1 = bi_get_src(ins, regs, 1, true),
438 .src0_abs = ins->src_abs[0],
439 .src1_abs = ins->src_abs[1],
440 .src0_neg = ins->src_neg[0],
441 .src1_neg = ins->src_neg[1],
442 .unk = 0x0,
443 .outmod = ins->outmod,
444 .roundmode = ins->roundmode,
445 .op = BIFROST_FMA_OP_FADD32
446 };
447
448 RETURN_PACKED(pack);
449 }
450
451 static unsigned
452 bi_pack_fma_1src(bi_instruction *ins, struct bi_registers *regs, unsigned op)
453 {
454 struct bifrost_fma_inst pack = {
455 .src0 = bi_get_src(ins, regs, 0, true),
456 .op = op
457 };
458
459 RETURN_PACKED(pack);
460 }
461
462 static unsigned
463 bi_pack_fma(bi_clause *clause, bi_bundle bundle, struct bi_registers *regs)
464 {
465 if (!bundle.fma)
466 return BIFROST_FMA_NOP;
467
468 switch (bundle.fma->type) {
469 case BI_ADD:
470 return bi_pack_fma_add(bundle.fma, regs);
471 case BI_CMP:
472 case BI_BITWISE:
473 case BI_CONVERT:
474 case BI_CSEL:
475 return BIFROST_FMA_NOP;
476 case BI_FMA:
477 return bi_pack_fma_fma(bundle.fma, regs);
478 case BI_FREXP:
479 case BI_ISUB:
480 case BI_MINMAX:
481 return BIFROST_FMA_NOP;
482 case BI_MOV:
483 return bi_pack_fma_1src(bundle.fma, regs, BIFROST_FMA_OP_MOV);
484 case BI_FMOV:
485 case BI_SHIFT:
486 case BI_SWIZZLE:
487 case BI_ROUND:
488 return BIFROST_FMA_NOP;
489 default:
490 unreachable("Cannot encode class as FMA");
491 }
492 }
493
494 static unsigned
495 bi_pack_add_ld_vary(bi_clause *clause, bi_instruction *ins, struct bi_registers *regs)
496 {
497 unsigned size = nir_alu_type_get_type_size(ins->dest_type);
498 assert(size == 32 || size == 16);
499
500 unsigned op = (size == 32) ?
501 BIFROST_ADD_OP_LD_VAR_32 :
502 BIFROST_ADD_OP_LD_VAR_16;
503
504 unsigned cmask = bi_from_bytemask(ins->writemask, size / 8);
505 unsigned channels = util_bitcount(cmask);
506 assert(cmask == ((1 << channels) - 1));
507
508 unsigned packed_addr = 0;
509
510 if (ins->src[0] & BIR_INDEX_CONSTANT) {
511 /* Direct uses address field directly */
512 packed_addr = ins->src[0] & ~BIR_INDEX_CONSTANT;
513 assert(packed_addr < 0b1000);
514 } else {
515 /* Indirect gets an extra source */
516 packed_addr = bi_get_src(ins, regs, 0, false) | 0b11000;
517 }
518
519 /* The destination is thrown in the data register */
520 assert(ins->dest & BIR_INDEX_REGISTER);
521 clause->data_register = ins->dest & ~BIR_INDEX_REGISTER;
522
523 assert(channels >= 1 && channels <= 4);
524
525 struct bifrost_ld_var pack = {
526 .src0 = bi_get_src(ins, regs, 1, false),
527 .addr = packed_addr,
528 .channels = MALI_POSITIVE(channels),
529 .interp_mode = ins->load_vary.interp_mode,
530 .reuse = ins->load_vary.reuse,
531 .flat = ins->load_vary.flat,
532 .op = op
533 };
534
535 RETURN_PACKED(pack);
536 }
537
538 static unsigned
539 bi_pack_add_2src(bi_instruction *ins, struct bi_registers *regs, unsigned op)
540 {
541 struct bifrost_add_2src pack = {
542 .src0 = bi_get_src(ins, regs, 0, true),
543 .src1 = bi_get_src(ins, regs, 1, true),
544 .op = op
545 };
546
547 RETURN_PACKED(pack);
548 }
549
550 static unsigned
551 bi_pack_add_ld_ubo(bi_clause *clause, bi_instruction *ins, struct bi_registers *regs)
552 {
553 unsigned components = bi_load32_components(ins);
554
555 const unsigned ops[4] = {
556 BIFROST_ADD_OP_LD_UBO_1,
557 BIFROST_ADD_OP_LD_UBO_2,
558 BIFROST_ADD_OP_LD_UBO_3,
559 BIFROST_ADD_OP_LD_UBO_4
560 };
561
562 bi_write_data_register(clause, ins);
563 return bi_pack_add_2src(ins, regs, ops[components - 1]);
564 }
565
566 static enum bifrost_ldst_type
567 bi_pack_ldst_type(nir_alu_type T)
568 {
569 switch (T) {
570 case nir_type_float16: return BIFROST_LDST_F16;
571 case nir_type_float32: return BIFROST_LDST_F32;
572 case nir_type_int32: return BIFROST_LDST_I32;
573 case nir_type_uint32: return BIFROST_LDST_U32;
574 default: unreachable("Invalid type loaded");
575 }
576 }
577
578 static unsigned
579 bi_pack_add_ld_var_addr(bi_clause *clause, bi_instruction *ins, struct bi_registers *regs)
580 {
581 /* Only direct loads supported */
582 assert(ins->src[0] == BIR_INDEX_CONSTANT);
583
584 struct bifrost_ld_var_addr pack = {
585 .src0 = bi_get_src(ins, regs, 1, false),
586 .src1 = bi_get_src(ins, regs, 2, false),
587 .location = ins->constant.u64,
588 .type = bi_pack_ldst_type(ins->src_types[3]),
589 .op = BIFROST_ADD_OP_LD_VAR_ADDR
590 };
591
592 bi_write_data_register(clause, ins);
593 RETURN_PACKED(pack);
594 }
595
596 static unsigned
597 bi_pack_add_ld_attr(bi_clause *clause, bi_instruction *ins, struct bi_registers *regs)
598 {
599 /* Only direct loads supported */
600 assert(ins->src[0] == BIR_INDEX_CONSTANT);
601
602 struct bifrost_ld_attr pack = {
603 .src0 = bi_get_src(ins, regs, 1, false),
604 .src1 = bi_get_src(ins, regs, 2, false),
605 .location = ins->constant.u64,
606 .channels = MALI_POSITIVE(bi_load32_components(ins)),
607 .type = bi_pack_ldst_type(ins->dest_type),
608 .op = BIFROST_ADD_OP_LD_ATTR
609 };
610
611 bi_write_data_register(clause, ins);
612 RETURN_PACKED(pack);
613 }
614
615 static unsigned
616 bi_pack_add_st_vary(bi_clause *clause, bi_instruction *ins, struct bi_registers *regs)
617 {
618 assert(ins->store_channels >= 1 && ins->store_channels <= 4);
619
620 struct bifrost_st_vary pack = {
621 .src0 = bi_get_src(ins, regs, 1, false),
622 .src1 = bi_get_src(ins, regs, 2, false),
623 .src2 = bi_get_src(ins, regs, 3, false),
624 .channels = MALI_POSITIVE(ins->store_channels),
625 .op = BIFROST_ADD_OP_ST_VAR
626 };
627
628 bi_read_data_register(clause, ins);
629 RETURN_PACKED(pack);
630 }
631
632 static unsigned
633 bi_pack_add_atest(bi_clause *clause, bi_instruction *ins, struct bi_registers *regs)
634 {
635 /* TODO: fp16 */
636 assert(ins->src_types[1] == nir_type_float32);
637
638 struct bifrost_add_atest pack = {
639 .src0 = bi_get_src(ins, regs, 0, false),
640 .src1 = bi_get_src(ins, regs, 1, false),
641 .component = 1, /* Set for fp32 */
642 .op = BIFROST_ADD_OP_ATEST,
643 };
644
645 /* Despite *also* writing with the usual mechanism... quirky and
646 * perhaps unnecessary, but let's match the blob */
647 clause->data_register = ins->dest & ~BIR_INDEX_REGISTER;
648
649 RETURN_PACKED(pack);
650 }
651
652 static unsigned
653 bi_pack_add_blend(bi_instruction *ins, struct bi_registers *regs)
654 {
655 struct bifrost_add_inst pack = {
656 .src0 = bi_get_src(ins, regs, 0, false),
657 .op = BIFROST_ADD_OP_BLEND
658 };
659
660 /* TODO: Pack location in uniform_const */
661 assert(ins->blend_location == 0);
662
663 RETURN_PACKED(pack);
664 }
665
666 static unsigned
667 bi_pack_add(bi_clause *clause, bi_bundle bundle, struct bi_registers *regs)
668 {
669 if (!bundle.add)
670 return BIFROST_ADD_NOP;
671
672 switch (bundle.add->type) {
673 case BI_ADD:
674 return BIFROST_ADD_NOP;
675 case BI_ATEST:
676 return bi_pack_add_atest(clause, bundle.add, regs);
677 case BI_BRANCH:
678 case BI_CMP:
679 return BIFROST_ADD_NOP;
680 case BI_BLEND:
681 return bi_pack_add_blend(bundle.add, regs);
682 case BI_BITWISE:
683 case BI_CONVERT:
684 case BI_DISCARD:
685 case BI_FREXP:
686 case BI_ISUB:
687 case BI_LOAD:
688 return BIFROST_ADD_NOP;
689 case BI_LOAD_ATTR:
690 return bi_pack_add_ld_attr(clause, bundle.add, regs);
691 case BI_LOAD_UNIFORM:
692 return bi_pack_add_ld_ubo(clause, bundle.add, regs);
693 case BI_LOAD_VAR:
694 return bi_pack_add_ld_vary(clause, bundle.add, regs);
695 case BI_LOAD_VAR_ADDRESS:
696 return bi_pack_add_ld_var_addr(clause, bundle.add, regs);
697 case BI_MINMAX:
698 case BI_MOV:
699 case BI_FMOV:
700 case BI_SHIFT:
701 case BI_STORE:
702 return BIFROST_ADD_NOP;
703 case BI_STORE_VAR:
704 return bi_pack_add_st_vary(clause, bundle.add, regs);
705 case BI_SPECIAL:
706 case BI_SWIZZLE:
707 case BI_TEX:
708 case BI_ROUND:
709 return BIFROST_ADD_NOP;
710 default:
711 unreachable("Cannot encode class as ADD");
712 }
713 }
714
/* A packed bundle split across two 64-bit words */
struct bi_packed_bundle {
        /* Low word of the bundle */
        uint64_t lo;

        /* High word of the bundle */
        uint64_t hi;
};
719
720 static struct bi_packed_bundle
721 bi_pack_bundle(bi_clause *clause, bi_bundle bundle, bi_bundle prev, bool first_bundle)
722 {
723 struct bi_registers regs = bi_assign_ports(bundle, prev);
724 bi_assign_uniform_constant(clause, &regs, bundle);
725 regs.first_instruction = first_bundle;
726
727 uint64_t reg = bi_pack_registers(regs);
728 uint64_t fma = bi_pack_fma(clause, bundle, &regs);
729 uint64_t add = bi_pack_add(clause, bundle, &regs);
730
731 struct bi_packed_bundle packed = {
732 .lo = reg | (fma << 35) | ((add & 0b111111) << 58),
733 .hi = add >> 6
734 };
735
736 return packed;
737 }
738
739 /* Packs the next two constants as a dedicated constant quadword at the end of
740 * the clause, returning the number packed. */
741
742 static unsigned
743 bi_pack_constants(bi_context *ctx, bi_clause *clause,
744 unsigned index,
745 struct util_dynarray *emission)
746 {
747 /* After these two, are we done? Determines tag */
748 bool done = clause->constant_count <= (index + 2);
749 bool only = clause->constant_count <= (index + 1);
750
751 /* TODO: Pos */
752 assert(index == 0 && clause->bundle_count == 1);
753
754 struct bifrost_fmt_constant quad = {
755 .pos = 0, /* TODO */
756 .tag = done ? BIFROST_FMTC_FINAL : BIFROST_FMTC_CONSTANTS,
757 .imm_1 = clause->constants[index + 0] >> 4,
758 .imm_2 = only ? 0 : clause->constants[index + 1] >> 4
759 };
760
761 /* XXX: On G71, Connor observed that the difference of the top 4 bits
762 * of the second constant with the first must be less than 8, otherwise
763 * we have to swap them. I am not able to reproduce this on G52,
764 * further investigation needed. Possibly an errata. XXX */
765
766 util_dynarray_append(emission, struct bifrost_fmt_constant, quad);
767
768 return 2;
769 }
770
771 static void
772 bi_pack_clause(bi_context *ctx, bi_clause *clause, bi_clause *next,
773 struct util_dynarray *emission)
774 {
775 struct bi_packed_bundle ins_1 = bi_pack_bundle(clause, clause->bundles[0], clause->bundles[0], true);
776 assert(clause->bundle_count == 1);
777
778 /* Used to decide if we elide writes */
779 bool is_fragment = ctx->stage == MESA_SHADER_FRAGMENT;
780
781 /* State for packing constants throughout */
782 unsigned constant_index = 0;
783
784 struct bifrost_fmt1 quad_1 = {
785 .tag = clause->constant_count ? BIFROST_FMT1_CONSTANTS : BIFROST_FMT1_FINAL,
786 .header = bi_pack_header(clause, next, is_fragment),
787 .ins_1 = ins_1.lo,
788 .ins_2 = ins_1.hi & ((1 << 11) - 1),
789 .ins_0 = (ins_1.hi >> 11) & 0b111,
790 };
791
792 util_dynarray_append(emission, struct bifrost_fmt1, quad_1);
793
794 /* Pack the remaining constants */
795
796 while (constant_index < clause->constant_count) {
797 constant_index += bi_pack_constants(ctx, clause,
798 constant_index, emission);
799 }
800 }
801
802 static bi_clause *
803 bi_next_clause(bi_context *ctx, pan_block *block, bi_clause *clause)
804 {
805 /* Try the next clause in this block */
806 if (clause->link.next != &((bi_block *) block)->clauses)
807 return list_first_entry(&(clause->link), bi_clause, link);
808
809 /* Try the next block, or the one after that if it's empty, etc .*/
810 pan_block *next_block = pan_next_block(block);
811
812 bi_foreach_block_from(ctx, next_block, block) {
813 bi_block *blk = (bi_block *) block;
814
815 if (!list_is_empty(&blk->clauses))
816 return list_first_entry(&(blk->clauses), bi_clause, link);
817 }
818
819 return NULL;
820 }
821
822 void
823 bi_pack(bi_context *ctx, struct util_dynarray *emission)
824 {
825 util_dynarray_init(emission, NULL);
826
827 bi_foreach_block(ctx, _block) {
828 bi_block *block = (bi_block *) _block;
829
830 bi_foreach_clause_in_block(block, clause) {
831 bi_clause *next = bi_next_clause(ctx, _block, clause);
832 bi_pack_clause(ctx, clause, next, emission);
833 }
834 }
835 }