pan/bi: Structify ADD ICMP 16
[mesa.git] / src / panfrost / bifrost / bifrost.h
1 /*
2 * Copyright (C) 2019 Connor Abbott <cwabbott0@gmail.com>
3 * Copyright (C) 2019 Lyude Paul <thatslyude@gmail.com>
4 * Copyright (C) 2019 Ryan Houdek <Sonicadvance1@gmail.com>
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 * SOFTWARE.
24 */
25
26 #ifndef __bifrost_h__
27 #define __bifrost_h__
28
29 #include <stdint.h>
30 #include <stdbool.h>
31
/* Debug option bits. Each value is a distinct single bit; presumably they are
 * OR-ed together and tested against bifrost_debug -- confirm against the
 * users of this header. */
#define BIFROST_DBG_MSGS 0x0001
#define BIFROST_DBG_SHADERS 0x0002

/* Declared here, defined in another translation unit. */
extern int bifrost_debug;
36
/* Clause type codes. These are stored in the 4-bit clause_type and
 * next_clause_type members of struct bifrost_header. The numbering has
 * gaps; only the values listed here are named. */
enum bifrost_clause_type {
        BIFROST_CLAUSE_NONE       = 0x0,
        BIFROST_CLAUSE_LOAD_VARY  = 0x1,
        BIFROST_CLAUSE_UBO        = 0x2,
        BIFROST_CLAUSE_TEX        = 0x3,
        BIFROST_CLAUSE_SSBO_LOAD  = 0x5,
        BIFROST_CLAUSE_SSBO_STORE = 0x6,
        BIFROST_CLAUSE_BLEND      = 0x9,
        BIFROST_CLAUSE_FRAGZ      = 0xc,
        BIFROST_CLAUSE_ATEST      = 0xd,
        BIFROST_CLAUSE_64BIT      = 0xf,
};
49
/* Clause header. The bit-field widths below add up to 45 bits, which matches
 * the 45-bit `header` member of struct bifrost_fmt1. Members named unkN have
 * no known meaning. */
struct bifrost_header {
        unsigned unk0 : 7;
        /* If true, convert any infinite result of any floating-point operation
         * to the biggest representable number. */
        unsigned suppress_inf: 1;
        /* Convert any NaN results to 0. */
        unsigned suppress_nan : 1;
        unsigned unk1 : 2;
        /* True if the execution mask of the next clause is the same as the
         * mask of the current clause. */
        unsigned back_to_back : 1;
        unsigned no_end_of_shader: 1;
        unsigned unk2 : 2;
        /* Set to true for fragment shaders, to implement this bit of spec text
         * from section 7.1.5 of the GLSL ES spec:
         *
         * "Stores to image and buffer variables performed by helper invocations
         * have no effect on the underlying image or buffer memory."
         *
         * Helper invocations are threads (invocations) corresponding to pixels
         * in a quad that aren't actually part of the triangle, but are included
         * to make derivatives work correctly. They're usually turned on, but
         * they need to be masked off for GLSL-level stores. This bit seems to
         * be the only bit that's actually different between fragment shaders
         * and other shaders, so this is probably what it's doing. */
        unsigned elide_writes : 1;
        /* If back_to_back is off:
         * - true for conditional branches and fallthrough
         * - false for unconditional branches
         * The blob seems to always set it to true if back_to_back is on. */
        unsigned branch_cond : 1;
        /* This bit is set when the next clause writes to the data register of
         * some previous clause. */
        unsigned datareg_writebarrier: 1;
        unsigned datareg : 6;
        unsigned scoreboard_deps: 8;
        unsigned scoreboard_index: 3;
        enum bifrost_clause_type clause_type: 4;
        unsigned unk3 : 1; /* part of clause_type? */
        enum bifrost_clause_type next_clause_type: 4;
        unsigned unk4 : 1; /* part of next_clause_type? */
} __attribute__((packed));
92
/* 3-bit source selector, as held by the srcN members of the instruction
 * layouts in this header. Note that there is no PORT2 entry: the value 2
 * names PORT3. */
enum bifrost_packed_src {
        BIFROST_SRC_PORT0    = 0x0,
        BIFROST_SRC_PORT1    = 0x1,
        BIFROST_SRC_PORT3    = 0x2,
        BIFROST_SRC_STAGE    = 0x3,
        BIFROST_SRC_CONST_LO = 0x4,
        BIFROST_SRC_CONST_HI = 0x5,
        BIFROST_SRC_PASS_FMA = 0x6,
        BIFROST_SRC_PASS_ADD = 0x7
};
103
/* FMA opcodes that live under the 0xe0000 prefix.
 *
 * Every expansion is fully parenthesized so the macros can be combined with
 * other operators (masking, shifting, comparison) without the `|` inside the
 * expansion re-associating with the surrounding expression. */
#define BIFROST_FMA_EXT (0xe0000)
#define BIFROST_FMA_OP_MOV (BIFROST_FMA_EXT | 0x32d)
#define BIFROST_FMA_OP_FREXPE_LOG (BIFROST_FMA_EXT | 0x3c5)
#define BIFROST_FMA_OP_ADD_FREXPM ((BIFROST_FMA_EXT | 0x1e80) >> 3)
#define BIFROST_FMA_SEL_16(swiz) (((BIFROST_FMA_EXT | 0x1e00) >> 3) | (swiz))

/* Rounding opcodes: `mode` is placed at bit 6; for the 16-bit form `swiz` is
 * OR-ed into the low bits. */
#define BIFROST_FMA_ROUND_16(mode, swiz) (BIFROST_FMA_EXT | 0x1800 | (swiz) | ((mode) << 6))
#define BIFROST_FMA_ROUND_32(mode) (BIFROST_FMA_EXT | 0x1805 | ((mode) << 6))
112
/* Generic FMA instruction view: a 3-bit source selector followed by a 20-bit
 * opcode, 23 bits in total. The more specific bifrost_fma_* layouts in this
 * header have the same total width. */
struct bifrost_fma_inst {
        unsigned src0 : 3;
        unsigned op : 20;
} __attribute__((packed));
117
/* Two-source FMA instruction: two 3-bit source selectors and a 17-bit opcode
 * (23 bits in total). */
struct bifrost_fma_2src {
        unsigned src0 : 3;
        unsigned src1 : 3;
        unsigned op : 17;
} __attribute__((packed));
123
/* Opcode for the 7-bit op field of struct bifrost_fma_sel8. */
#define BIFROST_FMA_OP_SEL8 (0x71)

/* Four-source FMA SEL8 layout with a 4-bit swizzle (23 bits in total). */
struct bifrost_fma_sel8 {
        unsigned src0 : 3;
        unsigned src1 : 3;
        unsigned src2 : 3;
        unsigned src3 : 3;
        unsigned swizzle : 4;
        unsigned op : 7;
} __attribute__((packed));
134
/* Opcode for the 5-bit op field of struct bifrost_fma_mscale. */
#define BIFROST_FMA_OP_MSCALE (0x50 >> 3)

/* Four-source FMA MSCALE layout (23 bits in total). */
struct bifrost_fma_mscale {
        unsigned src0 : 3;
        unsigned src1 : 3;
        unsigned src2 : 3;
        unsigned src3 : 3;

        /* If mscale_mode is set - an MSCALE specific mode. If it is not set, a
         * regular outmod */
        unsigned mode : 2;
        unsigned mscale_mode : 1;

        /* Per-source modifiers: absolute value on src0, negation on src1 and
         * src2 (going by the member names). */
        unsigned src0_abs : 1;
        unsigned src1_neg : 1;
        unsigned src2_neg : 1;
        unsigned op : 5;
} __attribute__((packed));
153
/* ADD-unit opcodes. All of these fit in 17 bits; presumably they are the
 * values for the op field of struct bifrost_add_inst below. */
#define BIFROST_ADD_OP_BLEND (0x1952c)
#define BIFROST_ADD_OP_FRCP_FAST_F32 (0x0cc00)
#define BIFROST_ADD_OP_FRCP_FAST_F16_X (0x0ce10)
#define BIFROST_ADD_OP_FRCP_FAST_F16_Y (0x0ce30)
#define BIFROST_ADD_OP_FRSQ_FAST_F32 (0x0cc20)
#define BIFROST_ADD_OP_FRSQ_FAST_F16_X (0x0ce50)
#define BIFROST_ADD_OP_FRSQ_FAST_F16_Y (0x0ce70)
#define BIFROST_ADD_OP_LOG2_HELP (0x0cc68)
#define BIFROST_ADD_OP_FEXP2_FAST (0x0cd58)

/* Generic ADD instruction view: a 3-bit source selector followed by a 17-bit
 * opcode, 20 bits in total. The more specific ADD layouts in this header
 * have the same total width. */
struct bifrost_add_inst {
        unsigned src0 : 3;
        unsigned op : 17;
} __attribute__((packed));
168
/* DISCARD opcode, pre-shifted for the 9-bit op field of
 * struct bifrost_add_discard. */
#define BIFROST_ADD_OP_DISCARD (0x19100 >> 8)

/* Comparison selected by the 2-bit cond field of struct bifrost_add_discard. */
enum bifrost_discard_cond {
        BIFROST_DISCARD_FEQ = 0x0,
        BIFROST_DISCARD_FNE = 0x1,
        BIFROST_DISCARD_FLE = 0x2,
        BIFROST_DISCARD_FLT = 0x3
};
177
/* ADD-unit DISCARD layout (20 bits in total): two sources compared with
 * `cond`. The fields between src0 and op occupy 8 bits, which is the amount
 * BIFROST_ADD_OP_DISCARD is shifted right by. */
struct bifrost_add_discard {
        unsigned src0 : 3;
        unsigned src1 : 3;
        enum bifrost_discard_cond cond : 2;
        /* Zero for fp32 */
        unsigned src0_select : 1;
        unsigned src1_select : 1;
        unsigned fp32 : 1;
        unsigned op : 9;
} __attribute__((packed));
188
/* Two-source ADD opcodes. The `>> 3` drops the three bits that src1 occupies
 * in struct bifrost_add_2src, leaving a value for its 14-bit op field. */
#define BIFROST_ADD_OP_LD_UBO_1 (0x0c1a0 >> 3)
#define BIFROST_ADD_OP_LD_UBO_2 (0x0c1e0 >> 3)
#define BIFROST_ADD_OP_LD_UBO_3 (0x0caa0 >> 3)
#define BIFROST_ADD_OP_LD_UBO_4 (0x0c220 >> 3)
/* SEL.16 opcode with `swiz` OR-ed into the low bits. */
#define BIFROST_ADD_SEL_16(swiz) ((0xea60 >> 3) | (swiz))

/* Two-source ADD instruction: two 3-bit source selectors and a 14-bit opcode
 * (20 bits in total). */
struct bifrost_add_2src {
        unsigned src0 : 3;
        unsigned src1 : 3;
        unsigned op : 14;
} __attribute__((packed));
200
/* Opcodes for the 4-bit op field of struct bifrost_add_faddmin. */
#define BIFROST_ADD_OP_FMAX32 (0x00)
#define BIFROST_ADD_OP_FMIN32 (0x01)
#define BIFROST_ADD_OP_FADD32 (0x02)

#define BIFROST_ADD_OP_FADD16 (0x0A)

/* ADD-unit floating-point add/min/max layout (20 bits in total). For fp16 the
 * `select` and `outmod` fields are reused as the two source swizzles, as
 * noted on each member. */
struct bifrost_add_faddmin {
        unsigned src0 : 3;
        unsigned src1 : 3;
        unsigned src1_abs : 1;
        unsigned src0_neg : 1;
        unsigned src1_neg : 1;
        unsigned select : 2; /* swizzle_0 for fp16 */
        unsigned outmod : 2; /* swizzle_1 for fp16 */
        unsigned mode : 2;
        unsigned src0_abs : 1;
        unsigned op : 4;
} __attribute__((packed));
219
/* Opcodes for the 5-bit op field of struct bifrost_add_fmin16. */
#define BIFROST_ADD_OP_FMAX16 (0x10)
#define BIFROST_ADD_OP_FMIN16 (0x12)

/* ADD-unit 16-bit min/max layout (20 bits in total). */
struct bifrost_add_fmin16 {
        unsigned src0 : 3;
        unsigned src1 : 3;
        /* abs2 inferred as with FMA (see struct bifrost_fma_add_minmax16) */
        unsigned abs1 : 1;
        unsigned src0_neg : 1;
        unsigned src1_neg : 1;
        unsigned src0_swizzle : 2;
        unsigned src1_swizzle : 2;
        unsigned mode : 2;
        unsigned op : 5;
} __attribute__((packed));
235
/* ST_VAR opcode, pre-shifted for the 9-bit op field of struct bifrost_st_vary
 * (the fields between src0 and op occupy 8 bits). */
#define BIFROST_ADD_OP_ST_VAR (0x19300 >> 8)

/* Three-source ADD layout for the ST_VAR opcode (20 bits in total). */
struct bifrost_st_vary {
        unsigned src0 : 3;
        unsigned src1 : 3;
        unsigned src2 : 3;
        unsigned channels : 2;
        unsigned op : 9;
} __attribute__((packed));
245
/* Opcode for the 12-bit op field of struct bifrost_add_atest. */
#define BIFROST_ADD_OP_ATEST (0xc8f)

/* ADD-unit ATEST layout (20 bits in total). */
struct bifrost_add_atest {
        /* gl_SampleMask (R60) */
        unsigned src0 : 3;

        /* Alpha value */
        unsigned src1 : 3;

        /* If half, X/Y select. If !half, always set */
        unsigned component : 1;
        unsigned half : 1;

        unsigned op : 12;
} __attribute__((packed));
261
/* 2-bit output modifier used by the `outmod` fields of the FMA layouts. */
enum bifrost_outmod {
        BIFROST_NONE       = 0,
        BIFROST_POS        = 1,
        BIFROST_SAT_SIGNED = 2,
        BIFROST_SAT        = 3
};
268
/* 2-bit floating-point rounding mode. */
enum bifrost_roundmode {
        /* round to even */
        BIFROST_RTE = 0,
        /* round to positive */
        BIFROST_RTP = 1,
        /* round to negative */
        BIFROST_RTN = 2,
        /* round to zero */
        BIFROST_RTZ = 3,
};
275
/* NaN / signed-zero behaviour of the min and max instructions. */
enum bifrost_minmax_mode {
        /* Same as fmax() and fmin(): return the other number if any number
         * is NaN. Also always return +0 if one argument is +0 and the other
         * is -0. */
        BIFROST_MINMAX_NONE = 0,

        /* Instead of never returning a NaN, always return one. The
         * "greater"/"lesser" NaN is always returned, first by checking the
         * sign and then the mantissa bits. */
        BIFROST_NAN_WINS = 1,

        /* For max, implement src0 > src1 ? src0 : src1.
         * For min, implement src0 < src1 ? src0 : src1.
         * This includes handling NaN's and signedness of 0 differently from
         * the modes above, since +0 and -0 compare equal and comparisons
         * always return false for NaN's. As a result, this mode is *not*
         * commutative. */
        BIFROST_SRC1_WINS = 2,

        /* For max, implement src0 < src1 ? src1 : src0.
         * For min, implement src0 > src1 ? src1 : src0. */
        BIFROST_SRC0_WINS = 3
};
302
/* Opcodes for the 6-bit op field of struct bifrost_fma_add. */
#define BIFROST_FMA_OP_FADD32 (0x58 >> 2)
#define BIFROST_FMA_OP_FMAX32 (0x40 >> 2)
#define BIFROST_FMA_OP_FMIN32 (0x44 >> 2)

/* FMA-unit 32-bit add/min/max layout (23 bits in total), with per-source
 * abs/neg modifiers plus a rounding mode and an output modifier. */
struct bifrost_fma_add {
        unsigned src0 : 3;
        unsigned src1 : 3;
        unsigned src1_abs : 1;
        unsigned src0_neg : 1;
        unsigned src1_neg : 1;
        unsigned unk : 3;
        unsigned src0_abs : 1;
        enum bifrost_roundmode roundmode : 2;
        enum bifrost_outmod outmod : 2;
        unsigned op : 6;
} __attribute__((packed));
319
/* Opcodes for the 6-bit op field of struct bifrost_fma_add_minmax16. */
#define BIFROST_FMA_OP_FMAX16 (0xC0 >> 2)
#define BIFROST_FMA_OP_FMIN16 (0xCC >> 2)
#define BIFROST_FMA_OP_FADD16 (0xD8 >> 2)

/* FMA-unit 16-bit add/min/max layout (23 bits in total). */
struct bifrost_fma_add_minmax16 {
        unsigned src0 : 3;
        unsigned src1 : 3;
        /* abs2 inferred as (src1 < src0) */
        unsigned abs1 : 1;
        unsigned src0_neg : 1;
        unsigned src1_neg : 1;
        unsigned src0_swizzle : 2;
        unsigned src1_swizzle : 2;
        /* roundmode for add, min/max mode for min/max */
        unsigned mode : 2;
        enum bifrost_outmod outmod : 2;
        unsigned op : 6;
} __attribute__((packed));
338
/* Opcode for the 2-bit op field of struct bifrost_fma_fma. */
#define BIFROST_FMA_OP_FMA (0x00)

/* Three-source 32-bit fused multiply-add layout (23 bits in total). */
struct bifrost_fma_fma {
        unsigned src0 : 3;
        unsigned src1 : 3;
        unsigned src2 : 3;
        unsigned src_expand : 3;
        unsigned src0_abs : 1;
        enum bifrost_roundmode roundmode : 2;
        enum bifrost_outmod outmod : 2;
        /* The 14 and 17 below are presumably bit indices within the 20-bit op
         * of struct bifrost_fma_inst, which starts at bit 3: the two members
         * sit at overall bits 17 and 20. */
        unsigned src0_neg : 1; /* 14 */
        unsigned src2_neg : 1;
        unsigned src1_abs : 1;
        unsigned src2_abs : 1; /* 17 */
        unsigned op : 2;
} __attribute__((packed));
355
/* Opcode for the 2-bit op field of struct bifrost_fma_fma16. */
#define BIFROST_FMA_OP_FMA16 (0x2)

/* Three-source 16-bit fused multiply-add layout (23 bits in total). Each
 * source has its own 2-bit swizzle; note that swizzle_2 is not adjacent to
 * swizzle_0 and swizzle_1. */
struct bifrost_fma_fma16 {
        unsigned src0 : 3;
        unsigned src1 : 3;
        unsigned src2 : 3;
        unsigned swizzle_0 : 2;
        unsigned swizzle_1 : 2;
        enum bifrost_roundmode roundmode : 2;
        enum bifrost_outmod outmod : 2;
        unsigned src0_neg : 1;
        unsigned src2_neg : 1;
        unsigned swizzle_2 : 2;
        unsigned op : 2;
} __attribute__((packed));
371
/* 3-bit condition code for the cond field of struct bifrost_csel4. */
enum bifrost_csel_cond {
        BIFROST_FEQ_F = 0,
        BIFROST_FGT_F = 1,
        BIFROST_FGE_F = 2,
        BIFROST_IEQ_F = 3,
        BIFROST_IGT_I = 4,
        BIFROST_IGE_I = 5,
        BIFROST_UGT_I = 6,
        BIFROST_UGE_I = 7,
};
382
/* Opcodes for the 8-bit op field of struct bifrost_csel4. */
#define BIFROST_FMA_OP_CSEL4 (0x5c)
#define BIFROST_FMA_OP_CSEL4_V16 (0xdc)

/* Four-source FMA conditional-select layout (23 bits in total). */
struct bifrost_csel4 {
        unsigned src0 : 3;
        unsigned src1 : 3;
        unsigned src2 : 3;
        unsigned src3 : 3;
        enum bifrost_csel_cond cond : 3;
        unsigned op : 8;
} __attribute__((packed));
394
/* Shift-and-combine opcodes. The `>> 12` values fit the 8-bit op field of
 * struct bifrost_shift_fma (the fields between src0 and op occupy 12 bits). */
#define BIFROST_FMA_OP_RSHIFT_NAND (0x60000 >> 12)
#define BIFROST_FMA_OP_RSHIFT_AND (0x61000 >> 12)
#define BIFROST_FMA_OP_LSHIFT_NAND (0x62000 >> 12)
#define BIFROST_FMA_OP_LSHIFT_AND (0x63000 >> 12)
#define BIFROST_FMA_OP_RSHIFT_XOR (0x64000 >> 12)
/* Shift-and-add/subtract opcodes, shifted by 6 instead of 12. The layout they
 * are meant for is not defined next to them -- TODO confirm which struct
 * consumes these. */
#define BIFROST_FMA_OP_LSHIFT_ADD_32 (0x65200 >> 6)
#define BIFROST_FMA_OP_LSHIFT_SUB_32 (0x65600 >> 6)
#define BIFROST_FMA_OP_LSHIFT_RSUB_32 (0x65a00 >> 6)
#define BIFROST_FMA_OP_RSHIFT_ADD_32 (0x65e00 >> 6)
#define BIFROST_FMA_OP_RSHIFT_SUB_32 (0x66200 >> 6)
#define BIFROST_FMA_OP_RSHIFT_RSUB_32 (0x66600 >> 6)

/* Three-source FMA shift layout (23 bits in total). */
struct bifrost_shift_fma {
        unsigned src0 : 3;
        unsigned src1 : 3;
        unsigned src2 : 3;
        unsigned half : 3;
        unsigned unk : 1; /* always set? */
        unsigned invert_1 : 1; /* Inverts sources to combining op */
        /* For XOR, switches RSHIFT to LSHIFT since only one invert needed */
        unsigned invert_2 : 1;
        unsigned op : 8;
} __attribute__((packed));
418
/* Three-source ADD shift layout (20 bits in total). Presumably the ADD-unit
 * counterpart of struct bifrost_shift_fma; the invert_1/invert_2 members
 * carry the same names. */
struct bifrost_shift_add {
        unsigned src0 : 3;
        unsigned src1 : 3;
        unsigned src2 : 3;
        unsigned zero : 2;

        unsigned invert_1 : 1;
        unsigned invert_2 : 1;

        unsigned op : 7;
} __attribute__((packed));
430
/* Floating-point comparison condition, stored in the 3-bit cond fields of
 * the FCMP layouts. Only values 0 through 5 are named. */
enum bifrost_fcmp_cond {
        BIFROST_OEQ = 0x0,
        BIFROST_OGT = 0x1,
        BIFROST_OGE = 0x2,
        BIFROST_UNE = 0x3,
        BIFROST_OLT = 0x4,
        BIFROST_OLE = 0x5
};
439
/* 32-bit FCMP opcodes (GL and D3D variants), pre-shifted for the 7-bit op
 * field of struct bifrost_fma_fcmp: the fields between src0 and op occupy
 * 13 bits. */
#define BIFROST_FMA_OP_FCMP_GL (0x48000 >> 13)
#define BIFROST_FMA_OP_FCMP_D3D (0x4c000 >> 13)

/* FMA-unit 32-bit float compare layout (23 bits in total). */
struct bifrost_fma_fcmp {
        unsigned src0 : 3;
        unsigned src1 : 3;
        unsigned src1_abs : 1;
        unsigned unk1 : 1;
        unsigned src1_neg : 1;
        unsigned src_expand : 3;
        unsigned src0_abs : 1;
        enum bifrost_fcmp_cond cond : 3;
        unsigned op : 7;
} __attribute__((packed));
454
/* ADD-unit float compare layout (20 bits in total). Unlike
 * struct bifrost_fma_fcmp, cond directly follows the sources and src_expand
 * is only 2 bits wide. */
struct bifrost_add_fcmp {
        unsigned src0 : 3;
        unsigned src1 : 3;
        enum bifrost_fcmp_cond cond : 3;
        unsigned src_expand : 2;
        unsigned src0_abs : 1;
        unsigned src1_abs : 1;
        unsigned src1_neg : 1;
        unsigned op : 6;
} __attribute__((packed));
465
/* 16-bit FCMP opcodes (GL and D3D variants), pre-shifted for the 7-bit op
 * field of struct bifrost_fma_fcmp16. */
#define BIFROST_FMA_OP_FCMP_GL_16 (0xc8000 >> 13)
#define BIFROST_FMA_OP_FCMP_D3D_16 (0xcc000 >> 13)

/* FMA-unit 16-bit float compare layout (23 bits in total). */
struct bifrost_fma_fcmp16 {
        unsigned src0 : 3;
        unsigned src1 : 3;

        /* abs2 inferred */
        unsigned abs1 : 1;
        unsigned unk : 2;

        unsigned src0_swizzle : 2;
        unsigned src1_swizzle : 2;

        enum bifrost_fcmp_cond cond : 3;
        unsigned op : 7;
} __attribute__((packed));
483
/* ADD-unit 16-bit float compare layout (20 bits in total). */
struct bifrost_add_fcmp16 {
        unsigned src0 : 3;
        unsigned src1 : 3;
        enum bifrost_fcmp_cond cond : 3;

        unsigned src0_swizzle : 2;
        unsigned src1_swizzle : 2;

        /* No abs mods */
        unsigned src0_neg : 1;

        unsigned op : 6;
} __attribute__((packed));
497
/* Integer comparison condition, stored in the 3-bit cond fields of the ICMP
 * layouts. The last two values are not conditions in themselves; they are
 * marked "nested" in the original notes. */
enum bifrost_icmp_cond {
        BIFROST_ICMP_IGT     = 0x0,
        BIFROST_ICMP_IGE     = 0x1,
        BIFROST_ICMP_UGT     = 0x2,
        BIFROST_ICMP_UGE     = 0x3,
        BIFROST_ICMP_EQ      = 0x4,
        BIFROST_ICMP_NEQ     = 0x5,
        /* nested */
        BIFROST_ICMP_32_OR_8 = 0x6,
        /* nested */
        BIFROST_ICMP_64      = 0x7
};
508
/* FMA-unit 32-bit integer compare layout (23 bits in total). */
struct bifrost_fma_icmp32 {
        unsigned src0 : 3;
        unsigned src1 : 3;
        enum bifrost_icmp_cond cond : 3;
        unsigned unk1 : 1; /* set */
        unsigned d3d : 1;
        unsigned op : 12;
} __attribute__((packed));
517
/* FMA-unit 16-bit integer compare layout (23 bits in total). */
struct bifrost_fma_icmp16 {
        unsigned src0 : 3;
        unsigned src1 : 3;
        unsigned unk : 5; /* 11010 */
        enum bifrost_icmp_cond cond : 3;
        unsigned op : 9;
} __attribute__((packed));
525
/* ADD-unit ICMP opcodes. The shifts match the number of bits between src0 and
 * op in the corresponding layouts: 8 for struct bifrost_add_icmp and 11 for
 * struct bifrost_add_icmp16. */
#define BIFROST_ADD_OP_ICMP_32 (0x0f600 >> 8)
#define BIFROST_ADD_OP_ICMP_16 (0x0f000 >> 11)

/* ADD-unit integer compare layout for 32-bit and 8-bit operands (20 bits in
 * total). */
struct bifrost_add_icmp {
        unsigned src0 : 3;
        unsigned src1 : 3;
        enum bifrost_icmp_cond cond : 3;
        unsigned sz : 1; /* 1 for 32, 0 for 8 */
        unsigned d3d : 1;
        unsigned op : 9;
} __attribute__((packed));
537
/* ADD-unit 16-bit integer compare layout (20 bits in total), with a 2-bit
 * swizzle per source. */
struct bifrost_add_icmp16 {
        unsigned src0 : 3;
        unsigned src1 : 3;
        unsigned src0_swizzle : 2;
        unsigned src1_swizzle : 2;
        unsigned d3d : 1;
        enum bifrost_icmp_cond cond : 3;
        unsigned op : 6;
} __attribute__((packed));
547
/* Two sources for vectorization. The `>> 3` matches the two-source layouts
 * (struct bifrost_fma_2src / struct bifrost_add_2src), where src1 takes three
 * bits out of the generic opcode. */
#define BIFROST_FMA_FLOAT32_TO_16 (0xdd000 >> 3)
#define BIFROST_ADD_FLOAT32_TO_16 (0x0EC00 >> 3)
551
/* Conversion kind; a 3-bit value (BIFROST_CONVERT places it at bits 5-7 of
 * the encoding). Values 0 and 7 are unknown. */
enum bifrost_convert_mode {
        BIFROST_CONV_UNK0       = 0x0,
        BIFROST_CONV_F32_TO_I32 = 0x1,
        BIFROST_CONV_F16_TO_I16 = 0x2,
        BIFROST_CONV_I32_TO_F32 = 0x3,
        BIFROST_CONV_I16_TO_X32 = 0x4,
        BIFROST_CONV_F16_TO_F32 = 0x5,
        BIFROST_CONV_I16_TO_F16 = 0x6,
        BIFROST_CONV_UNK7       = 0x7,
};
562
/* Builders for the conversion encoding. Bits 5-7 hold the conversion mode
 * (see enum bifrost_convert_mode); the lower bits depend on the mode.
 *
 * Every macro argument is parenthesized before it is masked, so that an
 * argument expression containing a low-precedence operator (|, ^, ?:) is
 * masked as a whole instead of only its last operand. */

/* i16 to x32 */
#define BIFROST_CONVERT_4(is_unsigned, component, to_float) \
        (((is_unsigned) & 1) | (((component) & 1) << 1) | (((to_float) & 1) << 2) | \
         ((0x3) << 3) | ((4) << 5) | 0x100)

/* f16 to f32 */
#define BIFROST_CONVERT_5(component) \
        (((component) & 1) | ((1) << 1) | ((5) << 5) | 0x100)

/* Other conversions */
#define BIFROST_CONVERT(is_unsigned, roundmode, swizzle, mode) \
        (((is_unsigned) & 1) | (((roundmode) & 3) << 1) | (((swizzle) & 3) << 3) | (((mode) & 7) << 5))

/* Base opcodes for conversions on the FMA and ADD units. Presumably OR-ed
 * with one of the encodings built above -- confirm against the packing code. */
#define BIFROST_FMA_CONVERT (0xe0000)
#define BIFROST_ADD_CONVERT (0x07800)
578
/* Data type selector for the 2-bit `type` fields of the load layouts
 * (struct bifrost_ld_var_addr, struct bifrost_ld_attr). */
enum bifrost_ldst_type {
        BIFROST_LDST_F16 = 0x0,
        BIFROST_LDST_F32 = 0x1,
        BIFROST_LDST_I32 = 0x2,
        BIFROST_LDST_U32 = 0x3,
};
585
/* LD_VAR_ADDR opcode, pre-shifted for the 7-bit op field of
 * struct bifrost_ld_var_addr (the fields between src0 and op occupy 10 bits). */
#define BIFROST_ADD_OP_LD_VAR_ADDR (0x18000 >> 10)

/* ADD-unit LD_VAR_ADDR layout (20 bits in total). */
struct bifrost_ld_var_addr {
        unsigned src0 : 3;
        unsigned src1 : 3;
        unsigned location : 5;
        enum bifrost_ldst_type type : 2;
        unsigned op : 7;
} __attribute__((packed));
595
/* LD_ATTR opcode, pre-shifted for the 5-bit op field of struct bifrost_ld_attr
 * (the fields between src0 and op occupy 12 bits). */
#define BIFROST_ADD_OP_LD_ATTR (0x08000 >> 12)

/* ADD-unit LD_ATTR layout (20 bits in total). */
struct bifrost_ld_attr {
        unsigned src0 : 3;
        unsigned src1 : 3;
        unsigned location : 5;
        unsigned channels : 2; /* MALI_POSITIVE */
        enum bifrost_ldst_type type : 2;
        unsigned op : 5;
} __attribute__((packed));
606
/* Interpolation mode for the 2-bit interp_mode field of struct bifrost_ld_var. */
enum bifrost_interp_mode {
        BIFROST_INTERP_PER_FRAG = 0,
        BIFROST_INTERP_CENTROID = 1,
        BIFROST_INTERP_DEFAULT  = 2,
        BIFROST_INTERP_EXPLICIT = 3,
};
613
/* Opcodes for the 6-bit op field of struct bifrost_ld_var. */
#define BIFROST_ADD_OP_LD_VAR_16 (0x1a << 1)
#define BIFROST_ADD_OP_LD_VAR_32 (0x0a << 1)

/* Fixed location for gl_FragCoord.zw */
#define BIFROST_FRAGZ (23)
#define BIFROST_FRAGW (22)

/* ADD-unit LD_VAR layout (20 bits in total). */
struct bifrost_ld_var {
        unsigned src0 : 3;

        /* If top two bits set, indirect with src in bottom three */
        unsigned addr : 5;

        unsigned channels : 2; /* MALI_POSITIVE */
        enum bifrost_interp_mode interp_mode : 2;
        unsigned reuse : 1;
        unsigned flat : 1;
        unsigned op : 6;
} __attribute__((packed));
633
/* Texture control word; the bit-field widths add up to 32 bits. */
struct bifrost_tex_ctrl {
        unsigned sampler_index : 4; /* also used to signal indirects */
        unsigned tex_index : 7;
        bool no_merge_index : 1; /* whether to merge (direct) sampler & texture indices */
        bool filter : 1; /* use the usual filtering pipeline (0 for texelFetch & textureGather) */
        unsigned unk0 : 2;
        bool texel_offset : 1; /* *Offset() */
        bool is_shadow : 1;
        bool is_array : 1;
        unsigned tex_type : 2; /* 2D, 3D, Cube, Buffer */
        bool compute_lod : 1; /* 0 for *Lod() */
        bool not_supply_lod : 1; /* 0 for *Lod() or when a bias is applied */
        bool calc_gradients : 1; /* 0 for *Grad() */
        unsigned unk1 : 1;
        unsigned result_type : 4; /* integer, unsigned, float TODO: why is this 4 bits? */
        unsigned unk2 : 4;
} __attribute__((packed));
651
/* Control word carrying two sampler/texture index pairs (suffix 0 and 1), each
 * index only 2 bits wide. The widths add up to 32 bits; the upper 22 bits are
 * not understood. */
struct bifrost_dual_tex_ctrl {
        unsigned sampler_index0 : 2;
        unsigned unk0 : 2;
        unsigned tex_index0 : 2;
        unsigned sampler_index1 : 2;
        unsigned tex_index1 : 2;
        unsigned unk1 : 22;
} __attribute__((packed));
660
/* Compact texture opcodes (F32 and F16 variants), pre-shifted for the 7-bit
 * op field of struct bifrost_tex_compact: the fields between src0 and op
 * occupy 10 bits. */
#define BIFROST_ADD_OP_TEX_COMPACT_F32 (0x0b000 >> 10)
#define BIFROST_ADD_OP_TEX_COMPACT_F16 (0x1b000 >> 10)

/* ADD-unit compact texture layout (20 bits in total), with the texture and
 * sampler indices (3 bits each) embedded in the instruction itself. */
struct bifrost_tex_compact {
        unsigned src0 : 3;
        unsigned src1 : 3;
        unsigned tex_index : 3;
        unsigned unknown : 1;
        unsigned sampler_index : 3;
        unsigned op : 7;
} __attribute__((packed));
672
/* Operand size / component selection for branch comparisons, stored in the
 * 3-bit size field of struct bifrost_branch. */
enum branch_bit_size {
        BR_SIZE_32   = 0x0,
        BR_SIZE_16XX = 0x1,
        BR_SIZE_16YY = 0x2,

        /* For the above combinations of bitsize and location, an extra bit is
         * encoded via comparing the sources. The only possible source of
         * ambiguity would be if the sources were the same, but then the branch
         * condition would be always true or always false anyways, so we can
         * ignore it. But this no longer works when comparing the y component
         * to the x component, since it's valid to compare the y component of a
         * source against its own x component. Instead, the extra bit is
         * encoded via an extra bitsize. */
        BR_SIZE_16YX0 = 0x3,
        BR_SIZE_16YX1 = 0x4,

        BR_SIZE_32_AND_16X = 0x5,
        BR_SIZE_32_AND_16Y = 0x6,

        /* Used for comparisons with zero and always-true, see below. I think
         * this only works for integer comparisons. */
        BR_SIZE_ZERO = 0x7
};
692
/* Which register slot, if any, is used for a write. */
enum bifrost_reg_write_unit {
        /* don't write */
        REG_WRITE_NONE  = 0,
        /* write using reg2 */
        REG_WRITE_TWO   = 1,
        /* write using reg3 */
        REG_WRITE_THREE = 2
};
698
/* Register block; the bit-field widths add up to 35 bits. Note that reg0 is
 * 5 bits wide while reg1, reg2 and reg3 are 6 bits each. The 4-bit ctrl field
 * presumably holds an enum bifrost_reg_control value -- confirm against the
 * packing code. */
struct bifrost_regs {
        unsigned uniform_const : 8;
        unsigned reg2 : 6;
        unsigned reg3 : 6;
        unsigned reg0 : 5;
        unsigned reg1 : 6;
        unsigned ctrl : 4;
} __attribute__((packed));
707
/* Branch comparison, stored in the 3-bit cond field of struct bifrost_branch. */
enum bifrost_branch_cond {
        BR_COND_LT = 0x0,
        BR_COND_LE = 0x1,
        BR_COND_GE = 0x2,
        BR_COND_GT = 0x3,

        /* Equal vs. not-equal determined by src0/src1 comparison */
        BR_COND_EQ = 0x4,

        /* floating-point comparisons */

        /* Becomes UNE when you flip the arguments */
        BR_COND_OEQ = 0x5,

        /* TODO what happens when you flip the arguments? */
        BR_COND_OGT = 0x6,
        BR_COND_OLT = 0x7
};
722
/* Special branch code. 63 is all ones in six bits; where this value is
 * encoded is not visible in this header. */
enum bifrost_branch_code {
        BR_ALWAYS = 0x3f
};
726
727 struct bifrost_branch {
728 unsigned src0 : 3;
729
730 /* For BR_SIZE_ZERO, upper two bits become ctrl */
731 unsigned src1 : 3;
732
733 /* Offset source -- always uniform/const but
734 * theoretically could support indirect jumps? */
735 unsigned src2 : 3;
736
737 enum bifrost_branch_cond cond : 3;
738 enum branch_bit_size size : 3;
739
740 unsigned op : 5;
741 };
742
743 /* Clause packing */
744
/* Complete NOP instruction words for the FMA and ADD units. BIFROST_SRC_STAGE
 * is OR-ed into the low bits, which is where the layouts in this header keep
 * src0. */
#define BIFROST_FMA_NOP (0x701960 | BIFROST_SRC_STAGE)
#define BIFROST_ADD_NOP (0x3D960 | BIFROST_SRC_STAGE)
747
/* 128-bit clause word of "format 1": a 5-bit tag (see the BIFROST_FMT1_*
 * constants), 3 + 64 + 11 bits of instruction data split across ins_0, ins_1
 * and ins_2, and a 45-bit header (the width of struct bifrost_header). */
struct bifrost_fmt1 {
        unsigned ins_0 : 3;
        unsigned tag : 5;
        uint64_t ins_1 : 64;
        unsigned ins_2 : 11;
        uint64_t header : 45;
} __attribute__((packed));
755
/* Values for the 5-bit tag of struct bifrost_fmt1 (bit patterns in the
 * trailing comments). */
#define BIFROST_FMT1_INSTRUCTIONS 0x05 /* 00101 */
#define BIFROST_FMT1_FINAL 0x09        /* 01001 */
#define BIFROST_FMT1_CONSTANTS 0x01    /* 00001 */

/* Values for the 4-bit tag of struct bifrost_fmt_constant. */
#define BIFROST_FMTC_CONSTANTS 0x3 /* 0011 */
#define BIFROST_FMTC_FINAL 0x7     /* 0111 */
762
/* 128-bit clause word holding two 60-bit immediates, preceded by a 4-bit
 * position and a 4-bit tag (see the BIFROST_FMTC_* constants). */
struct bifrost_fmt_constant {
        unsigned pos : 4;
        unsigned tag : 4;
        uint64_t imm_1 : 60;
        uint64_t imm_2 : 60;
} __attribute__((packed));
769
/* Register control codes. All values fit in four bits. Values 0 and 10 are
 * not named; 10 falls where the original notes say INSTR_INVALID_ENC. */
enum bifrost_reg_control {
        BIFROST_WRITE_FMA_P2               = 0x1,
        BIFROST_WRITE_FMA_P2_READ_P3       = 0x2,
        BIFROST_FIRST_WRITE_FMA_P2_READ_P3 = 0x3,
        BIFROST_READ_P3                    = 0x4,
        BIFROST_WRITE_ADD_P2               = 0x5,
        BIFROST_WRITE_ADD_P2_READ_P3       = 0x6,
        BIFROST_WRITE_ADD_P2_FMA_P3        = 0x7,

        BIFROST_FIRST_NONE                 = 0x8,
        BIFROST_FIRST_WRITE_FMA_P2         = 0x9,
        /* INSTR_INVALID_ENC */
        BIFROST_REG_NONE                   = 0xb,
        BIFROST_FIRST_READ_P3              = 0xc,
        BIFROST_FIRST_WRITE_ADD_P2         = 0xd,
        BIFROST_FIRST_WRITE_ADD_P2_READ_P3 = 0xe,
        BIFROST_FIRST_WRITE_ADD_P2_FMA_P3  = 0xf,
};
788
789 #endif