pan/midgard: Identify stack barrier flag
[mesa.git] / src / panfrost / midgard / midgard.h
1 /* Author(s):
2 * Connor Abbott
3 * Alyssa Rosenzweig
4 *
5 * Copyright (c) 2013 Connor Abbott (connor@abbott.cx)
6 * Copyright (c) 2018 Alyssa Rosenzweig (alyssa@rosenzweig.io)
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a copy
9 * of this software and associated documentation files (the "Software"), to deal
10 * in the Software without restriction, including without limitation the rights
11 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 * copies of the Software, and to permit persons to whom the Software is
13 * furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice shall be included in
16 * all copies or substantial portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24 * THE SOFTWARE.
25 */
26
27 #ifndef __midgard_h__
28 #define __midgard_h__
29
30 #include <stdint.h>
31 #include <stdbool.h>
32 #include "panfrost-job.h"
33
/* Debug flags. Each occupies a distinct bit, so they can be OR'd together
 * (presumably into midgard_debug below -- confirm against the users). */
#define MIDGARD_DBG_MSGS        (1 << 0)
#define MIDGARD_DBG_SHADERS     (1 << 1)
#define MIDGARD_DBG_SHADERDB    (1 << 2)

/* Declared here only; the definition lives in another translation unit */
extern int midgard_debug;
39
/* Classification of an instruction word */
typedef enum {
        midgard_word_type_alu        = 0,
        midgard_word_type_load_store = 1,
        midgard_word_type_texture    = 2,
        midgard_word_type_unknown    = 3
} midgard_word_type;
46
/* Identifiers for the ALU units (vector multiply, scalar add, scalar
 * multiply, vector add, look-up table -- going by the names) */
typedef enum {
        midgard_alu_vmul = 0,
        midgard_alu_sadd = 1,
        midgard_alu_smul = 2,
        midgard_alu_vadd = 3,
        midgard_alu_lut  = 4
} midgard_alu;
54
/* Tag values. They span 0x0-0xF, i.e. every value fits in four bits. */
enum {
        TAG_INVALID           = 0x0,
        TAG_BREAK             = 0x1,

        /* Texture tags */
        TAG_TEXTURE_4_VTX     = 0x2,
        TAG_TEXTURE_4         = 0x3,
        TAG_TEXTURE_4_BARRIER = 0x4,

        /* Load/store tag */
        TAG_LOAD_STORE_4      = 0x5,

        /* Not yet identified */
        TAG_UNKNOWN_1         = 0x6,
        TAG_UNKNOWN_2         = 0x7,

        /* ALU tags */
        TAG_ALU_4             = 0x8,
        TAG_ALU_8             = 0x9,
        TAG_ALU_12            = 0xA,
        TAG_ALU_16            = 0xB,

        /* ALU tags, writeout variants */
        TAG_ALU_4_WRITEOUT    = 0xC,
        TAG_ALU_8_WRITEOUT    = 0xD,
        TAG_ALU_12_WRITEOUT   = 0xE,
        TAG_ALU_16_WRITEOUT   = 0xF
};
73
74 /*
75 * ALU words
76 */
77
/* ALU opcodes. Every value fits in 8 bits. Opcodes are grouped below by
 * numeric range; the prefixes (f/i/u) are part of the identifier names. */
typedef enum {
        /* 0x10 - 0x3F */
        midgard_alu_op_fadd       = 0x10,
        midgard_alu_op_fmul       = 0x14,

        midgard_alu_op_fmin       = 0x28,
        midgard_alu_op_fmax       = 0x2C,

        midgard_alu_op_fmov       = 0x30, /* fmov_rte */
        midgard_alu_op_fmov_rtz   = 0x31,
        midgard_alu_op_fmov_rtn   = 0x32,
        midgard_alu_op_fmov_rtp   = 0x33,
        midgard_alu_op_froundeven = 0x34,
        midgard_alu_op_ftrunc     = 0x35,
        midgard_alu_op_ffloor     = 0x36,
        midgard_alu_op_fceil      = 0x37,
        midgard_alu_op_ffma       = 0x38,
        midgard_alu_op_fdot3      = 0x3C,
        midgard_alu_op_fdot3r     = 0x3D,
        midgard_alu_op_fdot4      = 0x3E,
        midgard_alu_op_freduce    = 0x3F,

        /* 0x40 - 0x5F */
        midgard_alu_op_iadd       = 0x40,
        midgard_alu_op_ishladd    = 0x41, /* a + (b<<1) */
        midgard_alu_op_isub       = 0x46,
        midgard_alu_op_iaddsat    = 0x48,
        midgard_alu_op_uaddsat    = 0x49,
        midgard_alu_op_isubsat    = 0x4E,
        midgard_alu_op_usubsat    = 0x4F,

        midgard_alu_op_imul       = 0x58,

        /* 0x60 - 0x6F */
        midgard_alu_op_imin       = 0x60,
        midgard_alu_op_umin       = 0x61,
        midgard_alu_op_imax       = 0x62,
        midgard_alu_op_umax       = 0x63,
        midgard_alu_op_ihadd      = 0x64,
        midgard_alu_op_uhadd      = 0x65,
        midgard_alu_op_irhadd     = 0x66,
        midgard_alu_op_urhadd     = 0x67,
        midgard_alu_op_iasr       = 0x68,
        midgard_alu_op_ilsr       = 0x69,
        midgard_alu_op_ishl       = 0x6E,

        /* 0x70 - 0x7F */
        midgard_alu_op_iand       = 0x70,
        midgard_alu_op_ior        = 0x71,
        midgard_alu_op_inand      = 0x72, /* ~(a & b), for inot let a = b */
        midgard_alu_op_inor       = 0x73, /* ~(a | b) */
        midgard_alu_op_iandnot    = 0x74, /* (a & ~b), used for not/b2f */
        midgard_alu_op_iornot     = 0x75, /* (a | ~b) */
        midgard_alu_op_ixor       = 0x76,
        midgard_alu_op_inxor      = 0x77, /* ~(a ^ b) */
        midgard_alu_op_iclz       = 0x78, /* Number of zeroes on left */
        midgard_alu_op_ibitcount8 = 0x7A, /* Counts bits in 8-bit increments */
        midgard_alu_op_imov       = 0x7B,
        midgard_alu_op_iabsdiff   = 0x7C,
        midgard_alu_op_uabsdiff   = 0x7D,
        midgard_alu_op_ichoose    = 0x7E, /* vector, component number - dupe for shuffle() */

        /* 0x80 - 0x9F */
        midgard_alu_op_feq        = 0x80,
        midgard_alu_op_fne        = 0x81,
        midgard_alu_op_flt        = 0x82,
        midgard_alu_op_fle        = 0x83,
        midgard_alu_op_fball_eq   = 0x88,
        midgard_alu_op_fball_neq  = 0x89,
        midgard_alu_op_fball_lt   = 0x8A, /* all(lessThan(.., ..)) */
        midgard_alu_op_fball_lte  = 0x8B, /* all(lessThanEqual(.., ..)) */

        midgard_alu_op_fbany_eq   = 0x90,
        midgard_alu_op_fbany_neq  = 0x91,
        midgard_alu_op_fbany_lt   = 0x92, /* any(lessThan(.., ..)) */
        midgard_alu_op_fbany_lte  = 0x93, /* any(lessThanEqual(.., ..)) */

        midgard_alu_op_f2i_rte    = 0x98,
        midgard_alu_op_f2i_rtz    = 0x99,
        midgard_alu_op_f2i_rtn    = 0x9A,
        midgard_alu_op_f2i_rtp    = 0x9B,
        midgard_alu_op_f2u_rte    = 0x9C,
        midgard_alu_op_f2u_rtz    = 0x9D,
        midgard_alu_op_f2u_rtn    = 0x9E,
        midgard_alu_op_f2u_rtp    = 0x9F,

        /* 0xA0 - 0xBF */
        midgard_alu_op_ieq        = 0xA0,
        midgard_alu_op_ine        = 0xA1,
        midgard_alu_op_ult        = 0xA2,
        midgard_alu_op_ule        = 0xA3,
        midgard_alu_op_ilt        = 0xA4,
        midgard_alu_op_ile        = 0xA5,
        midgard_alu_op_iball_eq   = 0xA8,
        midgard_alu_op_iball_neq  = 0xA9,
        midgard_alu_op_uball_lt   = 0xAA,
        midgard_alu_op_uball_lte  = 0xAB,
        midgard_alu_op_iball_lt   = 0xAC,
        midgard_alu_op_iball_lte  = 0xAD,

        midgard_alu_op_ibany_eq   = 0xB0,
        midgard_alu_op_ibany_neq  = 0xB1,
        midgard_alu_op_ubany_lt   = 0xB2,
        midgard_alu_op_ubany_lte  = 0xB3,
        midgard_alu_op_ibany_lt   = 0xB4, /* any(lessThan(.., ..)) */
        midgard_alu_op_ibany_lte  = 0xB5, /* any(lessThanEqual(.., ..)) */
        midgard_alu_op_i2f_rte    = 0xB8,
        midgard_alu_op_i2f_rtz    = 0xB9,
        midgard_alu_op_i2f_rtn    = 0xBA,
        midgard_alu_op_i2f_rtp    = 0xBB,
        midgard_alu_op_u2f_rte    = 0xBC,
        midgard_alu_op_u2f_rtz    = 0xBD,
        midgard_alu_op_u2f_rtn    = 0xBE,
        midgard_alu_op_u2f_rtp    = 0xBF,

        /* 0xC0 - 0xCF */
        midgard_alu_op_icsel_v    = 0xC0, /* condition code r31 */
        midgard_alu_op_icsel      = 0xC1, /* condition code r31.w */
        midgard_alu_op_fcsel_v    = 0xC4,
        midgard_alu_op_fcsel      = 0xC5,
        midgard_alu_op_fround     = 0xC6,

        /* 0xE0 - 0xFF */
        midgard_alu_op_fatan_pt2  = 0xE8,
        midgard_alu_op_fpow_pt1   = 0xEC,
        midgard_alu_op_fpown_pt1  = 0xED,
        midgard_alu_op_fpowr_pt1  = 0xEE,

        midgard_alu_op_frcp       = 0xF0,
        midgard_alu_op_frsqrt     = 0xF2,
        midgard_alu_op_fsqrt      = 0xF3,
        midgard_alu_op_fexp2      = 0xF4,
        midgard_alu_op_flog2      = 0xF5,
        midgard_alu_op_fsin       = 0xF6,
        midgard_alu_op_fcos       = 0xF7,
        midgard_alu_op_fatan2_pt1 = 0xF9,
} midgard_alu_op;
207
/* Output modifier values for float ops (2-bit encoding; 0x2 is not yet
 * understood and therefore has no enumerator) */
typedef enum {
        midgard_outmod_none = 0x0,
        midgard_outmod_pos  = 0x1,
        /* 0x2 unknown */
        midgard_outmod_sat  = 0x3
} midgard_outmod_float;
214
/* Output modifier values for integer ops */
typedef enum {
        midgard_outmod_int_saturate  = 0x0,
        midgard_outmod_uint_saturate = 0x1,
        midgard_outmod_int_wrap      = 0x2,

        /* Overflowed portion */
        midgard_outmod_int_high      = 0x3,
} midgard_outmod_int;
221
/* Register mode selector; the enumerator suffix is a bit width */
typedef enum {
        midgard_reg_mode_8  = 0x0,
        midgard_reg_mode_16 = 0x1,
        midgard_reg_mode_32 = 0x2,
        midgard_reg_mode_64 = 0x3
} midgard_reg_mode;
228
/* Destination override selector: lower half, upper half, or no override */
typedef enum {
        midgard_dest_override_lower = 0x0,
        midgard_dest_override_upper = 0x1,
        midgard_dest_override_none  = 0x2
} midgard_dest_override;
234
/* Source modifier values for integer ops (2-bit encoding) */
typedef enum {
        midgard_int_sign_extend = 0x0,
        midgard_int_zero_extend = 0x1,
        midgard_int_normal      = 0x2,
        midgard_int_shift       = 0x3
} midgard_int_mod;
241
/* Source modifier bits for float ops; independent flags that may be combined */
#define MIDGARD_FLOAT_MOD_ABS 0x1
#define MIDGARD_FLOAT_MOD_NEG 0x2
244
/* Vector ALU source descriptor: 13 bits in total (2 + 1 + 1 + 1 + 8) */
typedef struct __attribute__((__packed__)) {
        /* A midgard_int_mod value for integer ops, or a combination of the
         * MIDGARD_FLOAT_MOD_* bits for float ops */
        unsigned mod : 2;

        /* With a half destination: replicate the lower half.
         * With a full destination: low/high half selection. */
        bool rep_low : 1;

        /* Unused if dest = full */
        bool rep_high : 1;

        /* Only matters if dest = full */
        bool half : 1;

        unsigned swizzle : 8;
} midgard_vector_alu_src;
261
262 typedef struct
263 __attribute__((__packed__))
264 {
265 midgard_alu_op op : 8;
266 midgard_reg_mode reg_mode : 2;
267 unsigned src1 : 13;
268 unsigned src2 : 13;
269 midgard_dest_override dest_override : 2;
270 unsigned outmod : 2;
271 unsigned mask : 8;
272 }
273 midgard_vector_alu;
274
/* Scalar ALU source descriptor: 6 bits in total (1 + 1 + 1 + 3) */
typedef struct __attribute__((__packed__)) {
        bool abs : 1;
        bool negate : 1;

        /* 0 = half, 1 = full */
        bool full : 1;

        unsigned component : 3;
} midgard_scalar_alu_src;
284
285 typedef struct
286 __attribute__((__packed__))
287 {
288 midgard_alu_op op : 8;
289 unsigned src1 : 6;
290 unsigned src2 : 11;
291 unsigned unknown : 1;
292 unsigned outmod : 2;
293 bool output_full : 1;
294 unsigned output_component : 3;
295 }
296 midgard_scalar_alu;
297
/* Register selection: three 5-bit register numbers plus a flag, 16 bits in
 * total */
typedef struct __attribute__((__packed__)) {
        unsigned src1_reg : 5;
        unsigned src2_reg : 5;
        unsigned out_reg : 5;

        /* Going by the name, flags the second source as an immediate */
        bool src2_imm : 1;
} midgard_reg_info;
307
308 /* In addition to conditional branches and jumps (unconditional branches),
309 * Midgard implements a bit of fixed function functionality used in fragment
310 * shaders via specially crafted branches. These have special branch opcodes,
311 * which perform a fixed-function operation and/or use the results of a
312 * fixed-function operation as the branch condition. */
313
/* Branch / writeout opcodes (every value fits in 3 bits) */
typedef enum {
        /* Regular branches */
        midgard_jmp_writeout_op_branch_uncond      = 1,
        midgard_jmp_writeout_op_branch_cond        = 2,

        /* Fragment shaders only: executes a discard_if with the
         * corresponding condition code. This terminates the shader, so the
         * branch target is generally set to outside of the shader. */
        midgard_jmp_writeout_op_discard            = 4,

        /* Branch while the tilebuffer is not yet ready. A fragment shader
         * that reads from the tile buffer (for instance via
         * ARM_shader_framebuffer_fetch or EXT_pixel_local_storage) should
         * begin with this branch used as a loop. An instruction like
         * "br.tilebuffer.always -1" does the trick, corresponding to
         * "while(!is_tilebuffer_ready);" */
        midgard_jmp_writeout_op_tilebuffer_pending = 6,

        /* Fragment shaders only: try to write the value pushed to r0 out to
         * the tilebuffer, subject to unknown state in r1.z and r1.w. On
         * success the shader terminates; on failure it branches to the
         * specified target. Generally used in a loop to itself, acting as
         * "do { write(r0); } while(!write_successful);" */
        midgard_jmp_writeout_op_writeout           = 7,
} midgard_jmp_writeout_op;
339
/* 2-bit branch condition codes */
typedef enum {
        midgard_condition_write0 = 0,

        /* Conditional branch on FALSE */
        midgard_condition_false  = 1,

        /* Conditional branch on TRUE */
        midgard_condition_true   = 2,

        /* Always branches. A pure branch should use the unconditional
         * branch coding instead, but this is still useful with the
         * fixed-function branch opcodes. */
        midgard_condition_always = 3,
} midgard_condition;
353
354 typedef struct
355 __attribute__((__packed__))
356 {
357 midgard_jmp_writeout_op op : 3; /* == branch_uncond */
358 unsigned dest_tag : 4; /* tag of branch destination */
359 unsigned unknown : 2;
360 int offset : 7;
361 }
362 midgard_branch_uncond;
363
364 typedef struct
365 __attribute__((__packed__))
366 {
367 midgard_jmp_writeout_op op : 3; /* == branch_cond */
368 unsigned dest_tag : 4; /* tag of branch destination */
369 int offset : 7;
370 midgard_condition cond : 2;
371 }
372 midgard_branch_cond;
373
374 typedef struct
375 __attribute__((__packed__))
376 {
377 midgard_jmp_writeout_op op : 3; /* == branch_cond */
378 unsigned dest_tag : 4; /* tag of branch destination */
379 unsigned unknown : 2;
380 signed offset : 23;
381
382 /* Extended branches permit inputting up to 4 conditions loaded into
383 * r31 (two in r31.w and two in r31.x). In the most general case, we
384 * specify a function f(A, B, C, D) mapping 4 1-bit conditions to a
385 * single 1-bit branch criteria. Note that the domain of f has 2^(2^4)
386 * elements, each mapping to 1-bit of output, so we can trivially
387 * construct a Godel numbering of f as a (2^4)=16-bit integer. This
388 * 16-bit integer serves as a lookup table to compute f, subject to
389 * some swaps for ordering.
390 *
391 * Interesting, the standard 2-bit condition codes are also a LUT with
392 * the same format (2^1-bit), but it's usually easier to use enums. */
393
394 unsigned cond : 16;
395 }
396 midgard_branch_extended;
397
398 typedef struct
399 __attribute__((__packed__))
400 {
401 midgard_jmp_writeout_op op : 3; /* == writeout */
402 unsigned unknown : 13;
403 }
404 midgard_writeout;
405
406 /*
407 * Load/store words
408 */
409
/* Load/store opcodes. Every value fits in 8 bits. */
typedef enum {
        midgard_op_ld_st_noop                     = 0x03,

        /* Unpack a colour from a native format to fp16 */
        midgard_op_unpack_colour                  = 0x05,

        /* Pack a colour from fp16 to a native format */
        midgard_op_pack_colour                    = 0x09,

        /* Moves fp32 cube map coordinates in r27 to their cube map texture
         * coordinate destination (e.g. r29). Unclear why this lives on the
         * L/S unit. */
        midgard_op_ld_cubemap_coords              = 0x0E,

        /* Loads a global/local/group ID, depending on arguments */
        midgard_op_ld_compute_id                  = 0x10,

        /* Perspective division on the L/S unit, which can be a clock faster
         * than the ALU if you're lucky. Put the vec4 in r27 and call with
         * 0x24 as the unknown state; the output will be
         * <x/w, y/w, z/w, 1>. Replace w with z for the z version. */
        midgard_op_ldst_perspective_division_z    = 0x12,
        midgard_op_ldst_perspective_division_w    = 0x13,

        /* Atomics: val in r27.y, address embedded, outputs result to
         * argument. Invert val for sub. Let val = +-1 for inc/dec. */
        midgard_op_atomic_add                     = 0x40,
        midgard_op_atomic_add64                   = 0x41,

        midgard_op_atomic_and                     = 0x44,
        midgard_op_atomic_and64                   = 0x45,
        midgard_op_atomic_or                      = 0x48,
        midgard_op_atomic_or64                    = 0x49,
        midgard_op_atomic_xor                     = 0x4C,
        midgard_op_atomic_xor64                   = 0x4D,

        midgard_op_atomic_imin                    = 0x50,
        midgard_op_atomic_imin64                  = 0x51,
        midgard_op_atomic_umin                    = 0x54,
        midgard_op_atomic_umin64                  = 0x55,
        midgard_op_atomic_imax                    = 0x58,
        midgard_op_atomic_imax64                  = 0x59,
        midgard_op_atomic_umax                    = 0x5C,
        midgard_op_atomic_umax64                  = 0x5D,

        midgard_op_atomic_xchg                    = 0x60,
        midgard_op_atomic_xchg64                  = 0x61,

        /* Generic loads: used for a compute shader's __global arguments and
         * __local variables (or for register spilling) */
        midgard_op_ld_char                        = 0x81,
        midgard_op_ld_char2                       = 0x84,
        midgard_op_ld_short                       = 0x85,
        midgard_op_ld_char4                       = 0x88, /* short2, int, float */
        midgard_op_ld_short4                      = 0x8C, /* int2, float2, long */
        midgard_op_ld_int4                        = 0x90, /* float4, long2 */

        midgard_op_ld_attr_32                     = 0x94,
        midgard_op_ld_attr_16                     = 0x95,
        midgard_op_ld_attr_32u                    = 0x96,
        midgard_op_ld_attr_32i                    = 0x97,
        midgard_op_ld_vary_32                     = 0x98,
        midgard_op_ld_vary_16                     = 0x99,
        midgard_op_ld_vary_32u                    = 0x9A,
        midgard_op_ld_vary_32i                    = 0x9B,

        /* Old version of midgard_op_ld_color_buffer_u8_as_fp16, for T720 */
        midgard_op_ld_color_buffer_u8_as_fp16_old = 0x9D,

        /* UBO loads. These ops differ in their alignment requirement and
         * the accompanying shift: the offset to ld_ubo_int4 is in 16-byte
         * units (and it can load 128 bits), the offset to ld_ubo_short4 is
         * in 8-byte units, and ld_ubo_char4 in 4-byte units.
         * ld_ubo_char/ld_ubo_char2 are purely theoretical (never seen in
         * the wild) since int8/int16/fp16 UBOs don't really exist; they are
         * listed only to keep symmetry with the generic I/O ops. */
        midgard_op_ld_ubo_char                    = 0xA0, /* theoretical */
        midgard_op_ld_ubo_char2                   = 0xA4, /* theoretical */
        midgard_op_ld_ubo_char4                   = 0xA8,
        midgard_op_ld_ubo_short4                  = 0xAC,
        midgard_op_ld_ubo_int4                    = 0xB0,

        /* New-style blending ops. Works on T760/T860 */
        midgard_op_ld_color_buffer_u8_as_fp16     = 0xB9,
        midgard_op_ld_color_buffer_32u            = 0xBA,

        /* Generic stores */
        midgard_op_st_char                        = 0xC0,
        midgard_op_st_char2                       = 0xC4, /* short */
        midgard_op_st_char4                       = 0xC8, /* short2, int, float */
        midgard_op_st_short4                      = 0xCC, /* int2, float2, long */
        midgard_op_st_int4                        = 0xD0, /* float4, long2 */

        midgard_op_st_vary_32                     = 0xD4,
        midgard_op_st_vary_16                     = 0xD5,
        midgard_op_st_vary_32u                    = 0xD6,
        midgard_op_st_vary_32i                    = 0xD7,

        /* Image stores: value to st in r27, location r26.w as short2 */
        midgard_op_st_image_f                     = 0xD8,
        midgard_op_st_image_ui                    = 0xDA,
        midgard_op_st_image_i                     = 0xDB,
} midgard_load_store_op;
514
/* Varying interpolation qualifier (values 0 and 3 have no enumerator) */
typedef enum {
        midgard_interp_centroid = 0x1,
        midgard_interp_default  = 0x2
} midgard_interpolation;
519
/* Modifier applied to a varying load. Values not listed are unknown. */
typedef enum {
        midgard_varying_mod_none          = 0x0,

        /* Perspective division baked in with the load: take the would-be
         * result and divide all components by its z (resp. w) */
        midgard_varying_mod_perspective_z = 0x2,
        midgard_varying_mod_perspective_w = 0x3,
} midgard_varying_modifier;
530
531 typedef struct
532 __attribute__((__packed__))
533 {
534 unsigned zero0 : 1; /* Always zero */
535
536 midgard_varying_modifier modifier : 2;
537
538 unsigned zero1: 1; /* Always zero */
539
540 /* Varying qualifiers, zero if not a varying */
541 unsigned flat : 1;
542 unsigned is_varying : 1; /* Always one for varying, but maybe something else? */
543 midgard_interpolation interpolation : 2;
544
545 unsigned zero2 : 2; /* Always zero */
546 }
547 midgard_varying_parameter;
548
/* 8-bit register/etc selector for load/store ops (2 + 1 + 2 + 3 bits) */
typedef struct __attribute__((__packed__)) {
        /* Indexes into the register */
        unsigned component : 2;

        /* Register select between r26/r27 */
        unsigned select : 1;

        unsigned unknown : 2;

        /* Like any good Arm instruction set, load/store arguments can be
         * implicitly left-shifted... but only the second argument. Zero
         * means no shift; shifts of up to <<7 are possible, which is useful
         * for indexing.
         *
         * For the first argument, the meaning of these bits is unknown. */
        unsigned shift : 3;
} midgard_ldst_register_select;
569
570 typedef struct
571 __attribute__((__packed__))
572 {
573 midgard_load_store_op op : 8;
574 unsigned reg : 5;
575 unsigned mask : 4;
576 unsigned swizzle : 8;
577
578 /* Load/store ops can take two additional registers as arguments, but
579 * these are limited to load/store registers with only a few supported
580 * mask/swizzle combinations. The tradeoff is these are much more
581 * compact, requiring 8-bits each rather than 17-bits for a full
582 * reg/mask/swizzle. Usually (?) encoded as
583 * midgard_ldst_register_select. */
584 unsigned arg_1 : 8;
585 unsigned arg_2 : 8;
586
587 unsigned varying_parameters : 10;
588
589 unsigned address : 9;
590 }
591 midgard_load_store_word;
592
/* Load/store bundle: two 4-bit type fields followed by two 60-bit words (the
 * width of midgard_load_store_word), 128 bits in total */
typedef struct __attribute__((__packed__)) {
        unsigned type : 4;
        unsigned next_type : 4;

        uint64_t word1 : 60;
        uint64_t word2 : 60;
} midgard_load_store;
602
/* 8-bit register selector for texture ops. It picks a bias/LOD/gradient
 * register and is shoved into the `bias` field. */
typedef struct __attribute__((__packed__)) {
        /* Set for a 32-bit register, clear for a half-register */
        unsigned full : 1;

        /* Register select between r28/r29 */
        unsigned select : 1;

        /* For a half-register, selects the upper half */
        unsigned upper : 1;

        /* Indexes into the register */
        unsigned component : 2;

        /* Padding to make this 8-bit */
        unsigned zero : 3;
} midgard_tex_register_select;
625
/* Texture pipeline results are in r28-r29; this is the first of the two */
#define REG_TEX_BASE            28

/* Texture opcodes... maybe? */
#define TEXTURE_OP_NORMAL       0x11    /* texture */
#define TEXTURE_OP_LOD          0x12    /* textureLod */
#define TEXTURE_OP_TEXEL_FETCH  0x14    /* texelFetch */

/* Implements barrier() */
#define TEXTURE_OP_BARRIER      0x0B

/* Horizontal (DFDX) and vertical (DFDY) derivatives. Use with a float
 * sampler and a "2D" texture. Leave the texture/sampler IDs as zero; they
 * ought to be ignored. Only 64 bits of fp32 data are handled at a time, so
 * the derivatives of a vec4 take 2 texture ops. For some reason the blob
 * computes both the X and Y derivatives together and throws out whichever is
 * unused; it is not known whether that is a quirk of the hardware or just of
 * the blob. */
#define TEXTURE_OP_DFDX         0x0D
#define TEXTURE_OP_DFDY         0x1D
646
/* Sampler data type; the trailing comments give the matching GLSL sampler
 * keyword */
enum mali_sampler_type {
        MALI_SAMPLER_UNK      = 0,
        MALI_SAMPLER_FLOAT    = 1, /* sampler */
        MALI_SAMPLER_UNSIGNED = 2, /* usampler */
        MALI_SAMPLER_SIGNED   = 3, /* isampler */
};
653
654 typedef struct
655 __attribute__((__packed__))
656 {
657 unsigned type : 4;
658 unsigned next_type : 4;
659
660 unsigned op : 6;
661 unsigned shadow : 1;
662 unsigned is_gather : 1;
663
664 /* A little obscure, but last is set for the last texture operation in
665 * a shader. cont appears to just be last's opposite (?). Yeah, I know,
666 * kind of funky.. BiOpen thinks it could do with memory hinting, or
667 * tile locking? */
668
669 unsigned cont : 1;
670 unsigned last : 1;
671
672 enum mali_texture_type format : 2;
673
674 /* Are sampler_handle/texture_handler respectively set by registers? If
675 * true, the lower 8-bits of the respective field is a register word.
676 * If false, they are an immediate */
677
678 unsigned sampler_register : 1;
679 unsigned texture_register : 1;
680
681 /* Is a register used to specify the
682 * LOD/bias/offset? If set, use the `bias` field as
683 * a register index. If clear, use the `bias` field
684 * as an immediate. */
685 unsigned lod_register : 1;
686
687 /* Is a register used to specify an offset? If set, use the
688 * offset_reg_* fields to encode this, duplicated for each of the
689 * components. If clear, there is implcitly always an immediate offst
690 * specificed in offset_imm_* */
691 unsigned offset_register : 1;
692
693 unsigned in_reg_full : 1;
694 unsigned in_reg_select : 1;
695 unsigned in_reg_upper : 1;
696 unsigned in_reg_swizzle : 8;
697
698 unsigned unknown8 : 2;
699
700 unsigned out_full : 1;
701
702 enum mali_sampler_type sampler_type : 2;
703
704 unsigned out_reg_select : 1;
705 unsigned out_upper : 1;
706
707 unsigned mask : 4;
708
709 /* Intriguingly, textures can take an outmod just like textures. Int
710 * outmods are not supported as far as I can tell, so this is only
711 * meaningful for float samplers */
712 midgard_outmod_float outmod : 2;
713
714 unsigned swizzle : 8;
715
716 /* For barriers, control barriers are implied regardless, but these
717 * bits also enable memory barriers of various types. For regular
718 * textures, these bits are not yet understood. */
719 unsigned barrier_buffer : 1;
720 unsigned barrier_shared : 1;
721 unsigned barrier_stack : 1;
722
723 unsigned unknown4 : 9;
724
725 /* In immediate mode, each offset field is an immediate range [0, 7].
726 *
727 * In register mode, offset_x becomes a register full / select / upper
728 * triplet followed by a vec3 swizzle is splattered across
729 * offset_y/offset_z in a genuinely bizarre way.
730 *
731 * For texel fetches in immediate mode, the range is the full [-8, 7],
732 * but for normal texturing the top bit must be zero and a register
733 * used instead. It's not clear where this limitation is from.
734 *
735 * union {
736 * struct {
737 * signed offset_x : 4;
738 * signed offset_y : 4;
739 * signed offset_z : 4;
740 * } immediate;
741 * struct {
742 * bool full : 1;
743 * bool select : 1;
744 * bool upper : 1;
745 * unsigned swizzle : 8;
746 * unsigned zero : 1;
747 * } register;
748 * }
749 */
750
751 unsigned offset : 12;
752
753 /* In immediate bias mode, for a normal texture op, this is
754 * texture bias, computed as int(2^8 * frac(biasf)), with
755 * bias_int = floor(bias). For a textureLod, it's that, but
756 * s/bias/lod. For a texel fetch, this is the LOD as-is.
757 *
758 * In register mode, this is a midgard_tex_register_select
759 * structure and bias_int is zero */
760
761 unsigned bias : 8;
762 signed bias_int : 8;
763
764 /* If sampler/texture_register is set, the bottom 8-bits are
765 * midgard_tex_register_select and the top 8-bits are zero. If they are
766 * clear, they are immediate texture indices */
767
768 unsigned sampler_handle : 16;
769 unsigned texture_handle : 16;
770 }
771 midgard_texture_word;
772
/* Barriers are technically texture instructions, but since most fields are
 * forced to zero it is less work to describe them as an explicitly zeroed
 * special case. 128 bits in total. */
typedef struct __attribute__((__packed__)) {
        unsigned type : 4;
        unsigned next_type : 4;

        /* op = TEXTURE_OP_BARRIER */
        unsigned op : 6;
        unsigned zero1 : 2;

        /* Helper invocations don't make any sense here, so both of these
         * are forced to one */
        unsigned cont : 1;
        unsigned last : 1;
        unsigned zero2 : 14;

        unsigned zero3 : 24;

        /* Memory barrier enables; they sit at the same bit positions (56,
         * 57, 58) as barrier_buffer / barrier_shared / barrier_stack in
         * midgard_texture_word */
        unsigned buffer : 1;
        unsigned shared : 1;
        unsigned stack : 1;
        unsigned zero4 : 5;

        uint64_t zero5;
} midgard_texture_barrier_word;
800
/* 128 bits of constant data, viewable at each element width and type */
typedef union midgard_constants {
        /* 2 x 64-bit */
        double   f64[2];
        uint64_t u64[2];
        int64_t  i64[2];

        /* 4 x 32-bit */
        float    f32[4];
        uint32_t u32[4];
        int32_t  i32[4];

        /* 8 x 16-bit; f16 holds raw half-float bit patterns as uint16_t */
        uint16_t f16[8];
        uint16_t u16[8];
        int16_t  i16[8];

        /* 16 x 8-bit */
        uint8_t  u8[16];
        int8_t   i8[16];
} midgard_constants;
815
816 #endif