1 /*
2 * Copyright © 2017 Connor Abbott
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "nir_serialize.h"
25 #include "nir_control_flow.h"
26 #include "util/u_dynarray.h"
27 #include "util/u_math.h"
28
29 #define NIR_SERIALIZE_FUNC_HAS_IMPL ((void *)(intptr_t)1)
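/* Object indices are packed into 20-bit fields (e.g. packed_src.object_idx),
 * so the remap table must stay below this limit.
 */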
30 #define MAX_OBJECT_IDS (1 << 20)
31
32 typedef struct {
33 size_t blob_offset;
34 nir_ssa_def *src;
35 nir_block *block;
36 } write_phi_fixup;
37
38 typedef struct {
39 const nir_shader *nir;
40
41 struct blob *blob;
42
43 /* maps pointer to index */
44 struct hash_table *remap_table;
45
46 /* the next index to assign to a NIR in-memory object */
47 uint32_t next_idx;
48
49 /* Array of write_phi_fixup structs representing phi sources that need to
50 * be resolved in the second pass.
51 */
52 struct util_dynarray phi_fixups;
53
54 /* The last serialized type. */
55 const struct glsl_type *last_type;
56 const struct glsl_type *last_interface_type;
57 struct nir_variable_data last_var_data;
58
59 /* For skipping equal ALU headers (typical after scalarization). */
60 nir_instr_type last_instr_type;
61 uintptr_t last_alu_header_offset;
62
63 /* Don't write optional data such as variable names. */
64 bool strip;
65 } write_ctx;
66
67 typedef struct {
68 nir_shader *nir;
69
70 struct blob_reader *blob;
71
72 /* the next index to assign to a NIR in-memory object */
73 uint32_t next_idx;
74
75 /* The length of the index -> object table */
76 uint32_t idx_table_len;
77
78 /* map from index to deserialized pointer */
79 void **idx_table;
80
81 /* List of phi sources. */
82 struct list_head phi_srcs;
83
84 /* The last deserialized type. */
85 const struct glsl_type *last_type;
86 const struct glsl_type *last_interface_type;
87 struct nir_variable_data last_var_data;
88 } read_ctx;
89
90 static void
91 write_add_object(write_ctx *ctx, const void *obj)
92 {
93 uint32_t index = ctx->next_idx++;
94 assert(index != MAX_OBJECT_IDS);
95 _mesa_hash_table_insert(ctx->remap_table, obj, (void *)(uintptr_t) index);
96 }
97
98 static uint32_t
99 write_lookup_object(write_ctx *ctx, const void *obj)
100 {
101 struct hash_entry *entry = _mesa_hash_table_search(ctx->remap_table, obj);
102 assert(entry);
103 return (uint32_t)(uintptr_t) entry->data;
104 }
105
106 static void
107 read_add_object(read_ctx *ctx, void *obj)
108 {
109 assert(ctx->next_idx < ctx->idx_table_len);
110 ctx->idx_table[ctx->next_idx++] = obj;
111 }
112
113 static void *
114 read_lookup_object(read_ctx *ctx, uint32_t idx)
115 {
116 assert(idx < ctx->idx_table_len);
117 return ctx->idx_table[idx];
118 }
119
120 static void *
121 read_object(read_ctx *ctx)
122 {
123 return read_lookup_object(ctx, blob_read_uint32(ctx->blob));
124 }
125
126 static uint32_t
127 encode_bit_size_3bits(uint8_t bit_size)
128 {
129 /* Encode values of 0, 1, 2, 4, 8, 16, 32, 64 in 3 bits. */
130 assert(bit_size <= 64 && util_is_power_of_two_or_zero(bit_size));
131 if (bit_size)
132 return util_logbase2(bit_size) + 1;
133 return 0;
134 }
135
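/* Inverse of encode_bit_size_3bits: 0 stays 0; otherwise the result is
 * 1 << (n - 1), i.e. 1, 2, 4, ..., 64.
 */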
136 static uint8_t
137 decode_bit_size_3bits(uint8_t bit_size)
138 {
139 if (bit_size)
140 return 1 << (bit_size - 1);
141 return 0;
142 }
143
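/* num_components is packed into 3 bits: values 0-4 map to themselves,
 * 5 means 8, 6 means 16, and 7 (below) means the real count follows in a
 * separate uint32.
 */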
144 #define NUM_COMPONENTS_IS_SEPARATE_7 7
145
146 static uint8_t
147 encode_num_components_in_3bits(uint8_t num_components)
148 {
149 if (num_components <= 4)
150 return num_components;
151 if (num_components == 8)
152 return 5;
153 if (num_components == 16)
154 return 6;
155
156 /* special value indicating that num_components is in the next uint32 */
157 return NUM_COMPONENTS_IS_SEPARATE_7;
158 }
159
160 static uint8_t
161 decode_num_components_in_3bits(uint8_t value)
162 {
163 if (value <= 4)
164 return value;
165 if (value == 5)
166 return 8;
167 if (value == 6)
168 return 16;
169
170 unreachable("invalid num_components encoding");
171 return 0;
172 }
173
174 static void
175 write_constant(write_ctx *ctx, const nir_constant *c)
176 {
177 blob_write_bytes(ctx->blob, c->values, sizeof(c->values));
178 blob_write_uint32(ctx->blob, c->num_elements);
179 for (unsigned i = 0; i < c->num_elements; i++)
180 write_constant(ctx, c->elements[i]);
181 }
182
183 static nir_constant *
184 read_constant(read_ctx *ctx, nir_variable *nvar)
185 {
186 nir_constant *c = ralloc(nvar, nir_constant);
187
188 blob_copy_bytes(ctx->blob, (uint8_t *)c->values, sizeof(c->values));
189 c->num_elements = blob_read_uint32(ctx->blob);
190 c->elements = ralloc_array(nvar, nir_constant *, c->num_elements);
191 for (unsigned i = 0; i < c->num_elements; i++)
192 c->elements[i] = read_constant(ctx, nvar);
193
194 return c;
195 }
196
197 enum var_data_encoding {
198 var_encode_full,
199 var_encode_shader_temp,
200 var_encode_function_temp,
201 var_encode_location_diff,
202 };
203
204 union packed_var {
205 uint32_t u32;
206 struct {
207 unsigned has_name:1;
208 unsigned has_constant_initializer:1;
209 unsigned has_interface_type:1;
210 unsigned num_state_slots:7;
211 unsigned data_encoding:2;
212 unsigned type_same_as_last:1;
213 unsigned interface_type_same_as_last:1;
214 unsigned _pad:2;
215 unsigned num_members:16;
216 } u;
217 };
218
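/* One-dword delta against the previously serialized variable's locations,
 * used when the rest of var->data matches and the deltas fit
 * (var_encode_location_diff).
 */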
219 union packed_var_data_diff {
220 uint32_t u32;
221 struct {
222 int location:13;
223 int location_frac:3;
224 int driver_location:16;
225 } u;
226 };
227
228 static void
229 write_variable(write_ctx *ctx, const nir_variable *var)
230 {
231 write_add_object(ctx, var);
232
233 assert(var->num_state_slots < (1 << 7));
234
235 STATIC_ASSERT(sizeof(union packed_var) == 4);
236 union packed_var flags;
237 flags.u32 = 0;
238
239 flags.u.has_name = !ctx->strip && var->name;
240 flags.u.has_constant_initializer = !!(var->constant_initializer);
241 flags.u.has_interface_type = !!(var->interface_type);
242 flags.u.type_same_as_last = var->type == ctx->last_type;
243 flags.u.interface_type_same_as_last =
244 var->interface_type && var->interface_type == ctx->last_interface_type;
245 flags.u.num_state_slots = var->num_state_slots;
246 flags.u.num_members = var->num_members;
247
248 struct nir_variable_data data = var->data;
249
250 /* When stripping, we expect that the location is no longer needed,
251 * which is typically after shaders are linked.
252 */
253 if (ctx->strip &&
254 data.mode != nir_var_shader_in &&
255 data.mode != nir_var_shader_out)
256 data.location = 0;
257
258 /* Temporary variables don't serialize var->data. */
259 if (data.mode == nir_var_shader_temp)
260 flags.u.data_encoding = var_encode_shader_temp;
261 else if (data.mode == nir_var_function_temp)
262 flags.u.data_encoding = var_encode_function_temp;
263 else {
264 struct nir_variable_data tmp = data;
265
266 tmp.location = ctx->last_var_data.location;
267 tmp.location_frac = ctx->last_var_data.location_frac;
268 tmp.driver_location = ctx->last_var_data.driver_location;
269
270 /* See if we can encode only the difference in locations from the last
271 * variable.
272 */
273 if (memcmp(&ctx->last_var_data, &tmp, sizeof(tmp)) == 0 &&
274 abs((int)data.location -
275 (int)ctx->last_var_data.location) < (1 << 12) &&
276 abs((int)data.driver_location -
277 (int)ctx->last_var_data.driver_location) < (1 << 15))
278 flags.u.data_encoding = var_encode_location_diff;
279 else
280 flags.u.data_encoding = var_encode_full;
281 }
282
283 blob_write_uint32(ctx->blob, flags.u32);
284
285 if (!flags.u.type_same_as_last) {
286 encode_type_to_blob(ctx->blob, var->type);
287 ctx->last_type = var->type;
288 }
289
290 if (var->interface_type && !flags.u.interface_type_same_as_last) {
291 encode_type_to_blob(ctx->blob, var->interface_type);
292 ctx->last_interface_type = var->interface_type;
293 }
294
295 if (flags.u.has_name)
296 blob_write_string(ctx->blob, var->name);
297
298 if (flags.u.data_encoding == var_encode_full ||
299 flags.u.data_encoding == var_encode_location_diff) {
300 if (flags.u.data_encoding == var_encode_full) {
301 blob_write_bytes(ctx->blob, &data, sizeof(data));
302 } else {
303 /* Serialize only the difference in locations from the last variable.
304 */
305 union packed_var_data_diff diff;
306
307 diff.u.location = data.location - ctx->last_var_data.location;
308 diff.u.location_frac = data.location_frac -
309 ctx->last_var_data.location_frac;
310 diff.u.driver_location = data.driver_location -
311 ctx->last_var_data.driver_location;
312
313 blob_write_uint32(ctx->blob, diff.u32);
314 }
315
316 ctx->last_var_data = data;
317 }
318
319 for (unsigned i = 0; i < var->num_state_slots; i++) {
320 blob_write_bytes(ctx->blob, &var->state_slots[i],
321 sizeof(var->state_slots[i]));
322 }
323 if (var->constant_initializer)
324 write_constant(ctx, var->constant_initializer);
325 if (var->num_members > 0) {
326 blob_write_bytes(ctx->blob, (uint8_t *) var->members,
327 var->num_members * sizeof(*var->members));
328 }
329 }
330
331 static nir_variable *
332 read_variable(read_ctx *ctx)
333 {
334 nir_variable *var = rzalloc(ctx->nir, nir_variable);
335 read_add_object(ctx, var);
336
337 union packed_var flags;
338 flags.u32 = blob_read_uint32(ctx->blob);
339
340 if (flags.u.type_same_as_last) {
341 var->type = ctx->last_type;
342 } else {
343 var->type = decode_type_from_blob(ctx->blob);
344 ctx->last_type = var->type;
345 }
346
347 if (flags.u.has_interface_type) {
348 if (flags.u.interface_type_same_as_last) {
349 var->interface_type = ctx->last_interface_type;
350 } else {
351 var->interface_type = decode_type_from_blob(ctx->blob);
352 ctx->last_interface_type = var->interface_type;
353 }
354 }
355
356 if (flags.u.has_name) {
357 const char *name = blob_read_string(ctx->blob);
358 var->name = ralloc_strdup(var, name);
359 } else {
360 var->name = NULL;
361 }
362
363 if (flags.u.data_encoding == var_encode_shader_temp)
364 var->data.mode = nir_var_shader_temp;
365 else if (flags.u.data_encoding == var_encode_function_temp)
366 var->data.mode = nir_var_function_temp;
367 else if (flags.u.data_encoding == var_encode_full) {
368 blob_copy_bytes(ctx->blob, (uint8_t *) &var->data, sizeof(var->data));
369 ctx->last_var_data = var->data;
370 } else { /* var_encode_location_diff */
371 union packed_var_data_diff diff;
372 diff.u32 = blob_read_uint32(ctx->blob);
373
374 var->data = ctx->last_var_data;
375 var->data.location += diff.u.location;
376 var->data.location_frac += diff.u.location_frac;
377 var->data.driver_location += diff.u.driver_location;
378
379 ctx->last_var_data = var->data;
380 }
381
382 var->num_state_slots = flags.u.num_state_slots;
383 if (var->num_state_slots != 0) {
384 var->state_slots = ralloc_array(var, nir_state_slot,
385 var->num_state_slots);
386 for (unsigned i = 0; i < var->num_state_slots; i++) {
387 blob_copy_bytes(ctx->blob, &var->state_slots[i],
388 sizeof(var->state_slots[i]));
389 }
390 }
391 if (flags.u.has_constant_initializer)
392 var->constant_initializer = read_constant(ctx, var);
393 else
394 var->constant_initializer = NULL;
395 var->num_members = flags.u.num_members;
396 if (var->num_members > 0) {
397 var->members = ralloc_array(var, struct nir_variable_data,
398 var->num_members);
399 blob_copy_bytes(ctx->blob, (uint8_t *) var->members,
400 var->num_members * sizeof(*var->members));
401 }
402
403 return var;
404 }
405
406 static void
407 write_var_list(write_ctx *ctx, const struct exec_list *src)
408 {
409 blob_write_uint32(ctx->blob, exec_list_length(src));
410 foreach_list_typed(nir_variable, var, node, src) {
411 write_variable(ctx, var);
412 }
413 }
414
415 static void
416 read_var_list(read_ctx *ctx, struct exec_list *dst)
417 {
418 exec_list_make_empty(dst);
419 unsigned num_vars = blob_read_uint32(ctx->blob);
420 for (unsigned i = 0; i < num_vars; i++) {
421 nir_variable *var = read_variable(ctx);
422 exec_list_push_tail(dst, &var->node);
423 }
424 }
425
426 static void
427 write_register(write_ctx *ctx, const nir_register *reg)
428 {
429 write_add_object(ctx, reg);
430 blob_write_uint32(ctx->blob, reg->num_components);
431 blob_write_uint32(ctx->blob, reg->bit_size);
432 blob_write_uint32(ctx->blob, reg->num_array_elems);
433 blob_write_uint32(ctx->blob, reg->index);
434 blob_write_uint32(ctx->blob, !ctx->strip && reg->name);
435 if (!ctx->strip && reg->name)
436 blob_write_string(ctx->blob, reg->name);
437 }
438
439 static nir_register *
440 read_register(read_ctx *ctx)
441 {
442 nir_register *reg = ralloc(ctx->nir, nir_register);
443 read_add_object(ctx, reg);
444 reg->num_components = blob_read_uint32(ctx->blob);
445 reg->bit_size = blob_read_uint32(ctx->blob);
446 reg->num_array_elems = blob_read_uint32(ctx->blob);
447 reg->index = blob_read_uint32(ctx->blob);
448 bool has_name = blob_read_uint32(ctx->blob);
449 if (has_name) {
450 const char *name = blob_read_string(ctx->blob);
451 reg->name = ralloc_strdup(reg, name);
452 } else {
453 reg->name = NULL;
454 }
455
456 list_inithead(&reg->uses);
457 list_inithead(&reg->defs);
458 list_inithead(&reg->if_uses);
459
460 return reg;
461 }
462
463 static void
464 write_reg_list(write_ctx *ctx, const struct exec_list *src)
465 {
466 blob_write_uint32(ctx->blob, exec_list_length(src));
467 foreach_list_typed(nir_register, reg, node, src)
468 write_register(ctx, reg);
469 }
470
471 static void
472 read_reg_list(read_ctx *ctx, struct exec_list *dst)
473 {
474 exec_list_make_empty(dst);
475 unsigned num_regs = blob_read_uint32(ctx->blob);
476 for (unsigned i = 0; i < num_regs; i++) {
477 nir_register *reg = read_register(ctx);
478 exec_list_push_tail(dst, &reg->node);
479 }
480 }
481
482 union packed_src {
483 uint32_t u32;
484 struct {
485 unsigned is_ssa:1; /* <-- Header */
486 unsigned is_indirect:1;
487 unsigned object_idx:20;
488 unsigned _footer:10; /* <-- Footer */
489 } any;
490 struct {
491 unsigned _header:22; /* <-- Header */
492 unsigned negate:1; /* <-- Footer */
493 unsigned abs:1;
494 unsigned swizzle_x:2;
495 unsigned swizzle_y:2;
496 unsigned swizzle_z:2;
497 unsigned swizzle_w:2;
498 } alu;
499 struct {
500 unsigned _header:22; /* <-- Header */
501 unsigned src_type:5; /* <-- Footer */
502 unsigned _pad:5;
503 } tex;
504 };
505
506 static void
507 write_src_full(write_ctx *ctx, const nir_src *src, union packed_src header)
508 {
509 /* Since sources are very frequent, we try to save some space when storing
510 * them. In particular, we store whether the source is an SSA value and
511 * whether a register source has an indirect index in the low two bits of
512 * the header. The object index fits in the remaining 20 bits, since the
513 * remap table is capped at MAX_OBJECT_IDS entries.
514 */
515 header.any.is_ssa = src->is_ssa;
516 if (src->is_ssa) {
517 header.any.object_idx = write_lookup_object(ctx, src->ssa);
518 blob_write_uint32(ctx->blob, header.u32);
519 } else {
520 header.any.object_idx = write_lookup_object(ctx, src->reg.reg);
521 header.any.is_indirect = !!src->reg.indirect;
522 blob_write_uint32(ctx->blob, header.u32);
523 blob_write_uint32(ctx->blob, src->reg.base_offset);
524 if (src->reg.indirect) {
525 union packed_src header = {0};
526 write_src_full(ctx, src->reg.indirect, header);
527 }
528 }
529 }
530
531 static void
532 write_src(write_ctx *ctx, const nir_src *src)
533 {
534 union packed_src header = {0};
535 write_src_full(ctx, src, header);
536 }
537
538 static union packed_src
539 read_src(read_ctx *ctx, nir_src *src, void *mem_ctx)
540 {
541 STATIC_ASSERT(sizeof(union packed_src) == 4);
542 union packed_src header;
543 header.u32 = blob_read_uint32(ctx->blob);
544
545 src->is_ssa = header.any.is_ssa;
546 if (src->is_ssa) {
547 src->ssa = read_lookup_object(ctx, header.any.object_idx);
548 } else {
549 src->reg.reg = read_lookup_object(ctx, header.any.object_idx);
550 src->reg.base_offset = blob_read_uint32(ctx->blob);
551 if (header.any.is_indirect) {
552 src->reg.indirect = ralloc(mem_ctx, nir_src);
553 read_src(ctx, src->reg.indirect, mem_ctx);
554 } else {
555 src->reg.indirect = NULL;
556 }
557 }
558 return header;
559 }
560
561 union packed_dest {
562 uint8_t u8;
563 struct {
564 uint8_t is_ssa:1;
565 uint8_t has_name:1;
566 uint8_t num_components:3;
567 uint8_t bit_size:3;
568 } ssa;
569 struct {
570 uint8_t is_ssa:1;
571 uint8_t is_indirect:1;
572 uint8_t _pad:6;
573 } reg;
574 };
575
576 enum intrinsic_const_indices_encoding {
577 /* Use the 9 bits of packed_const_indices to store 1-9 indices.
578 * 1 9-bit index, or 2 4-bit indices, or 3 3-bit indices, or
579 * 4 2-bit indices, or 5-9 1-bit indices.
580 *
581 * The common case for load_ubo is 0, 0, 0, which is trivially represented.
582 * The common cases for load_interpolated_input also fit here, e.g.: 7, 3
583 */
584 const_indices_9bit_all_combined,
585
586 const_indices_8bit, /* 8 bits per element */
587 const_indices_16bit, /* 16 bits per element */
588 const_indices_32bit, /* 32 bits per element */
589 };
590
591 enum load_const_packing {
592 /* Constants are not packed and are stored in following dwords. */
593 load_const_full,
594
595 /* packed_value contains high 19 bits, low bits are 0,
596 * good for floating-point decimals
597 */
598 load_const_scalar_hi_19bits,
599
600 /* packed_value contains low 19 bits, high bits are sign-extended */
601 load_const_scalar_lo_19bits_sext,
602 };
603
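/* First dword of every serialized instruction. A 4-bit type tag comes first;
 * the remaining bits are laid out per instruction type, and instructions
 * with a destination keep the packed dest byte in the top 8 bits.
 */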
604 union packed_instr {
605 uint32_t u32;
606 struct {
607 unsigned instr_type:4; /* always present */
608 unsigned _pad:20;
609 unsigned dest:8; /* always last */
610 } any;
611 struct {
612 unsigned instr_type:4;
613 unsigned exact:1;
614 unsigned no_signed_wrap:1;
615 unsigned no_unsigned_wrap:1;
616 unsigned saturate:1;
617 /* Reg: writemask; SSA: swizzles for 2 srcs */
618 unsigned writemask_or_two_swizzles:4;
619 unsigned op:9;
620 unsigned packed_src_ssa_16bit:1;
621 /* Scalarized ALUs always have the same header. */
622 unsigned num_followup_alu_sharing_header:2;
623 unsigned dest:8;
624 } alu;
625 struct {
626 unsigned instr_type:4;
627 unsigned deref_type:3;
628 unsigned cast_type_same_as_last:1;
629 unsigned mode:10; /* deref_var redefines this */
630 unsigned packed_src_ssa_16bit:1; /* deref_var redefines this */
631 unsigned _pad:5; /* deref_var redefines this */
632 unsigned dest:8;
633 } deref;
634 struct {
635 unsigned instr_type:4;
636 unsigned deref_type:3;
637 unsigned _pad:1;
638 unsigned object_idx:16; /* if 0, the object ID is a separate uint32 */
639 unsigned dest:8;
640 } deref_var;
641 struct {
642 unsigned instr_type:4;
643 unsigned intrinsic:9;
644 unsigned const_indices_encoding:2;
645 unsigned packed_const_indices:9;
646 unsigned dest:8;
647 } intrinsic;
648 struct {
649 unsigned instr_type:4;
650 unsigned last_component:4;
651 unsigned bit_size:3;
652 unsigned packing:2; /* enum load_const_packing */
653 unsigned packed_value:19; /* meaning determined by packing */
654 } load_const;
655 struct {
656 unsigned instr_type:4;
657 unsigned last_component:4;
658 unsigned bit_size:3;
659 unsigned _pad:21;
660 } undef;
661 struct {
662 unsigned instr_type:4;
663 unsigned num_srcs:4;
664 unsigned op:4;
665 unsigned texture_array_size:12;
666 unsigned dest:8;
667 } tex;
668 struct {
669 unsigned instr_type:4;
670 unsigned num_srcs:20;
671 unsigned dest:8;
672 } phi;
673 struct {
674 unsigned instr_type:4;
675 unsigned type:2;
676 unsigned _pad:26;
677 } jump;
678 };
679
680 /* The low 24 bits of the header are filled by the caller; the packed dest goes in the top 8 bits. */
681 static void
682 write_dest(write_ctx *ctx, const nir_dest *dst, union packed_instr header,
683 nir_instr_type instr_type)
684 {
685 STATIC_ASSERT(sizeof(union packed_dest) == 1);
686 union packed_dest dest;
687 dest.u8 = 0;
688
689 dest.ssa.is_ssa = dst->is_ssa;
690 if (dst->is_ssa) {
691 dest.ssa.has_name = !ctx->strip && dst->ssa.name;
692 dest.ssa.num_components =
693 encode_num_components_in_3bits(dst->ssa.num_components);
694 dest.ssa.bit_size = encode_bit_size_3bits(dst->ssa.bit_size);
695 } else {
696 dest.reg.is_indirect = !!(dst->reg.indirect);
697 }
698 header.any.dest = dest.u8;
699
700 /* Check if the current ALU instruction has the same header as the previous
701 * instruction, when that was also an ALU. If so, we don't have to write
702 * the current header. This is a typical occurrence after scalarization.
703 */
704 if (instr_type == nir_instr_type_alu) {
705 bool equal_header = false;
706
707 if (ctx->last_instr_type == nir_instr_type_alu) {
708 assert(ctx->last_alu_header_offset);
709 union packed_instr *last_header =
710 (union packed_instr *)(ctx->blob->data +
711 ctx->last_alu_header_offset);
712
713 /* Clear the field that counts ALUs with equal headers. */
714 union packed_instr clean_header;
715 clean_header.u32 = last_header->u32;
716 clean_header.alu.num_followup_alu_sharing_header = 0;
717
718 /* There can be at most 4 consecutive ALU instructions
719 * sharing the same header.
720 */
721 if (last_header->alu.num_followup_alu_sharing_header < 3 &&
722 header.u32 == clean_header.u32) {
723 last_header->alu.num_followup_alu_sharing_header++;
724 equal_header = true;
725 }
726 }
727
728 if (!equal_header) {
729 ctx->last_alu_header_offset = ctx->blob->size;
730 blob_write_uint32(ctx->blob, header.u32);
731 }
732 } else {
733 blob_write_uint32(ctx->blob, header.u32);
734 }
735
736 if (dest.ssa.is_ssa &&
737 dest.ssa.num_components == NUM_COMPONENTS_IS_SEPARATE_7)
738 blob_write_uint32(ctx->blob, dst->ssa.num_components);
739
740 if (dst->is_ssa) {
741 write_add_object(ctx, &dst->ssa);
742 if (dest.ssa.has_name)
743 blob_write_string(ctx->blob, dst->ssa.name);
744 } else {
745 blob_write_uint32(ctx->blob, write_lookup_object(ctx, dst->reg.reg));
746 blob_write_uint32(ctx->blob, dst->reg.base_offset);
747 if (dst->reg.indirect)
748 write_src(ctx, dst->reg.indirect);
749 }
750 }
751
752 static void
753 read_dest(read_ctx *ctx, nir_dest *dst, nir_instr *instr,
754 union packed_instr header)
755 {
756 union packed_dest dest;
757 dest.u8 = header.any.dest;
758
759 if (dest.ssa.is_ssa) {
760 unsigned bit_size = decode_bit_size_3bits(dest.ssa.bit_size);
761 unsigned num_components;
762 if (dest.ssa.num_components == NUM_COMPONENTS_IS_SEPARATE_7)
763 num_components = blob_read_uint32(ctx->blob);
764 else
765 num_components = decode_num_components_in_3bits(dest.ssa.num_components);
766 char *name = dest.ssa.has_name ? blob_read_string(ctx->blob) : NULL;
767 nir_ssa_dest_init(instr, dst, num_components, bit_size, name);
768 read_add_object(ctx, &dst->ssa);
769 } else {
770 dst->reg.reg = read_object(ctx);
771 dst->reg.base_offset = blob_read_uint32(ctx->blob);
772 if (dest.reg.is_indirect) {
773 dst->reg.indirect = ralloc(instr, nir_src);
774 read_src(ctx, dst->reg.indirect, instr);
775 }
776 }
777 }
778
779 static bool
780 are_object_ids_16bit(write_ctx *ctx)
781 {
782 /* Check the next object ID: IDs are assigned monotonically, so it bounds all existing ones. */
783 return ctx->next_idx < (1 << 16);
784 }
785
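/* True if every ALU source can be stored as a bare 16-bit SSA index: no
 * abs/negate, identity swizzles (except src0.x/src1.x, which fit in the
 * header for SSA dests), and object IDs that still fit in 16 bits.
 */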
786 static bool
787 is_alu_src_ssa_16bit(write_ctx *ctx, const nir_alu_instr *alu)
788 {
789 unsigned num_srcs = nir_op_infos[alu->op].num_inputs;
790
791 for (unsigned i = 0; i < num_srcs; i++) {
792 if (!alu->src[i].src.is_ssa || alu->src[i].abs || alu->src[i].negate)
793 return false;
794
795 unsigned src_components = nir_ssa_alu_instr_src_components(alu, i);
796
797 for (unsigned chan = 0; chan < src_components; chan++) {
798 /* The swizzles for src0.x and src1.x are stored
799 * in writemask_or_two_swizzles for SSA ALUs.
800 */
801 if (alu->dest.dest.is_ssa && i < 2 && chan == 0 &&
802 alu->src[i].swizzle[chan] < 4)
803 continue;
804
805 if (alu->src[i].swizzle[chan] != chan)
806 return false;
807 }
808 }
809
810 return are_object_ids_16bit(ctx);
811 }
812
813 static void
814 write_alu(write_ctx *ctx, const nir_alu_instr *alu)
815 {
816 unsigned num_srcs = nir_op_infos[alu->op].num_inputs;
817 unsigned dst_components = nir_dest_num_components(alu->dest.dest);
818
819 /* 9 bits for nir_op */
820 STATIC_ASSERT(nir_num_opcodes <= 512);
821 union packed_instr header;
822 header.u32 = 0;
823
824 header.alu.instr_type = alu->instr.type;
825 header.alu.exact = alu->exact;
826 header.alu.no_signed_wrap = alu->no_signed_wrap;
827 header.alu.no_unsigned_wrap = alu->no_unsigned_wrap;
828 header.alu.saturate = alu->dest.saturate;
829 header.alu.op = alu->op;
830 header.alu.packed_src_ssa_16bit = is_alu_src_ssa_16bit(ctx, alu);
831
832 if (header.alu.packed_src_ssa_16bit &&
833 alu->dest.dest.is_ssa) {
834 /* For packed srcs of SSA ALUs, this field stores the swizzles. */
835 header.alu.writemask_or_two_swizzles = alu->src[0].swizzle[0];
836 if (num_srcs > 1)
837 header.alu.writemask_or_two_swizzles |= alu->src[1].swizzle[0] << 2;
838 } else if (!alu->dest.dest.is_ssa && dst_components <= 4) {
839 /* For vec4 registers, this field is a writemask. */
840 header.alu.writemask_or_two_swizzles = alu->dest.write_mask;
841 }
842
843 write_dest(ctx, &alu->dest.dest, header, alu->instr.type);
844
845 if (!alu->dest.dest.is_ssa && dst_components > 4)
846 blob_write_uint32(ctx->blob, alu->dest.write_mask);
847
848 if (header.alu.packed_src_ssa_16bit) {
849 for (unsigned i = 0; i < num_srcs; i++) {
850 assert(alu->src[i].src.is_ssa);
851 unsigned idx = write_lookup_object(ctx, alu->src[i].src.ssa);
852 assert(idx < (1 << 16));
853 blob_write_uint16(ctx->blob, idx);
854 }
855 } else {
856 for (unsigned i = 0; i < num_srcs; i++) {
857 unsigned src_channels = nir_ssa_alu_instr_src_components(alu, i);
858 unsigned src_components = nir_src_num_components(alu->src[i].src);
859 union packed_src src;
860 bool packed = src_components <= 4 && src_channels <= 4;
861 src.u32 = 0;
862
863 src.alu.negate = alu->src[i].negate;
864 src.alu.abs = alu->src[i].abs;
865
866 if (packed) {
867 src.alu.swizzle_x = alu->src[i].swizzle[0];
868 src.alu.swizzle_y = alu->src[i].swizzle[1];
869 src.alu.swizzle_z = alu->src[i].swizzle[2];
870 src.alu.swizzle_w = alu->src[i].swizzle[3];
871 }
872
873 write_src_full(ctx, &alu->src[i].src, src);
874
875 /* Store swizzles for vec8 and vec16. */
876 if (!packed) {
877 for (unsigned o = 0; o < src_channels; o += 8) {
878 unsigned value = 0;
879
880 for (unsigned j = 0; j < 8 && o + j < src_channels; j++) {
881 value |= (uint32_t)alu->src[i].swizzle[o + j] <<
882 (4 * j); /* 4 bits per swizzle */
883 }
884
885 blob_write_uint32(ctx->blob, value);
886 }
887 }
888 }
889 }
890 }
891
892 static nir_alu_instr *
893 read_alu(read_ctx *ctx, union packed_instr header)
894 {
895 unsigned num_srcs = nir_op_infos[header.alu.op].num_inputs;
896 nir_alu_instr *alu = nir_alu_instr_create(ctx->nir, header.alu.op);
897
898 alu->exact = header.alu.exact;
899 alu->no_signed_wrap = header.alu.no_signed_wrap;
900 alu->no_unsigned_wrap = header.alu.no_unsigned_wrap;
901 alu->dest.saturate = header.alu.saturate;
902
903 read_dest(ctx, &alu->dest.dest, &alu->instr, header);
904
905 unsigned dst_components = nir_dest_num_components(alu->dest.dest);
906
907 if (alu->dest.dest.is_ssa) {
908 alu->dest.write_mask = u_bit_consecutive(0, dst_components);
909 } else if (dst_components <= 4) {
910 alu->dest.write_mask = header.alu.writemask_or_two_swizzles;
911 } else {
912 alu->dest.write_mask = blob_read_uint32(ctx->blob);
913 }
914
915 if (header.alu.packed_src_ssa_16bit) {
916 for (unsigned i = 0; i < num_srcs; i++) {
917 nir_alu_src *src = &alu->src[i];
918 src->src.is_ssa = true;
919 src->src.ssa = read_lookup_object(ctx, blob_read_uint16(ctx->blob));
920
921 memset(&src->swizzle, 0, sizeof(src->swizzle));
922
923 unsigned src_components = nir_ssa_alu_instr_src_components(alu, i);
924
925 for (unsigned chan = 0; chan < src_components; chan++)
926 src->swizzle[chan] = chan;
927 }
928 } else {
929 for (unsigned i = 0; i < num_srcs; i++) {
930 union packed_src src = read_src(ctx, &alu->src[i].src, &alu->instr);
931 unsigned src_channels = nir_ssa_alu_instr_src_components(alu, i);
932 unsigned src_components = nir_src_num_components(alu->src[i].src);
933 bool packed = src_components <= 4 && src_channels <= 4;
934
935 alu->src[i].negate = src.alu.negate;
936 alu->src[i].abs = src.alu.abs;
937
938 memset(&alu->src[i].swizzle, 0, sizeof(alu->src[i].swizzle));
939
940 if (packed) {
941 alu->src[i].swizzle[0] = src.alu.swizzle_x;
942 alu->src[i].swizzle[1] = src.alu.swizzle_y;
943 alu->src[i].swizzle[2] = src.alu.swizzle_z;
944 alu->src[i].swizzle[3] = src.alu.swizzle_w;
945 } else {
946 /* Load swizzles for vec8 and vec16. */
947 for (unsigned o = 0; o < src_channels; o += 8) {
948 unsigned value = blob_read_uint32(ctx->blob);
949
950 for (unsigned j = 0; j < 8 && o + j < src_channels; j++) {
951 alu->src[i].swizzle[o + j] =
952 (value >> (4 * j)) & 0xf; /* 4 bits per swizzle */
953 }
954 }
955 }
956 }
957 }
958
959 if (header.alu.packed_src_ssa_16bit &&
960 alu->dest.dest.is_ssa) {
961 alu->src[0].swizzle[0] = header.alu.writemask_or_two_swizzles & 0x3;
962 if (num_srcs > 1)
963 alu->src[1].swizzle[0] = header.alu.writemask_or_two_swizzles >> 2;
964 }
965
966 return alu;
967 }
968
969 static void
970 write_deref(write_ctx *ctx, const nir_deref_instr *deref)
971 {
972 assert(deref->deref_type < 8);
973 assert(deref->mode < (1 << 10));
974
975 union packed_instr header;
976 header.u32 = 0;
977
978 header.deref.instr_type = deref->instr.type;
979 header.deref.deref_type = deref->deref_type;
980
981 if (deref->deref_type == nir_deref_type_cast) {
982 header.deref.mode = deref->mode;
983 header.deref.cast_type_same_as_last = deref->type == ctx->last_type;
984 }
985
986 unsigned var_idx = 0;
987 if (deref->deref_type == nir_deref_type_var) {
988 var_idx = write_lookup_object(ctx, deref->var);
989 if (var_idx && var_idx < (1 << 16))
990 header.deref_var.object_idx = var_idx;
991 }
992
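/* For (ptr_as_)array derefs whose parent and index are both SSA, the two
 * sources can be stored as 16-bit object IDs instead of full packed_srcs.
 */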
993 if (deref->deref_type == nir_deref_type_array ||
994 deref->deref_type == nir_deref_type_ptr_as_array) {
995 header.deref.packed_src_ssa_16bit =
996 deref->parent.is_ssa && deref->arr.index.is_ssa &&
997 are_object_ids_16bit(ctx);
998 }
999
1000 write_dest(ctx, &deref->dest, header, deref->instr.type);
1001
1002 switch (deref->deref_type) {
1003 case nir_deref_type_var:
1004 if (!header.deref_var.object_idx)
1005 blob_write_uint32(ctx->blob, var_idx);
1006 break;
1007
1008 case nir_deref_type_struct:
1009 write_src(ctx, &deref->parent);
1010 blob_write_uint32(ctx->blob, deref->strct.index);
1011 break;
1012
1013 case nir_deref_type_array:
1014 case nir_deref_type_ptr_as_array:
1015 if (header.deref.packed_src_ssa_16bit) {
1016 blob_write_uint16(ctx->blob,
1017 write_lookup_object(ctx, deref->parent.ssa));
1018 blob_write_uint16(ctx->blob,
1019 write_lookup_object(ctx, deref->arr.index.ssa));
1020 } else {
1021 write_src(ctx, &deref->parent);
1022 write_src(ctx, &deref->arr.index);
1023 }
1024 break;
1025
1026 case nir_deref_type_cast:
1027 write_src(ctx, &deref->parent);
1028 blob_write_uint32(ctx->blob, deref->cast.ptr_stride);
1029 if (!header.deref.cast_type_same_as_last) {
1030 encode_type_to_blob(ctx->blob, deref->type);
1031 ctx->last_type = deref->type;
1032 }
1033 break;
1034
1035 case nir_deref_type_array_wildcard:
1036 write_src(ctx, &deref->parent);
1037 break;
1038
1039 default:
1040 unreachable("Invalid deref type");
1041 }
1042 }
1043
1044 static nir_deref_instr *
1045 read_deref(read_ctx *ctx, union packed_instr header)
1046 {
1047 nir_deref_type deref_type = header.deref.deref_type;
1048 nir_deref_instr *deref = nir_deref_instr_create(ctx->nir, deref_type);
1049
1050 read_dest(ctx, &deref->dest, &deref->instr, header);
1051
1052 nir_deref_instr *parent;
1053
1054 switch (deref->deref_type) {
1055 case nir_deref_type_var:
1056 if (header.deref_var.object_idx)
1057 deref->var = read_lookup_object(ctx, header.deref_var.object_idx);
1058 else
1059 deref->var = read_object(ctx);
1060
1061 deref->type = deref->var->type;
1062 break;
1063
1064 case nir_deref_type_struct:
1065 read_src(ctx, &deref->parent, &deref->instr);
1066 parent = nir_src_as_deref(deref->parent);
1067 deref->strct.index = blob_read_uint32(ctx->blob);
1068 deref->type = glsl_get_struct_field(parent->type, deref->strct.index);
1069 break;
1070
1071 case nir_deref_type_array:
1072 case nir_deref_type_ptr_as_array:
1073 if (header.deref.packed_src_ssa_16bit) {
1074 deref->parent.is_ssa = true;
1075 deref->parent.ssa = read_lookup_object(ctx, blob_read_uint16(ctx->blob));
1076 deref->arr.index.is_ssa = true;
1077 deref->arr.index.ssa = read_lookup_object(ctx, blob_read_uint16(ctx->blob));
1078 } else {
1079 read_src(ctx, &deref->parent, &deref->instr);
1080 read_src(ctx, &deref->arr.index, &deref->instr);
1081 }
1082
1083 parent = nir_src_as_deref(deref->parent);
1084 if (deref->deref_type == nir_deref_type_array)
1085 deref->type = glsl_get_array_element(parent->type);
1086 else
1087 deref->type = parent->type;
1088 break;
1089
1090 case nir_deref_type_cast:
1091 read_src(ctx, &deref->parent, &deref->instr);
1092 deref->cast.ptr_stride = blob_read_uint32(ctx->blob);
1093 if (header.deref.cast_type_same_as_last) {
1094 deref->type = ctx->last_type;
1095 } else {
1096 deref->type = decode_type_from_blob(ctx->blob);
1097 ctx->last_type = deref->type;
1098 }
1099 break;
1100
1101 case nir_deref_type_array_wildcard:
1102 read_src(ctx, &deref->parent, &deref->instr);
1103 parent = nir_src_as_deref(deref->parent);
1104 deref->type = glsl_get_array_element(parent->type);
1105 break;
1106
1107 default:
1108 unreachable("Invalid deref type");
1109 }
1110
1111 if (deref_type == nir_deref_type_var) {
1112 deref->mode = deref->var->data.mode;
1113 } else if (deref->deref_type == nir_deref_type_cast) {
1114 deref->mode = header.deref.mode;
1115 } else {
1116 assert(deref->parent.is_ssa);
1117 deref->mode = nir_instr_as_deref(deref->parent.ssa->parent_instr)->mode;
1118 }
1119
1120 return deref;
1121 }
1122
1123 static void
1124 write_intrinsic(write_ctx *ctx, const nir_intrinsic_instr *intrin)
1125 {
1126 /* 9 bits for nir_intrinsic_op */
1127 STATIC_ASSERT(nir_num_intrinsics <= 512);
1128 unsigned num_srcs = nir_intrinsic_infos[intrin->intrinsic].num_srcs;
1129 unsigned num_indices = nir_intrinsic_infos[intrin->intrinsic].num_indices;
1130 assert(intrin->intrinsic < 512);
1131
1132 union packed_instr header;
1133 header.u32 = 0;
1134
1135 header.intrinsic.instr_type = intrin->instr.type;
1136 header.intrinsic.intrinsic = intrin->intrinsic;
1137
1138 /* Analyze constant indices to decide how to encode them. */
1139 if (num_indices) {
1140 unsigned max_bits = 0;
1141 for (unsigned i = 0; i < num_indices; i++) {
1142 unsigned max = util_last_bit(intrin->const_index[i]);
1143 max_bits = MAX2(max_bits, max);
1144 }
1145
1146 if (max_bits * num_indices <= 9) {
1147 header.intrinsic.const_indices_encoding = const_indices_9bit_all_combined;
1148
1149 /* Pack all const indices into at most 9 bits. */
1150 unsigned bit_size = 9 / num_indices;
1151 for (unsigned i = 0; i < num_indices; i++) {
1152 header.intrinsic.packed_const_indices |=
1153 intrin->const_index[i] << (i * bit_size);
1154 }
1155 } else if (max_bits <= 8)
1156 header.intrinsic.const_indices_encoding = const_indices_8bit;
1157 else if (max_bits <= 16)
1158 header.intrinsic.const_indices_encoding = const_indices_16bit;
1159 else
1160 header.intrinsic.const_indices_encoding = const_indices_32bit;
1161 }
1162
1163 if (nir_intrinsic_infos[intrin->intrinsic].has_dest)
1164 write_dest(ctx, &intrin->dest, header, intrin->instr.type);
1165 else
1166 blob_write_uint32(ctx->blob, header.u32);
1167
1168 for (unsigned i = 0; i < num_srcs; i++)
1169 write_src(ctx, &intrin->src[i]);
1170
1171 if (num_indices) {
1172 switch (header.intrinsic.const_indices_encoding) {
1173 case const_indices_8bit:
1174 for (unsigned i = 0; i < num_indices; i++)
1175 blob_write_uint8(ctx->blob, intrin->const_index[i]);
1176 break;
1177 case const_indices_16bit:
1178 for (unsigned i = 0; i < num_indices; i++)
1179 blob_write_uint16(ctx->blob, intrin->const_index[i]);
1180 break;
1181 case const_indices_32bit:
1182 for (unsigned i = 0; i < num_indices; i++)
1183 blob_write_uint32(ctx->blob, intrin->const_index[i]);
1184 break;
1185 }
1186 }
1187 }
1188
1189 static nir_intrinsic_instr *
1190 read_intrinsic(read_ctx *ctx, union packed_instr header)
1191 {
1192 nir_intrinsic_op op = header.intrinsic.intrinsic;
1193 nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(ctx->nir, op);
1194
1195 unsigned num_srcs = nir_intrinsic_infos[op].num_srcs;
1196 unsigned num_indices = nir_intrinsic_infos[op].num_indices;
1197
1198 if (nir_intrinsic_infos[op].has_dest)
1199 read_dest(ctx, &intrin->dest, &intrin->instr, header);
1200
1201 for (unsigned i = 0; i < num_srcs; i++)
1202 read_src(ctx, &intrin->src[i], &intrin->instr);
1203
1204 /* Vectorized intrinsics take num_components from the dst or src that has
1205 * 0 components in the info. Find it.
1206 */
1207 if (nir_intrinsic_infos[op].has_dest &&
1208 nir_intrinsic_infos[op].dest_components == 0) {
1209 intrin->num_components = nir_dest_num_components(intrin->dest);
1210 } else {
1211 for (unsigned i = 0; i < num_srcs; i++) {
1212 if (nir_intrinsic_infos[op].src_components[i] == 0) {
1213 intrin->num_components = nir_src_num_components(intrin->src[i]);
1214 break;
1215 }
1216 }
1217 }
1218
1219 if (num_indices) {
1220 switch (header.intrinsic.const_indices_encoding) {
1221 case const_indices_9bit_all_combined: {
1222 unsigned bit_size = 9 / num_indices;
1223 unsigned bit_mask = u_bit_consecutive(0, bit_size);
1224 for (unsigned i = 0; i < num_indices; i++) {
1225 intrin->const_index[i] =
1226 (header.intrinsic.packed_const_indices >> (i * bit_size)) &
1227 bit_mask;
1228 }
1229 break;
1230 }
1231 case const_indices_8bit:
1232 for (unsigned i = 0; i < num_indices; i++)
1233 intrin->const_index[i] = blob_read_uint8(ctx->blob);
1234 break;
1235 case const_indices_16bit:
1236 for (unsigned i = 0; i < num_indices; i++)
1237 intrin->const_index[i] = blob_read_uint16(ctx->blob);
1238 break;
1239 case const_indices_32bit:
1240 for (unsigned i = 0; i < num_indices; i++)
1241 intrin->const_index[i] = blob_read_uint32(ctx->blob);
1242 break;
1243 }
1244 }
1245
1246 return intrin;
1247 }
1248
1249 static void
1250 write_load_const(write_ctx *ctx, const nir_load_const_instr *lc)
1251 {
1252 assert(lc->def.num_components >= 1 && lc->def.num_components <= 16);
1253 union packed_instr header;
1254 header.u32 = 0;
1255
1256 header.load_const.instr_type = lc->instr.type;
1257 header.load_const.last_component = lc->def.num_components - 1;
1258 header.load_const.bit_size = encode_bit_size_3bits(lc->def.bit_size);
1259 header.load_const.packing = load_const_full;
1260
1261 /* Try to pack 1-component constants into the 19 free bits in the header. */
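/* For example, 1.0f (0x3f800000) has 13 zero low bits, so it is stored as
 * its high 19 bits (0x1fc00) with load_const_scalar_hi_19bits.
 */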
1262 if (lc->def.num_components == 1) {
1263 switch (lc->def.bit_size) {
1264 case 64:
1265 if ((lc->value[0].u64 & 0x1fffffffffffull) == 0) {
1266 /* packed_value contains high 19 bits, low bits are 0 */
1267 header.load_const.packing = load_const_scalar_hi_19bits;
1268 header.load_const.packed_value = lc->value[0].u64 >> 45;
1269 } else if (((lc->value[0].i64 << 45) >> 45) == lc->value[0].i64) {
1270 /* packed_value contains low 19 bits, high bits are sign-extended */
1271 header.load_const.packing = load_const_scalar_lo_19bits_sext;
1272 header.load_const.packed_value = lc->value[0].u64;
1273 }
1274 break;
1275
1276 case 32:
1277 if ((lc->value[0].u32 & 0x1fff) == 0) {
1278 header.load_const.packing = load_const_scalar_hi_19bits;
1279 header.load_const.packed_value = lc->value[0].u32 >> 13;
1280 } else if (((lc->value[0].i32 << 13) >> 13) == lc->value[0].i32) {
1281 header.load_const.packing = load_const_scalar_lo_19bits_sext;
1282 header.load_const.packed_value = lc->value[0].u32;
1283 }
1284 break;
1285
1286 case 16:
1287 header.load_const.packing = load_const_scalar_lo_19bits_sext;
1288 header.load_const.packed_value = lc->value[0].u16;
1289 break;
1290 case 8:
1291 header.load_const.packing = load_const_scalar_lo_19bits_sext;
1292 header.load_const.packed_value = lc->value[0].u8;
1293 break;
1294 case 1:
1295 header.load_const.packing = load_const_scalar_lo_19bits_sext;
1296 header.load_const.packed_value = lc->value[0].b;
1297 break;
1298 default:
1299 unreachable("invalid bit_size");
1300 }
1301 }
1302
1303 blob_write_uint32(ctx->blob, header.u32);
1304
1305 if (header.load_const.packing == load_const_full) {
1306 switch (lc->def.bit_size) {
1307 case 64:
1308 blob_write_bytes(ctx->blob, lc->value,
1309 sizeof(*lc->value) * lc->def.num_components);
1310 break;
1311
1312 case 32:
1313 for (unsigned i = 0; i < lc->def.num_components; i++)
1314 blob_write_uint32(ctx->blob, lc->value[i].u32);
1315 break;
1316
1317 case 16:
1318 for (unsigned i = 0; i < lc->def.num_components; i++)
1319 blob_write_uint16(ctx->blob, lc->value[i].u16);
1320 break;
1321
1322 default:
1323 assert(lc->def.bit_size <= 8);
1324 for (unsigned i = 0; i < lc->def.num_components; i++)
1325 blob_write_uint8(ctx->blob, lc->value[i].u8);
1326 break;
1327 }
1328 }
1329
1330 write_add_object(ctx, &lc->def);
1331 }
1332
1333 static nir_load_const_instr *
1334 read_load_const(read_ctx *ctx, union packed_instr header)
1335 {
1336 nir_load_const_instr *lc =
1337 nir_load_const_instr_create(ctx->nir, header.load_const.last_component + 1,
1338 decode_bit_size_3bits(header.load_const.bit_size));
1339
1340 switch (header.load_const.packing) {
1341 case load_const_scalar_hi_19bits:
1342 switch (lc->def.bit_size) {
1343 case 64:
1344 lc->value[0].u64 = (uint64_t)header.load_const.packed_value << 45;
1345 break;
1346 case 32:
1347 lc->value[0].u32 = (uint64_t)header.load_const.packed_value << 13;
1348 break;
1349 default:
1350 unreachable("invalid bit_size");
1351 }
1352 break;
1353
1354 case load_const_scalar_lo_19bits_sext:
1355 switch (lc->def.bit_size) {
1356 case 64:
1357 lc->value[0].i64 = ((int64_t)header.load_const.packed_value << 45) >> 45;
1358 break;
1359 case 32:
1360 lc->value[0].i32 = ((int32_t)header.load_const.packed_value << 13) >> 13;
1361 break;
1362 case 16:
1363 lc->value[0].u16 = header.load_const.packed_value;
1364 break;
1365 case 8:
1366 lc->value[0].u8 = header.load_const.packed_value;
1367 break;
1368 case 1:
1369 lc->value[0].b = header.load_const.packed_value;
1370 break;
1371 default:
1372 unreachable("invalid bit_size");
1373 }
1374 break;
1375
1376 case load_const_full:
1377 switch (lc->def.bit_size) {
1378 case 64:
1379 blob_copy_bytes(ctx->blob, lc->value, sizeof(*lc->value) * lc->def.num_components);
1380 break;
1381
1382 case 32:
1383 for (unsigned i = 0; i < lc->def.num_components; i++)
1384 lc->value[i].u32 = blob_read_uint32(ctx->blob);
1385 break;
1386
1387 case 16:
1388 for (unsigned i = 0; i < lc->def.num_components; i++)
1389 lc->value[i].u16 = blob_read_uint16(ctx->blob);
1390 break;
1391
1392 default:
1393 assert(lc->def.bit_size <= 8);
1394 for (unsigned i = 0; i < lc->def.num_components; i++)
1395 lc->value[i].u8 = blob_read_uint8(ctx->blob);
1396 break;
1397 }
1398 break;
1399 }
1400
1401 read_add_object(ctx, &lc->def);
1402 return lc;
1403 }
1404
1405 static void
1406 write_ssa_undef(write_ctx *ctx, const nir_ssa_undef_instr *undef)
1407 {
1408 assert(undef->def.num_components >= 1 && undef->def.num_components <= 16);
1409
1410 union packed_instr header;
1411 header.u32 = 0;
1412
1413 header.undef.instr_type = undef->instr.type;
1414 header.undef.last_component = undef->def.num_components - 1;
1415 header.undef.bit_size = encode_bit_size_3bits(undef->def.bit_size);
1416
1417 blob_write_uint32(ctx->blob, header.u32);
1418 write_add_object(ctx, &undef->def);
1419 }
1420
1421 static nir_ssa_undef_instr *
1422 read_ssa_undef(read_ctx *ctx, union packed_instr header)
1423 {
1424 nir_ssa_undef_instr *undef =
1425 nir_ssa_undef_instr_create(ctx->nir, header.undef.last_component + 1,
1426 decode_bit_size_3bits(header.undef.bit_size));
1427
1428 read_add_object(ctx, &undef->def);
1429 return undef;
1430 }
1431
1432 union packed_tex_data {
1433 uint32_t u32;
1434 struct {
1435 enum glsl_sampler_dim sampler_dim:4;
1436 nir_alu_type dest_type:8;
1437 unsigned coord_components:3;
1438 unsigned is_array:1;
1439 unsigned is_shadow:1;
1440 unsigned is_new_style_shadow:1;
1441 unsigned component:2;
1442 unsigned texture_non_uniform:1;
1443 unsigned sampler_non_uniform:1;
1444 unsigned unused:8; /* Mark unused for valgrind. */
1445 } u;
1446 };
1447
1448 static void
1449 write_tex(write_ctx *ctx, const nir_tex_instr *tex)
1450 {
1451 assert(tex->num_srcs < 16);
1452 assert(tex->op < 16);
1453 assert(tex->texture_array_size < 1024);
1454
1455 union packed_instr header;
1456 header.u32 = 0;
1457
1458 header.tex.instr_type = tex->instr.type;
1459 header.tex.num_srcs = tex->num_srcs;
1460 header.tex.op = tex->op;
1461 header.tex.texture_array_size = tex->texture_array_size;
1462
1463 write_dest(ctx, &tex->dest, header, tex->instr.type);
1464
1465 blob_write_uint32(ctx->blob, tex->texture_index);
1466 blob_write_uint32(ctx->blob, tex->sampler_index);
1467 if (tex->op == nir_texop_tg4)
1468 blob_write_bytes(ctx->blob, tex->tg4_offsets, sizeof(tex->tg4_offsets));
1469
1470 STATIC_ASSERT(sizeof(union packed_tex_data) == sizeof(uint32_t));
1471 union packed_tex_data packed = {
1472 .u.sampler_dim = tex->sampler_dim,
1473 .u.dest_type = tex->dest_type,
1474 .u.coord_components = tex->coord_components,
1475 .u.is_array = tex->is_array,
1476 .u.is_shadow = tex->is_shadow,
1477 .u.is_new_style_shadow = tex->is_new_style_shadow,
1478 .u.component = tex->component,
1479 .u.texture_non_uniform = tex->texture_non_uniform,
1480 .u.sampler_non_uniform = tex->sampler_non_uniform,
1481 };
1482 blob_write_uint32(ctx->blob, packed.u32);
1483
1484 for (unsigned i = 0; i < tex->num_srcs; i++) {
1485 union packed_src src;
1486 src.u32 = 0;
1487 src.tex.src_type = tex->src[i].src_type;
1488 write_src_full(ctx, &tex->src[i].src, src);
1489 }
1490 }
1491
1492 static nir_tex_instr *
1493 read_tex(read_ctx *ctx, union packed_instr header)
1494 {
1495 nir_tex_instr *tex = nir_tex_instr_create(ctx->nir, header.tex.num_srcs);
1496
1497 read_dest(ctx, &tex->dest, &tex->instr, header);
1498
1499 tex->op = header.tex.op;
1500 tex->texture_index = blob_read_uint32(ctx->blob);
1501 tex->texture_array_size = header.tex.texture_array_size;
1502 tex->sampler_index = blob_read_uint32(ctx->blob);
1503 if (tex->op == nir_texop_tg4)
1504 blob_copy_bytes(ctx->blob, tex->tg4_offsets, sizeof(tex->tg4_offsets));
1505
1506 union packed_tex_data packed;
1507 packed.u32 = blob_read_uint32(ctx->blob);
1508 tex->sampler_dim = packed.u.sampler_dim;
1509 tex->dest_type = packed.u.dest_type;
1510 tex->coord_components = packed.u.coord_components;
1511 tex->is_array = packed.u.is_array;
1512 tex->is_shadow = packed.u.is_shadow;
1513 tex->is_new_style_shadow = packed.u.is_new_style_shadow;
1514 tex->component = packed.u.component;
1515 tex->texture_non_uniform = packed.u.texture_non_uniform;
1516 tex->sampler_non_uniform = packed.u.sampler_non_uniform;
1517
1518 for (unsigned i = 0; i < tex->num_srcs; i++) {
1519 union packed_src src = read_src(ctx, &tex->src[i].src, &tex->instr);
1520 tex->src[i].src_type = src.tex.src_type;
1521 }
1522
1523 return tex;
1524 }
1525
1526 static void
1527 write_phi(write_ctx *ctx, const nir_phi_instr *phi)
1528 {
1529 union packed_instr header;
1530 header.u32 = 0;
1531
1532 header.phi.instr_type = phi->instr.type;
1533 header.phi.num_srcs = exec_list_length(&phi->srcs);
1534
1535 /* Phi nodes are special, since they may reference SSA definitions and
1536 * basic blocks that don't exist yet. We leave two empty uint32_t's here,
1537 * and then store enough information so that a later fixup pass can fill
1538 * them in correctly.
1539 */
1540 write_dest(ctx, &phi->dest, header, phi->instr.type);
1541
1542 nir_foreach_phi_src(src, phi) {
1543 assert(src->src.is_ssa);
1544 size_t blob_offset = blob_reserve_uint32(ctx->blob);
1545 ASSERTED size_t blob_offset2 = blob_reserve_uint32(ctx->blob);
1546 assert(blob_offset + sizeof(uint32_t) == blob_offset2);
1547 write_phi_fixup fixup = {
1548 .blob_offset = blob_offset,
1549 .src = src->src.ssa,
1550 .block = src->pred,
1551 };
1552 util_dynarray_append(&ctx->phi_fixups, write_phi_fixup, fixup);
1553 }
1554 }
1555
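/* Second pass: every SSA def and block now has an index, so patch the two
 * uint32s that write_phi() reserved with the real object IDs.
 */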
1556 static void
1557 write_fixup_phis(write_ctx *ctx)
1558 {
1559 util_dynarray_foreach(&ctx->phi_fixups, write_phi_fixup, fixup) {
1560 uint32_t *blob_ptr = (uint32_t *)(ctx->blob->data + fixup->blob_offset);
1561 blob_ptr[0] = write_lookup_object(ctx, fixup->src);
1562 blob_ptr[1] = write_lookup_object(ctx, fixup->block);
1563 }
1564
1565 util_dynarray_clear(&ctx->phi_fixups);
1566 }
1567
1568 static nir_phi_instr *
1569 read_phi(read_ctx *ctx, nir_block *blk, union packed_instr header)
1570 {
1571 nir_phi_instr *phi = nir_phi_instr_create(ctx->nir);
1572
1573 read_dest(ctx, &phi->dest, &phi->instr, header);
1574
1575 /* For similar reasons as before, we just store the index directly into the
1576 * pointer, and let a later pass resolve the phi sources.
1577 *
1578 * In order to ensure that the copied sources (which are just the indices
1579 * from the blob for now) don't get inserted into the old shader's use-def
1580 * lists, we have to add the phi instruction *before* we set up its
1581 * sources.
1582 */
1583 nir_instr_insert_after_block(blk, &phi->instr);
1584
1585 for (unsigned i = 0; i < header.phi.num_srcs; i++) {
1586 nir_phi_src *src = ralloc(phi, nir_phi_src);
1587
1588 src->src.is_ssa = true;
1589 src->src.ssa = (nir_ssa_def *)(uintptr_t) blob_read_uint32(ctx->blob);
1590 src->pred = (nir_block *)(uintptr_t) blob_read_uint32(ctx->blob);
1591
1592 /* Since we're not letting nir_insert_instr handle use/def stuff for us,
1593 * we have to set the parent_instr manually. It doesn't really matter
1594 * when we do it, so we might as well do it here.
1595 */
1596 src->src.parent_instr = &phi->instr;
1597
1598 /* Stash it in the list of phi sources. We'll walk this list and fix up
1599 * sources at the very end of read_function_impl.
1600 */
1601 list_add(&src->src.use_link, &ctx->phi_srcs);
1602
1603 exec_list_push_tail(&phi->srcs, &src->node);
1604 }
1605
1606 return phi;
1607 }
1608
1609 static void
1610 read_fixup_phis(read_ctx *ctx)
1611 {
1612 list_for_each_entry_safe(nir_phi_src, src, &ctx->phi_srcs, src.use_link) {
1613 src->pred = read_lookup_object(ctx, (uintptr_t)src->pred);
1614 src->src.ssa = read_lookup_object(ctx, (uintptr_t)src->src.ssa);
1615
1616 /* Remove from this list */
1617 list_del(&src->src.use_link);
1618
1619 list_addtail(&src->src.use_link, &src->src.ssa->uses);
1620 }
1621 assert(list_is_empty(&ctx->phi_srcs));
1622 }
1623
1624 static void
1625 write_jump(write_ctx *ctx, const nir_jump_instr *jmp)
1626 {
1627 assert(jmp->type < 4);
1628
1629 union packed_instr header;
1630 header.u32 = 0;
1631
1632 header.jump.instr_type = jmp->instr.type;
1633 header.jump.type = jmp->type;
1634
1635 blob_write_uint32(ctx->blob, header.u32);
1636 }
1637
1638 static nir_jump_instr *
1639 read_jump(read_ctx *ctx, union packed_instr header)
1640 {
1641 nir_jump_instr *jmp = nir_jump_instr_create(ctx->nir, header.jump.type);
1642 return jmp;
1643 }
1644
1645 static void
1646 write_call(write_ctx *ctx, const nir_call_instr *call)
1647 {
1648 blob_write_uint32(ctx->blob, write_lookup_object(ctx, call->callee));
1649
1650 for (unsigned i = 0; i < call->num_params; i++)
1651 write_src(ctx, &call->params[i]);
1652 }
1653
1654 static nir_call_instr *
1655 read_call(read_ctx *ctx)
1656 {
1657 nir_function *callee = read_object(ctx);
1658 nir_call_instr *call = nir_call_instr_create(ctx->nir, callee);
1659
1660 for (unsigned i = 0; i < call->num_params; i++)
1661 read_src(ctx, &call->params[i], call);
1662
1663 return call;
1664 }
1665
1666 static void
1667 write_instr(write_ctx *ctx, const nir_instr *instr)
1668 {
1669 /* We have only 4 bits for the instruction type. */
1670 assert(instr->type < 16);
1671
1672 switch (instr->type) {
1673 case nir_instr_type_alu:
1674 write_alu(ctx, nir_instr_as_alu(instr));
1675 break;
1676 case nir_instr_type_deref:
1677 write_deref(ctx, nir_instr_as_deref(instr));
1678 break;
1679 case nir_instr_type_intrinsic:
1680 write_intrinsic(ctx, nir_instr_as_intrinsic(instr));
1681 break;
1682 case nir_instr_type_load_const:
1683 write_load_const(ctx, nir_instr_as_load_const(instr));
1684 break;
1685 case nir_instr_type_ssa_undef:
1686 write_ssa_undef(ctx, nir_instr_as_ssa_undef(instr));
1687 break;
1688 case nir_instr_type_tex:
1689 write_tex(ctx, nir_instr_as_tex(instr));
1690 break;
1691 case nir_instr_type_phi:
1692 write_phi(ctx, nir_instr_as_phi(instr));
1693 break;
1694 case nir_instr_type_jump:
1695 write_jump(ctx, nir_instr_as_jump(instr));
1696 break;
1697 case nir_instr_type_call:
1698 blob_write_uint32(ctx->blob, instr->type);
1699 write_call(ctx, nir_instr_as_call(instr));
1700 break;
1701 case nir_instr_type_parallel_copy:
1702 unreachable("Cannot write parallel copies");
1703 default:
1704 unreachable("bad instr type");
1705 }
1706 }
1707
1708 /* Return the number of instructions read. */
1709 static unsigned
1710 read_instr(read_ctx *ctx, nir_block *block)
1711 {
1712 STATIC_ASSERT(sizeof(union packed_instr) == 4);
1713 union packed_instr header;
1714 header.u32 = blob_read_uint32(ctx->blob);
1715 nir_instr *instr;
1716
1717 switch (header.any.instr_type) {
1718 case nir_instr_type_alu:
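/* A single header may be shared by up to four consecutive scalarized ALUs. */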
1719 for (unsigned i = 0; i <= header.alu.num_followup_alu_sharing_header; i++)
1720 nir_instr_insert_after_block(block, &read_alu(ctx, header)->instr);
1721 return header.alu.num_followup_alu_sharing_header + 1;
1722 case nir_instr_type_deref:
1723 instr = &read_deref(ctx, header)->instr;
1724 break;
1725 case nir_instr_type_intrinsic:
1726 instr = &read_intrinsic(ctx, header)->instr;
1727 break;
1728 case nir_instr_type_load_const:
1729 instr = &read_load_const(ctx, header)->instr;
1730 break;
1731 case nir_instr_type_ssa_undef:
1732 instr = &read_ssa_undef(ctx, header)->instr;
1733 break;
1734 case nir_instr_type_tex:
1735 instr = &read_tex(ctx, header)->instr;
1736 break;
1737 case nir_instr_type_phi:
1738 /* Phi instructions are a bit of a special case when reading because we
1739 * don't want inserting the instruction to automatically handle use/defs
1740 * for us. Instead, we need to wait until all the blocks/instructions
1741 * are read so that we can set their sources up.
1742 */
1743 read_phi(ctx, block, header);
1744 return 1;
1745 case nir_instr_type_jump:
1746 instr = &read_jump(ctx, header)->instr;
1747 break;
1748 case nir_instr_type_call:
1749 instr = &read_call(ctx)->instr;
1750 break;
1751 case nir_instr_type_parallel_copy:
1752 unreachable("Cannot read parallel copies");
1753 default:
1754 unreachable("bad instr type");
1755 }
1756
1757 nir_instr_insert_after_block(block, instr);
1758 return 1;
1759 }
1760
1761 static void
1762 write_block(write_ctx *ctx, const nir_block *block)
1763 {
1764 write_add_object(ctx, block);
1765 blob_write_uint32(ctx->blob, exec_list_length(&block->instr_list));
1766
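/* Reset the ALU-header-sharing state so it never crosses block boundaries. */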
1767 ctx->last_instr_type = ~0;
1768 ctx->last_alu_header_offset = 0;
1769
1770 nir_foreach_instr(instr, block) {
1771 write_instr(ctx, instr);
1772 ctx->last_instr_type = instr->type;
1773 }
1774 }
1775
1776 static void
1777 read_block(read_ctx *ctx, struct exec_list *cf_list)
1778 {
1779 /* Don't actually create a new block. Just use the one from the tail of
1780 * the list. NIR guarantees that the tail of the list is a block and that
1781 * no two blocks are side-by-side in the IR; It should be empty.
1782 */
1783 nir_block *block =
1784 exec_node_data(nir_block, exec_list_get_tail(cf_list), cf_node.node);
1785
1786 read_add_object(ctx, block);
1787 unsigned num_instrs = blob_read_uint32(ctx->blob);
1788 for (unsigned i = 0; i < num_instrs;) {
1789 i += read_instr(ctx, block);
1790 }
1791 }
1792
1793 static void
1794 write_cf_list(write_ctx *ctx, const struct exec_list *cf_list);
1795
1796 static void
1797 read_cf_list(read_ctx *ctx, struct exec_list *cf_list);
1798
1799 static void
1800 write_if(write_ctx *ctx, nir_if *nif)
1801 {
1802 write_src(ctx, &nif->condition);
1803
1804 write_cf_list(ctx, &nif->then_list);
1805 write_cf_list(ctx, &nif->else_list);
1806 }
1807
1808 static void
1809 read_if(read_ctx *ctx, struct exec_list *cf_list)
1810 {
1811 nir_if *nif = nir_if_create(ctx->nir);
1812
1813 read_src(ctx, &nif->condition, nif);
1814
1815 nir_cf_node_insert_end(cf_list, &nif->cf_node);
1816
1817 read_cf_list(ctx, &nif->then_list);
1818 read_cf_list(ctx, &nif->else_list);
1819 }
1820
1821 static void
1822 write_loop(write_ctx *ctx, nir_loop *loop)
1823 {
1824 write_cf_list(ctx, &loop->body);
1825 }
1826
1827 static void
1828 read_loop(read_ctx *ctx, struct exec_list *cf_list)
1829 {
1830 nir_loop *loop = nir_loop_create(ctx->nir);
1831
1832 nir_cf_node_insert_end(cf_list, &loop->cf_node);
1833
1834 read_cf_list(ctx, &loop->body);
1835 }
1836
1837 static void
1838 write_cf_node(write_ctx *ctx, nir_cf_node *cf)
1839 {
1840 blob_write_uint32(ctx->blob, cf->type);
1841
1842 switch (cf->type) {
1843 case nir_cf_node_block:
1844 write_block(ctx, nir_cf_node_as_block(cf));
1845 break;
1846 case nir_cf_node_if:
1847 write_if(ctx, nir_cf_node_as_if(cf));
1848 break;
1849 case nir_cf_node_loop:
1850 write_loop(ctx, nir_cf_node_as_loop(cf));
1851 break;
1852 default:
1853 unreachable("bad cf type");
1854 }
1855 }
1856
1857 static void
1858 read_cf_node(read_ctx *ctx, struct exec_list *list)
1859 {
1860 nir_cf_node_type type = blob_read_uint32(ctx->blob);
1861
1862 switch (type) {
1863 case nir_cf_node_block:
1864 read_block(ctx, list);
1865 break;
1866 case nir_cf_node_if:
1867 read_if(ctx, list);
1868 break;
1869 case nir_cf_node_loop:
1870 read_loop(ctx, list);
1871 break;
1872 default:
1873 unreachable("bad cf type");
1874 }
1875 }
1876
1877 static void
1878 write_cf_list(write_ctx *ctx, const struct exec_list *cf_list)
1879 {
1880 blob_write_uint32(ctx->blob, exec_list_length(cf_list));
1881 foreach_list_typed(nir_cf_node, cf, node, cf_list) {
1882 write_cf_node(ctx, cf);
1883 }
1884 }
1885
1886 static void
1887 read_cf_list(read_ctx *ctx, struct exec_list *cf_list)
1888 {
1889 uint32_t num_cf_nodes = blob_read_uint32(ctx->blob);
1890 for (unsigned i = 0; i < num_cf_nodes; i++)
1891 read_cf_node(ctx, cf_list);
1892 }
1893
1894 static void
1895 write_function_impl(write_ctx *ctx, const nir_function_impl *fi)
1896 {
1897 write_var_list(ctx, &fi->locals);
1898 write_reg_list(ctx, &fi->registers);
1899 blob_write_uint32(ctx->blob, fi->reg_alloc);
1900
1901 write_cf_list(ctx, &fi->body);
1902 write_fixup_phis(ctx);
1903 }
1904
1905 static nir_function_impl *
1906 read_function_impl(read_ctx *ctx, nir_function *fxn)
1907 {
1908 nir_function_impl *fi = nir_function_impl_create_bare(ctx->nir);
1909 fi->function = fxn;
1910
1911 read_var_list(ctx, &fi->locals);
1912 read_reg_list(ctx, &fi->registers);
1913 fi->reg_alloc = blob_read_uint32(ctx->blob);
1914
1915 read_cf_list(ctx, &fi->body);
1916 read_fixup_phis(ctx);
1917
1918 fi->valid_metadata = 0;
1919
1920 return fi;
1921 }
1922
1923 static void
1924 write_function(write_ctx *ctx, const nir_function *fxn)
1925 {
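/* Pack a few function properties into one word: bit 0 = is_entrypoint,
 * bit 1 = has a name, bit 2 = has an impl. */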
1926 uint32_t flags = fxn->is_entrypoint;
1927 if (fxn->name)
1928 flags |= 0x2;
1929 if (fxn->impl)
1930 flags |= 0x4;
1931 blob_write_uint32(ctx->blob, flags);
1932 if (fxn->name)
1933 blob_write_string(ctx->blob, fxn->name);
1934
1935 write_add_object(ctx, fxn);
1936
1937 blob_write_uint32(ctx->blob, fxn->num_params);
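/* Each parameter is packed as num_components in bits 0-7 and bit_size in
 * bits 8-15. */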
1938 for (unsigned i = 0; i < fxn->num_params; i++) {
1939 uint32_t val =
1940 ((uint32_t)fxn->params[i].num_components) |
1941 ((uint32_t)fxn->params[i].bit_size) << 8;
1942 blob_write_uint32(ctx->blob, val);
1943 }
1944
1945 /* At first glance, it looks like we should write the function_impl here.
1946 * However, call instructions need to be able to reference at least the
1947 * function itself, and calls are processed as we write the function_impls.
1948 * We therefore stop here and write the function_impls as a second pass.
1949 */
1950 }
1951
1952 static void
1953 read_function(read_ctx *ctx)
1954 {
1955 uint32_t flags = blob_read_uint32(ctx->blob);
1956 bool has_name = flags & 0x2;
1957 char *name = has_name ? blob_read_string(ctx->blob) : NULL;
1958
1959 nir_function *fxn = nir_function_create(ctx->nir, name);
1960
1961 read_add_object(ctx, fxn);
1962
1963 fxn->num_params = blob_read_uint32(ctx->blob);
1964 fxn->params = ralloc_array(fxn, nir_parameter, fxn->num_params);
1965 for (unsigned i = 0; i < fxn->num_params; i++) {
1966 uint32_t val = blob_read_uint32(ctx->blob);
1967 fxn->params[i].num_components = val & 0xff;
1968 fxn->params[i].bit_size = (val >> 8) & 0xff;
1969 }
1970
1971 fxn->is_entrypoint = flags & 0x1;
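/* Mark that an impl follows; the placeholder is replaced with the real
 * nir_function_impl when the impls are read in nir_deserialize(). */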
1972 if (flags & 0x4)
1973 fxn->impl = NIR_SERIALIZE_FUNC_HAS_IMPL;
1974 }
1975
1976 /**
1977 * Serialize NIR into a binary blob.
1978 *
1979 * \param strip Don't serialize information only useful for debugging,
1980 * such as variable names, making cache hits from similar
1981 * shaders more likely.
1982 */
1983 void
1984 nir_serialize(struct blob *blob, const nir_shader *nir, bool strip)
1985 {
1986 write_ctx ctx = {0};
1987 ctx.remap_table = _mesa_pointer_hash_table_create(NULL);
1988 ctx.blob = blob;
1989 ctx.nir = nir;
1990 ctx.strip = strip;
1991 util_dynarray_init(&ctx.phi_fixups, NULL);
1992
1993 size_t idx_size_offset = blob_reserve_uint32(blob);
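/* Reserve a uint32 for the total object count; it is patched in at the end
 * of serialization once every object has been assigned an index. */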
1994
1995 struct shader_info info = nir->info;
1996 uint32_t strings = 0;
1997 if (!strip && info.name)
1998 strings |= 0x1;
1999 if (!strip && info.label)
2000 strings |= 0x2;
2001 blob_write_uint32(blob, strings);
2002 if (!strip && info.name)
2003 blob_write_string(blob, info.name);
2004 if (!strip && info.label)
2005 blob_write_string(blob, info.label);
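/* The name and label are written separately above (and may be stripped),
 * so clear the pointers before writing the raw struct bytes. */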
2006 info.name = info.label = NULL;
2007 blob_write_bytes(blob, (uint8_t *) &info, sizeof(info));
2008
2009 write_var_list(&ctx, &nir->uniforms);
2010 write_var_list(&ctx, &nir->inputs);
2011 write_var_list(&ctx, &nir->outputs);
2012 write_var_list(&ctx, &nir->shared);
2013 write_var_list(&ctx, &nir->globals);
2014 write_var_list(&ctx, &nir->system_values);
2015
2016 blob_write_uint32(blob, nir->num_inputs);
2017 blob_write_uint32(blob, nir->num_uniforms);
2018 blob_write_uint32(blob, nir->num_outputs);
2019 blob_write_uint32(blob, nir->num_shared);
2020 blob_write_uint32(blob, nir->scratch_size);
2021
2022 blob_write_uint32(blob, exec_list_length(&nir->functions));
2023 nir_foreach_function(fxn, nir) {
2024 write_function(&ctx, fxn);
2025 }
2026
2027 nir_foreach_function(fxn, nir) {
2028 if (fxn->impl)
2029 write_function_impl(&ctx, fxn->impl);
2030 }
2031
2032 blob_write_uint32(blob, nir->constant_data_size);
2033 if (nir->constant_data_size > 0)
2034 blob_write_bytes(blob, nir->constant_data, nir->constant_data_size);
2035
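/* Patch the reserved slot with the final object count, which the reader
 * uses to size its index -> object table. */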
2036 *(uint32_t *)(blob->data + idx_size_offset) = ctx.next_idx;
2037
2038 _mesa_hash_table_destroy(ctx.remap_table, NULL);
2039 util_dynarray_fini(&ctx.phi_fixups);
2040 }
2041
2042 nir_shader *
2043 nir_deserialize(void *mem_ctx,
2044 const struct nir_shader_compiler_options *options,
2045 struct blob_reader *blob)
2046 {
2047 read_ctx ctx = {0};
2048 ctx.blob = blob;
2049 list_inithead(&ctx.phi_srcs);
2050 ctx.idx_table_len = blob_read_uint32(blob);
2051 ctx.idx_table = calloc(ctx.idx_table_len, sizeof(uintptr_t));
2052
2053 uint32_t strings = blob_read_uint32(blob);
2054 char *name = (strings & 0x1) ? blob_read_string(blob) : NULL;
2055 char *label = (strings & 0x2) ? blob_read_string(blob) : NULL;
2056
2057 struct shader_info info;
2058 blob_copy_bytes(blob, (uint8_t *) &info, sizeof(info));
2059
2060 ctx.nir = nir_shader_create(mem_ctx, info.stage, options, NULL);
2061
2062 info.name = name ? ralloc_strdup(ctx.nir, name) : NULL;
2063 info.label = label ? ralloc_strdup(ctx.nir, label) : NULL;
2064
2065 ctx.nir->info = info;
2066
2067 read_var_list(&ctx, &ctx.nir->uniforms);
2068 read_var_list(&ctx, &ctx.nir->inputs);
2069 read_var_list(&ctx, &ctx.nir->outputs);
2070 read_var_list(&ctx, &ctx.nir->shared);
2071 read_var_list(&ctx, &ctx.nir->globals);
2072 read_var_list(&ctx, &ctx.nir->system_values);
2073
2074 ctx.nir->num_inputs = blob_read_uint32(blob);
2075 ctx.nir->num_uniforms = blob_read_uint32(blob);
2076 ctx.nir->num_outputs = blob_read_uint32(blob);
2077 ctx.nir->num_shared = blob_read_uint32(blob);
2078 ctx.nir->scratch_size = blob_read_uint32(blob);
2079
2080 unsigned num_functions = blob_read_uint32(blob);
2081 for (unsigned i = 0; i < num_functions; i++)
2082 read_function(&ctx);
2083
2084 nir_foreach_function(fxn, ctx.nir) {
2085 if (fxn->impl == NIR_SERIALIZE_FUNC_HAS_IMPL)
2086 fxn->impl = read_function_impl(&ctx, fxn);
2087 }
2088
2089 ctx.nir->constant_data_size = blob_read_uint32(blob);
2090 if (ctx.nir->constant_data_size > 0) {
2091 ctx.nir->constant_data =
2092 ralloc_size(ctx.nir, ctx.nir->constant_data_size);
2093 blob_copy_bytes(blob, ctx.nir->constant_data,
2094 ctx.nir->constant_data_size);
2095 }
2096
2097 free(ctx.idx_table);
2098
2099 return ctx.nir;
2100 }
2101
2102 void
2103 nir_shader_serialize_deserialize(nir_shader *shader)
2104 {
2105 const struct nir_shader_compiler_options *options = shader->options;
2106
2107 struct blob writer;
2108 blob_init(&writer);
2109 nir_serialize(&writer, shader, false);
2110
2111 /* Delete all of the shader's ralloc children but leave the shader itself alone */
2112 void *dead_ctx = ralloc_context(NULL);
2113 ralloc_adopt(dead_ctx, shader);
2114 ralloc_free(dead_ctx);
2115
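/* Deserialize into a temporary context, then move the result over the
 * now-empty shader and free the temporary context. */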
2116 dead_ctx = ralloc_context(NULL);
2117
2118 struct blob_reader reader;
2119 blob_reader_init(&reader, writer.data, writer.size);
2120 nir_shader *copy = nir_deserialize(dead_ctx, options, &reader);
2121
2122 blob_finish(&writer);
2123
2124 nir_shader_replace(shader, copy);
2125 ralloc_free(dead_ctx);
2126 }