nir/serialize: pack src better and limit the object count to 1M from 1G
[mesa.git] / src / compiler / nir / nir_serialize.c
1 /*
2 * Copyright © 2017 Connor Abbott
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "nir_serialize.h"
25 #include "nir_control_flow.h"
26 #include "util/u_dynarray.h"
27 #include "util/u_math.h"
28
29 #define NIR_SERIALIZE_FUNC_HAS_IMPL ((void *)(intptr_t)1)
30 #define MAX_OBJECT_IDS (1 << 20)
31
/* Deferred patch location for one phi source: the blob offset of two
 * reserved uint32 slots plus the SSA def and predecessor block whose
 * object indices get written there in a second pass (see
 * write_phi()/write_fixup_phis()).
 */
typedef struct {
   size_t blob_offset;
   nir_ssa_def *src;
   nir_block *block;
} write_phi_fixup;
37
/* State carried through one serialization (write) pass. */
typedef struct {
   const nir_shader *nir;

   struct blob *blob;

   /* maps pointer to index */
   struct hash_table *remap_table;

   /* the next index to assign to a NIR in-memory object */
   uint32_t next_idx;

   /* Array of write_phi_fixup structs representing phi sources that need to
    * be resolved in the second pass.
    */
   struct util_dynarray phi_fixups;

   /* Don't write optional data such as variable names. */
   bool strip;
} write_ctx;
57
/* State carried through one deserialization (read) pass. */
typedef struct {
   nir_shader *nir;

   struct blob_reader *blob;

   /* the next index to assign to a NIR in-memory object */
   uint32_t next_idx;

   /* The length of the index -> object table */
   uint32_t idx_table_len;

   /* map from index to deserialized pointer */
   void **idx_table;

   /* List of phi sources. */
   struct list_head phi_srcs;

} read_ctx;
76
77 static void
78 write_add_object(write_ctx *ctx, const void *obj)
79 {
80 uint32_t index = ctx->next_idx++;
81 assert(index != MAX_OBJECT_IDS);
82 _mesa_hash_table_insert(ctx->remap_table, obj, (void *)(uintptr_t) index);
83 }
84
85 static uint32_t
86 write_lookup_object(write_ctx *ctx, const void *obj)
87 {
88 struct hash_entry *entry = _mesa_hash_table_search(ctx->remap_table, obj);
89 assert(entry);
90 return (uint32_t)(uintptr_t) entry->data;
91 }
92
93 static void
94 write_object(write_ctx *ctx, const void *obj)
95 {
96 blob_write_uint32(ctx->blob, write_lookup_object(ctx, obj));
97 }
98
99 static void
100 read_add_object(read_ctx *ctx, void *obj)
101 {
102 assert(ctx->next_idx < ctx->idx_table_len);
103 ctx->idx_table[ctx->next_idx++] = obj;
104 }
105
106 static void *
107 read_lookup_object(read_ctx *ctx, uint32_t idx)
108 {
109 assert(idx < ctx->idx_table_len);
110 return ctx->idx_table[idx];
111 }
112
113 static void *
114 read_object(read_ctx *ctx)
115 {
116 return read_lookup_object(ctx, blob_read_uint32(ctx->blob));
117 }
118
/* Encode values of 0, 1, 2, 4, 8, 16, 32, 64 in 3 bits: zero maps to
 * zero, any other power of two to log2(size) + 1.
 */
static uint32_t
encode_bit_size_3bits(uint8_t bit_size)
{
   assert(bit_size <= 64 && util_is_power_of_two_or_zero(bit_size));
   return bit_size ? util_logbase2(bit_size) + 1 : 0;
}
128
/* Inverse of encode_bit_size_3bits(): 0 stays 0, otherwise the stored
 * value is log2(bit_size) + 1, so shift back up to the real bit size.
 */
static uint8_t
decode_bit_size_3bits(uint8_t bit_size)
{
   return bit_size == 0 ? 0 : (uint8_t)(1u << (bit_size - 1));
}
136
/* Pack a component count (0..4, 8 or 16) into 3 bits: 0..4 are stored
 * verbatim, 8 becomes 5 and 16 becomes 6.
 */
static uint8_t
encode_num_components_in_3bits(uint8_t num_components)
{
   switch (num_components) {
   case 0:
   case 1:
   case 2:
   case 3:
   case 4:
      return num_components;
   case 8:
      return 5;
   case 16:
      return 6;
   default:
      unreachable("invalid number in num_components");
      return 0;
   }
}
150
/* Inverse of encode_num_components_in_3bits(): 0..4 are verbatim,
 * 5 means 8 components and 6 means 16.
 */
static uint8_t
decode_num_components_in_3bits(uint8_t value)
{
   switch (value) {
   case 0:
   case 1:
   case 2:
   case 3:
   case 4:
      return value;
   case 5:
      return 8;
   case 6:
      return 16;
   default:
      unreachable("invalid num_components encoding");
      return 0;
   }
}
164
/* Serialize a nir_constant: the full values array, then the element
 * count, then each child constant recursively (for arrays/structs).
 */
static void
write_constant(write_ctx *ctx, const nir_constant *c)
{
   blob_write_bytes(ctx->blob, c->values, sizeof(c->values));
   blob_write_uint32(ctx->blob, c->num_elements);
   for (unsigned i = 0; i < c->num_elements; i++)
      write_constant(ctx, c->elements[i]);
}
173
/* Deserialize a nir_constant, mirroring write_constant()'s layout.
 * All allocations are parented to "nvar" so they are freed with the
 * variable.
 */
static nir_constant *
read_constant(read_ctx *ctx, nir_variable *nvar)
{
   nir_constant *c = ralloc(nvar, nir_constant);

   blob_copy_bytes(ctx->blob, (uint8_t *)c->values, sizeof(c->values));
   c->num_elements = blob_read_uint32(ctx->blob);
   c->elements = ralloc_array(nvar, nir_constant *, c->num_elements);
   for (unsigned i = 0; i < c->num_elements; i++)
      c->elements[i] = read_constant(ctx, nvar);

   return c;
}
187
/* One-uint32 header for a serialized nir_variable: presence flags for
 * the optional trailing data plus the state-slot and member counts
 * (bounds asserted in write_variable()).
 */
union packed_var {
   uint32_t u32;
   struct {
      unsigned has_name:1;
      unsigned has_constant_initializer:1;
      unsigned has_interface_type:1;
      unsigned num_state_slots:13;
      unsigned num_members:16;
   } u;
};
198
/* Serialize a nir_variable. The field order here must mirror
 * read_variable() exactly: type, packed flags, optional name, data,
 * state slots, optional constant initializer, optional interface type,
 * optional member array.
 */
static void
write_variable(write_ctx *ctx, const nir_variable *var)
{
   write_add_object(ctx, var);
   encode_type_to_blob(ctx->blob, var->type);

   /* Counts must fit the packed_var bitfields. */
   assert(var->num_state_slots < (1 << 13));
   assert(var->num_members < (1 << 16));

   STATIC_ASSERT(sizeof(union packed_var) == 4);
   union packed_var flags;
   flags.u32 = 0;

   flags.u.has_name = !ctx->strip && var->name;
   flags.u.has_constant_initializer = !!(var->constant_initializer);
   flags.u.has_interface_type = !!(var->interface_type);
   flags.u.num_state_slots = var->num_state_slots;
   flags.u.num_members = var->num_members;

   blob_write_uint32(ctx->blob, flags.u32);

   if (flags.u.has_name)
      blob_write_string(ctx->blob, var->name);

   struct nir_variable_data data = var->data;

   /* When stripping, we expect that the location is no longer needed,
    * which is typically after shaders are linked.
    */
   if (ctx->strip &&
       data.mode != nir_var_shader_in &&
       data.mode != nir_var_shader_out)
      data.location = 0;

   blob_write_bytes(ctx->blob, &data, sizeof(data));

   for (unsigned i = 0; i < var->num_state_slots; i++) {
      blob_write_bytes(ctx->blob, &var->state_slots[i],
                       sizeof(var->state_slots[i]));
   }
   if (var->constant_initializer)
      write_constant(ctx, var->constant_initializer);
   if (var->interface_type)
      encode_type_to_blob(ctx->blob, var->interface_type);
   if (var->num_members > 0) {
      blob_write_bytes(ctx->blob, (uint8_t *) var->members,
                       var->num_members * sizeof(*var->members));
   }
}
248
/* Deserialize a nir_variable, mirroring write_variable()'s field order.
 * The variable is allocated on the shader and registered in the id
 * table before any nested reads, matching the writer's id assignment.
 */
static nir_variable *
read_variable(read_ctx *ctx)
{
   nir_variable *var = rzalloc(ctx->nir, nir_variable);
   read_add_object(ctx, var);

   var->type = decode_type_from_blob(ctx->blob);

   union packed_var flags;
   flags.u32 = blob_read_uint32(ctx->blob);

   if (flags.u.has_name) {
      const char *name = blob_read_string(ctx->blob);
      var->name = ralloc_strdup(var, name);
   } else {
      var->name = NULL;
   }
   blob_copy_bytes(ctx->blob, (uint8_t *) &var->data, sizeof(var->data));
   var->num_state_slots = flags.u.num_state_slots;
   if (var->num_state_slots != 0) {
      var->state_slots = ralloc_array(var, nir_state_slot,
                                      var->num_state_slots);
      for (unsigned i = 0; i < var->num_state_slots; i++) {
         blob_copy_bytes(ctx->blob, &var->state_slots[i],
                         sizeof(var->state_slots[i]));
      }
   }
   if (flags.u.has_constant_initializer)
      var->constant_initializer = read_constant(ctx, var);
   else
      var->constant_initializer = NULL;
   if (flags.u.has_interface_type)
      var->interface_type = decode_type_from_blob(ctx->blob);
   else
      var->interface_type = NULL;
   var->num_members = flags.u.num_members;
   if (var->num_members > 0) {
      var->members = ralloc_array(var, struct nir_variable_data,
                                  var->num_members);
      blob_copy_bytes(ctx->blob, (uint8_t *) var->members,
                      var->num_members * sizeof(*var->members));
   }

   return var;
}
294
/* Serialize an exec_list of nir_variables: a count followed by each
 * variable in list order.
 */
static void
write_var_list(write_ctx *ctx, const struct exec_list *src)
{
   blob_write_uint32(ctx->blob, exec_list_length(src));
   foreach_list_typed(nir_variable, var, node, src) {
      write_variable(ctx, var);
   }
}
303
/* Deserialize a variable list written by write_var_list() into "dst",
 * preserving the original order.
 */
static void
read_var_list(read_ctx *ctx, struct exec_list *dst)
{
   exec_list_make_empty(dst);
   unsigned num_vars = blob_read_uint32(ctx->blob);
   for (unsigned i = 0; i < num_vars; i++) {
      nir_variable *var = read_variable(ctx);
      exec_list_push_tail(dst, &var->node);
   }
}
314
/* Serialize a nir_register. Field order must mirror read_register().
 * The name is optional and omitted entirely when stripping.
 */
static void
write_register(write_ctx *ctx, const nir_register *reg)
{
   write_add_object(ctx, reg);
   blob_write_uint32(ctx->blob, reg->num_components);
   blob_write_uint32(ctx->blob, reg->bit_size);
   blob_write_uint32(ctx->blob, reg->num_array_elems);
   blob_write_uint32(ctx->blob, reg->index);
   /* A has-name flag precedes the (optional) name string. */
   blob_write_uint32(ctx->blob, !ctx->strip && reg->name);
   if (!ctx->strip && reg->name)
      blob_write_string(ctx->blob, reg->name);
}
327
/* Deserialize a nir_register written by write_register() and initialize
 * its (empty) use/def lists.
 */
static nir_register *
read_register(read_ctx *ctx)
{
   nir_register *reg = ralloc(ctx->nir, nir_register);
   read_add_object(ctx, reg);
   reg->num_components = blob_read_uint32(ctx->blob);
   reg->bit_size = blob_read_uint32(ctx->blob);
   reg->num_array_elems = blob_read_uint32(ctx->blob);
   reg->index = blob_read_uint32(ctx->blob);
   bool has_name = blob_read_uint32(ctx->blob);
   if (has_name) {
      const char *name = blob_read_string(ctx->blob);
      reg->name = ralloc_strdup(reg, name);
   } else {
      reg->name = NULL;
   }

   /* Uses get rebuilt as instructions referencing this register are read. */
   list_inithead(&reg->uses);
   list_inithead(&reg->defs);
   list_inithead(&reg->if_uses);

   return reg;
}
351
/* Serialize an exec_list of nir_registers: count, then each register. */
static void
write_reg_list(write_ctx *ctx, const struct exec_list *src)
{
   blob_write_uint32(ctx->blob, exec_list_length(src));
   foreach_list_typed(nir_register, reg, node, src)
      write_register(ctx, reg);
}
359
/* Deserialize a register list written by write_reg_list() into "dst". */
static void
read_reg_list(read_ctx *ctx, struct exec_list *dst)
{
   exec_list_make_empty(dst);
   unsigned num_regs = blob_read_uint32(ctx->blob);
   for (unsigned i = 0; i < num_regs; i++) {
      nir_register *reg = read_register(ctx);
      exec_list_push_tail(dst, &reg->node);
   }
}
370
/* One-uint32 encoding of a nir_src. The low 22 bits (the "header") are
 * common to all users: is_ssa, is_indirect and the 20-bit object id of
 * the SSA def or register. The top 10 bits (the "footer") carry
 * per-instruction extras: ALU modifiers/swizzle or the tex source type.
 */
union packed_src {
   uint32_t u32;
   struct {
      unsigned is_ssa:1;   /* <-- Header */
      unsigned is_indirect:1;
      unsigned object_idx:20;
      unsigned _footer:10; /* <-- Footer */
   } any;
   struct {
      unsigned _header:22; /* <-- Header */
      unsigned negate:1;   /* <-- Footer */
      unsigned abs:1;
      unsigned swizzle_x:2;
      unsigned swizzle_y:2;
      unsigned swizzle_z:2;
      unsigned swizzle_w:2;
   } alu;
   struct {
      unsigned _header:22; /* <-- Header */
      unsigned src_type:5; /* <-- Footer */
      unsigned _pad:5;
   } tex;
};
394
/* Serialize a nir_src. "header" arrives with only footer bits set by
 * the caller (ALU/tex extras); this function fills in the header bits.
 *
 * Sources are very frequent, so the common case is a single uint32:
 * is_ssa, is_indirect and the 20-bit object id all packed together.
 * The 20-bit id is safe because write_add_object() asserts ids stay
 * below MAX_OBJECT_IDS. Register sources additionally write the base
 * offset and, if present, the indirect source (recursively, with an
 * empty footer).
 */
static void
write_src_full(write_ctx *ctx, const nir_src *src, union packed_src header)
{
   header.any.is_ssa = src->is_ssa;
   if (src->is_ssa) {
      header.any.object_idx = write_lookup_object(ctx, src->ssa);
      blob_write_uint32(ctx->blob, header.u32);
   } else {
      header.any.object_idx = write_lookup_object(ctx, src->reg.reg);
      header.any.is_indirect = !!src->reg.indirect;
      blob_write_uint32(ctx->blob, header.u32);
      blob_write_uint32(ctx->blob, src->reg.base_offset);
      if (src->reg.indirect) {
         union packed_src header = {0};
         write_src_full(ctx, src->reg.indirect, header);
      }
   }
}
419
/* Serialize a nir_src with no footer bits (non-ALU, non-tex users). */
static void
write_src(write_ctx *ctx, const nir_src *src)
{
   union packed_src header = {0};
   write_src_full(ctx, src, header);
}
426
/* Deserialize a nir_src written by write_src_full(). Returns the packed
 * header so callers can extract their footer bits (ALU modifiers, tex
 * source type). "mem_ctx" parents any indirect-source allocation.
 */
static union packed_src
read_src(read_ctx *ctx, nir_src *src, void *mem_ctx)
{
   STATIC_ASSERT(sizeof(union packed_src) == 4);
   union packed_src header;
   header.u32 = blob_read_uint32(ctx->blob);

   src->is_ssa = header.any.is_ssa;
   if (src->is_ssa) {
      src->ssa = read_lookup_object(ctx, header.any.object_idx);
   } else {
      src->reg.reg = read_lookup_object(ctx, header.any.object_idx);
      src->reg.base_offset = blob_read_uint32(ctx->blob);
      if (header.any.is_indirect) {
         src->reg.indirect = ralloc(mem_ctx, nir_src);
         read_src(ctx, src->reg.indirect, mem_ctx);
      } else {
         src->reg.indirect = NULL;
      }
   }
   return header;
}
449
/* One-byte encoding of a nir_dest, stored in the top 8 bits of
 * packed_instr. SSA dests pack everything here; register dests only
 * flag indirection and write the rest after the instruction header.
 */
union packed_dest {
   uint8_t u8;
   struct {
      uint8_t is_ssa:1;
      uint8_t has_name:1;
      uint8_t num_components:3;
      uint8_t bit_size:3;
   } ssa;
   struct {
      uint8_t is_ssa:1;
      uint8_t is_indirect:1;
      uint8_t _pad:6;
   } reg;
};
464
/* One-uint32 instruction header. Every variant starts with a 4-bit
 * instr_type; variants with a destination end with the 8-bit
 * packed_dest byte, which write_dest() fills in before the header is
 * written to the blob.
 */
union packed_instr {
   uint32_t u32;
   struct {
      unsigned instr_type:4; /* always present */
      unsigned _pad:20;
      unsigned dest:8;       /* always last */
   } any;
   struct {
      unsigned instr_type:4;
      unsigned exact:1;
      unsigned no_signed_wrap:1;
      unsigned no_unsigned_wrap:1;
      unsigned saturate:1;
      unsigned writemask:4;
      unsigned op:9;
      unsigned _pad:3;
      unsigned dest:8;
   } alu;
   struct {
      unsigned instr_type:4;
      unsigned deref_type:3;
      unsigned mode:10;
      unsigned _pad:7;
      unsigned dest:8;
   } deref;
   struct {
      unsigned instr_type:4;
      unsigned intrinsic:9;
      unsigned num_components:3;
      unsigned _pad:8;
      unsigned dest:8;
   } intrinsic;
   struct {
      unsigned instr_type:4;
      unsigned last_component:4;
      unsigned bit_size:3;
      unsigned _pad:21;
   } load_const;
   struct {
      unsigned instr_type:4;
      unsigned last_component:4;
      unsigned bit_size:3;
      unsigned _pad:21;
   } undef;
   struct {
      unsigned instr_type:4;
      unsigned num_srcs:4;
      unsigned op:4;
      unsigned texture_array_size:12;
      unsigned dest:8;
   } tex;
   struct {
      unsigned instr_type:4;
      unsigned num_srcs:20;
      unsigned dest:8;
   } phi;
   struct {
      unsigned instr_type:4;
      unsigned type:2;
      unsigned _pad:26;
   } jump;
};
527
/* Pack the destination into the top 8 bits of "header", write the
 * completed header uint32, then append any register-dest payload
 * (register id, base offset, optional indirect source). SSA dests fit
 * entirely in the header byte apart from an optional name string.
 */
static void
write_dest(write_ctx *ctx, const nir_dest *dst, union packed_instr header)
{
   STATIC_ASSERT(sizeof(union packed_dest) == 1);
   union packed_dest dest;
   dest.u8 = 0;

   dest.ssa.is_ssa = dst->is_ssa;
   if (dst->is_ssa) {
      dest.ssa.has_name = !ctx->strip && dst->ssa.name;
      dest.ssa.num_components =
         encode_num_components_in_3bits(dst->ssa.num_components);
      dest.ssa.bit_size = encode_bit_size_3bits(dst->ssa.bit_size);
   } else {
      dest.reg.is_indirect = !!(dst->reg.indirect);
   }

   header.any.dest = dest.u8;
   blob_write_uint32(ctx->blob, header.u32);

   if (dst->is_ssa) {
      /* The SSA def gets its id here, right after its header is written. */
      write_add_object(ctx, &dst->ssa);
      if (dest.ssa.has_name)
         blob_write_string(ctx->blob, dst->ssa.name);
   } else {
      blob_write_uint32(ctx->blob, write_lookup_object(ctx, dst->reg.reg));
      blob_write_uint32(ctx->blob, dst->reg.base_offset);
      if (dst->reg.indirect)
         write_src(ctx, dst->reg.indirect);
   }
}
560
/* Deserialize a destination from the header byte written by
 * write_dest(), plus any register-dest payload following the header.
 */
static void
read_dest(read_ctx *ctx, nir_dest *dst, nir_instr *instr,
          union packed_instr header)
{
   union packed_dest dest;
   dest.u8 = header.any.dest;

   if (dest.ssa.is_ssa) {
      unsigned bit_size = decode_bit_size_3bits(dest.ssa.bit_size);
      unsigned num_components =
         decode_num_components_in_3bits(dest.ssa.num_components);
      char *name = dest.ssa.has_name ? blob_read_string(ctx->blob) : NULL;
      nir_ssa_dest_init(instr, dst, num_components, bit_size, name);
      /* Register the def with the same id the writer assigned it. */
      read_add_object(ctx, &dst->ssa);
   } else {
      dst->reg.reg = read_object(ctx);
      dst->reg.base_offset = blob_read_uint32(ctx->blob);
      if (dest.reg.is_indirect) {
         dst->reg.indirect = ralloc(instr, nir_src);
         read_src(ctx, dst->reg.indirect, instr);
      }
   }
}
584
/* Serialize an ALU instruction: packed header (with flags, writemask
 * and opcode) + dest, followed by each source with its modifiers and
 * swizzle packed into the source footer bits.
 */
static void
write_alu(write_ctx *ctx, const nir_alu_instr *alu)
{
   /* 9 bits for nir_op */
   STATIC_ASSERT(nir_num_opcodes <= 512);
   union packed_instr header;
   header.u32 = 0;

   header.alu.instr_type = alu->instr.type;
   header.alu.exact = alu->exact;
   header.alu.no_signed_wrap = alu->no_signed_wrap;
   header.alu.no_unsigned_wrap = alu->no_unsigned_wrap;
   header.alu.saturate = alu->dest.saturate;
   header.alu.writemask = alu->dest.write_mask;
   header.alu.op = alu->op;

   write_dest(ctx, &alu->dest.dest, header);

   for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
      union packed_src src;
      src.u32 = 0;

      src.alu.negate = alu->src[i].negate;
      src.alu.abs = alu->src[i].abs;
      src.alu.swizzle_x = alu->src[i].swizzle[0];
      src.alu.swizzle_y = alu->src[i].swizzle[1];
      src.alu.swizzle_z = alu->src[i].swizzle[2];
      src.alu.swizzle_w = alu->src[i].swizzle[3];

      write_src_full(ctx, &alu->src[i].src, src);
   }
}
617
/* Deserialize an ALU instruction written by write_alu(); the source
 * count comes from the opcode's nir_op_infos entry, not the blob.
 */
static nir_alu_instr *
read_alu(read_ctx *ctx, union packed_instr header)
{
   nir_alu_instr *alu = nir_alu_instr_create(ctx->nir, header.alu.op);

   alu->exact = header.alu.exact;
   alu->no_signed_wrap = header.alu.no_signed_wrap;
   alu->no_unsigned_wrap = header.alu.no_unsigned_wrap;
   alu->dest.saturate = header.alu.saturate;
   alu->dest.write_mask = header.alu.writemask;

   read_dest(ctx, &alu->dest.dest, &alu->instr, header);

   for (unsigned i = 0; i < nir_op_infos[header.alu.op].num_inputs; i++) {
      union packed_src src = read_src(ctx, &alu->src[i].src, &alu->instr);

      alu->src[i].negate = src.alu.negate;
      alu->src[i].abs = src.alu.abs;
      alu->src[i].swizzle[0] = src.alu.swizzle_x;
      alu->src[i].swizzle[1] = src.alu.swizzle_y;
      alu->src[i].swizzle[2] = src.alu.swizzle_z;
      alu->src[i].swizzle[3] = src.alu.swizzle_w;
   }

   return alu;
}
644
/* Serialize a deref instruction: header + dest, the result type, then
 * either the variable reference (deref_var) or the parent source plus
 * the per-deref-type payload.
 */
static void
write_deref(write_ctx *ctx, const nir_deref_instr *deref)
{
   /* Bounds for the packed_instr.deref bitfields. */
   assert(deref->deref_type < 8);
   assert(deref->mode < (1 << 10));

   union packed_instr header;
   header.u32 = 0;

   header.deref.instr_type = deref->instr.type;
   header.deref.deref_type = deref->deref_type;
   header.deref.mode = deref->mode;

   write_dest(ctx, &deref->dest, header);
   encode_type_to_blob(ctx->blob, deref->type);

   if (deref->deref_type == nir_deref_type_var) {
      write_object(ctx, deref->var);
      return;
   }

   write_src(ctx, &deref->parent);

   switch (deref->deref_type) {
   case nir_deref_type_struct:
      blob_write_uint32(ctx->blob, deref->strct.index);
      break;

   case nir_deref_type_array:
   case nir_deref_type_ptr_as_array:
      write_src(ctx, &deref->arr.index);
      break;

   case nir_deref_type_cast:
      blob_write_uint32(ctx->blob, deref->cast.ptr_stride);
      break;

   case nir_deref_type_array_wildcard:
      /* Nothing to do */
      break;

   default:
      unreachable("Invalid deref type");
   }
}
690
/* Deserialize a deref instruction written by write_deref(). */
static nir_deref_instr *
read_deref(read_ctx *ctx, union packed_instr header)
{
   nir_deref_type deref_type = header.deref.deref_type;
   nir_deref_instr *deref = nir_deref_instr_create(ctx->nir, deref_type);

   read_dest(ctx, &deref->dest, &deref->instr, header);

   deref->mode = header.deref.mode;
   deref->type = decode_type_from_blob(ctx->blob);

   if (deref_type == nir_deref_type_var) {
      deref->var = read_object(ctx);
      return deref;
   }

   read_src(ctx, &deref->parent, &deref->instr);

   switch (deref->deref_type) {
   case nir_deref_type_struct:
      deref->strct.index = blob_read_uint32(ctx->blob);
      break;

   case nir_deref_type_array:
   case nir_deref_type_ptr_as_array:
      read_src(ctx, &deref->arr.index, &deref->instr);
      break;

   case nir_deref_type_cast:
      deref->cast.ptr_stride = blob_read_uint32(ctx->blob);
      break;

   case nir_deref_type_array_wildcard:
      /* Nothing to do */
      break;

   default:
      unreachable("Invalid deref type");
   }

   return deref;
}
733
/* Serialize an intrinsic: header (+ dest when the intrinsic has one),
 * then its sources and constant indices. Source/index counts come from
 * nir_intrinsic_infos so they need not be stored.
 */
static void
write_intrinsic(write_ctx *ctx, const nir_intrinsic_instr *intrin)
{
   /* 9 bits for nir_intrinsic_op */
   STATIC_ASSERT(nir_num_intrinsics <= 512);
   unsigned num_srcs = nir_intrinsic_infos[intrin->intrinsic].num_srcs;
   unsigned num_indices = nir_intrinsic_infos[intrin->intrinsic].num_indices;
   assert(intrin->intrinsic < 512);

   union packed_instr header;
   header.u32 = 0;

   header.intrinsic.instr_type = intrin->instr.type;
   header.intrinsic.intrinsic = intrin->intrinsic;
   header.intrinsic.num_components =
      encode_num_components_in_3bits(intrin->num_components);

   /* Without a dest, write_dest() won't emit the header, so do it here. */
   if (nir_intrinsic_infos[intrin->intrinsic].has_dest)
      write_dest(ctx, &intrin->dest, header);
   else
      blob_write_uint32(ctx->blob, header.u32);

   for (unsigned i = 0; i < num_srcs; i++)
      write_src(ctx, &intrin->src[i]);

   for (unsigned i = 0; i < num_indices; i++)
      blob_write_uint32(ctx->blob, intrin->const_index[i]);
}
762
/* Deserialize an intrinsic written by write_intrinsic(). */
static nir_intrinsic_instr *
read_intrinsic(read_ctx *ctx, union packed_instr header)
{
   nir_intrinsic_op op = header.intrinsic.intrinsic;
   nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(ctx->nir, op);

   unsigned num_srcs = nir_intrinsic_infos[op].num_srcs;
   unsigned num_indices = nir_intrinsic_infos[op].num_indices;

   intrin->num_components =
      decode_num_components_in_3bits(header.intrinsic.num_components);

   if (nir_intrinsic_infos[op].has_dest)
      read_dest(ctx, &intrin->dest, &intrin->instr, header);

   for (unsigned i = 0; i < num_srcs; i++)
      read_src(ctx, &intrin->src[i], &intrin->instr);

   for (unsigned i = 0; i < num_indices; i++)
      intrin->const_index[i] = blob_read_uint32(ctx->blob);

   return intrin;
}
786
/* Serialize a load_const: header (component count stored as count - 1
 * to fit 1..16 in 4 bits) followed by only the used value slots.
 */
static void
write_load_const(write_ctx *ctx, const nir_load_const_instr *lc)
{
   assert(lc->def.num_components >= 1 && lc->def.num_components <= 16);
   union packed_instr header;
   header.u32 = 0;

   header.load_const.instr_type = lc->instr.type;
   header.load_const.last_component = lc->def.num_components - 1;
   header.load_const.bit_size = encode_bit_size_3bits(lc->def.bit_size);

   blob_write_uint32(ctx->blob, header.u32);
   blob_write_bytes(ctx->blob, lc->value, sizeof(*lc->value) * lc->def.num_components);
   write_add_object(ctx, &lc->def);
}
802
/* Deserialize a load_const written by write_load_const(). */
static nir_load_const_instr *
read_load_const(read_ctx *ctx, union packed_instr header)
{
   nir_load_const_instr *lc =
      nir_load_const_instr_create(ctx->nir, header.load_const.last_component + 1,
                                  decode_bit_size_3bits(header.load_const.bit_size));

   blob_copy_bytes(ctx->blob, lc->value, sizeof(*lc->value) * lc->def.num_components);
   read_add_object(ctx, &lc->def);
   return lc;
}
814
/* Serialize an ssa_undef: everything fits in the packed header
 * (component count stored as count - 1, bit size in 3 bits).
 */
static void
write_ssa_undef(write_ctx *ctx, const nir_ssa_undef_instr *undef)
{
   assert(undef->def.num_components >= 1 && undef->def.num_components <= 16);

   union packed_instr header;
   header.u32 = 0;

   header.undef.instr_type = undef->instr.type;
   header.undef.last_component = undef->def.num_components - 1;
   header.undef.bit_size = encode_bit_size_3bits(undef->def.bit_size);

   blob_write_uint32(ctx->blob, header.u32);
   write_add_object(ctx, &undef->def);
}
830
/* Deserialize an ssa_undef written by write_ssa_undef(). */
static nir_ssa_undef_instr *
read_ssa_undef(read_ctx *ctx, union packed_instr header)
{
   nir_ssa_undef_instr *undef =
      nir_ssa_undef_instr_create(ctx->nir, header.undef.last_component + 1,
                                 decode_bit_size_3bits(header.undef.bit_size));

   read_add_object(ctx, &undef->def);
   return undef;
}
841
/* One-uint32 encoding of the fixed nir_tex_instr fields that don't fit
 * in the packed_instr.tex header.
 */
union packed_tex_data {
   uint32_t u32;
   struct {
      enum glsl_sampler_dim sampler_dim:4;
      nir_alu_type dest_type:8;
      unsigned coord_components:3;
      unsigned is_array:1;
      unsigned is_shadow:1;
      unsigned is_new_style_shadow:1;
      unsigned component:2;
      unsigned unused:10; /* Mark unused for valgrind. */
   } u;
};
855
/* Serialize a texture instruction: header + dest, texture/sampler
 * indices, tg4 offsets (gather only), the packed misc fields, then
 * each source with its type in the footer bits.
 */
static void
write_tex(write_ctx *ctx, const nir_tex_instr *tex)
{
   /* Bounds for the packed_instr.tex bitfields. */
   assert(tex->num_srcs < 16);
   assert(tex->op < 16);
   assert(tex->texture_array_size < 1024);

   union packed_instr header;
   header.u32 = 0;

   header.tex.instr_type = tex->instr.type;
   header.tex.num_srcs = tex->num_srcs;
   header.tex.op = tex->op;
   header.tex.texture_array_size = tex->texture_array_size;

   write_dest(ctx, &tex->dest, header);

   blob_write_uint32(ctx->blob, tex->texture_index);
   blob_write_uint32(ctx->blob, tex->sampler_index);
   if (tex->op == nir_texop_tg4)
      blob_write_bytes(ctx->blob, tex->tg4_offsets, sizeof(tex->tg4_offsets));

   STATIC_ASSERT(sizeof(union packed_tex_data) == sizeof(uint32_t));
   union packed_tex_data packed = {
      .u.sampler_dim = tex->sampler_dim,
      .u.dest_type = tex->dest_type,
      .u.coord_components = tex->coord_components,
      .u.is_array = tex->is_array,
      .u.is_shadow = tex->is_shadow,
      .u.is_new_style_shadow = tex->is_new_style_shadow,
      .u.component = tex->component,
   };
   blob_write_uint32(ctx->blob, packed.u32);

   for (unsigned i = 0; i < tex->num_srcs; i++) {
      union packed_src src;
      src.u32 = 0;
      src.tex.src_type = tex->src[i].src_type;
      write_src_full(ctx, &tex->src[i].src, src);
   }
}
897
/* Deserialize a texture instruction written by write_tex(). */
static nir_tex_instr *
read_tex(read_ctx *ctx, union packed_instr header)
{
   nir_tex_instr *tex = nir_tex_instr_create(ctx->nir, header.tex.num_srcs);

   read_dest(ctx, &tex->dest, &tex->instr, header);

   tex->op = header.tex.op;
   tex->texture_index = blob_read_uint32(ctx->blob);
   tex->texture_array_size = header.tex.texture_array_size;
   tex->sampler_index = blob_read_uint32(ctx->blob);
   if (tex->op == nir_texop_tg4)
      blob_copy_bytes(ctx->blob, tex->tg4_offsets, sizeof(tex->tg4_offsets));

   union packed_tex_data packed;
   packed.u32 = blob_read_uint32(ctx->blob);
   tex->sampler_dim = packed.u.sampler_dim;
   tex->dest_type = packed.u.dest_type;
   tex->coord_components = packed.u.coord_components;
   tex->is_array = packed.u.is_array;
   tex->is_shadow = packed.u.is_shadow;
   tex->is_new_style_shadow = packed.u.is_new_style_shadow;
   tex->component = packed.u.component;

   for (unsigned i = 0; i < tex->num_srcs; i++) {
      union packed_src src = read_src(ctx, &tex->src[i].src, &tex->instr);
      tex->src[i].src_type = src.tex.src_type;
   }

   return tex;
}
929
/* Serialize a phi instruction. Sources are written as placeholders and
 * patched later (see write_fixup_phis), since the SSA defs and blocks
 * they reference may not have ids yet.
 */
static void
write_phi(write_ctx *ctx, const nir_phi_instr *phi)
{
   union packed_instr header;
   header.u32 = 0;

   header.phi.instr_type = phi->instr.type;
   header.phi.num_srcs = exec_list_length(&phi->srcs);

   /* Phi nodes are special, since they may reference SSA definitions and
    * basic blocks that don't exist yet. We leave two empty uint32_t's here,
    * and then store enough information so that a later fixup pass can fill
    * them in correctly.
    */
   write_dest(ctx, &phi->dest, header);

   nir_foreach_phi_src(src, phi) {
      assert(src->src.is_ssa);
      size_t blob_offset = blob_reserve_uint32(ctx->blob);
      ASSERTED size_t blob_offset2 = blob_reserve_uint32(ctx->blob);
      assert(blob_offset + sizeof(uint32_t) == blob_offset2);
      write_phi_fixup fixup = {
         .blob_offset = blob_offset,
         .src = src->src.ssa,
         .block = src->pred,
      };
      util_dynarray_append(&ctx->phi_fixups, write_phi_fixup, fixup);
   }
}
959
/* Second pass: patch the placeholder slots reserved by write_phi() with
 * the now-known ids of each phi source's SSA def and predecessor block.
 */
static void
write_fixup_phis(write_ctx *ctx)
{
   util_dynarray_foreach(&ctx->phi_fixups, write_phi_fixup, fixup) {
      uint32_t *blob_ptr = (uint32_t *)(ctx->blob->data + fixup->blob_offset);
      blob_ptr[0] = write_lookup_object(ctx, fixup->src);
      blob_ptr[1] = write_lookup_object(ctx, fixup->block);
   }

   util_dynarray_clear(&ctx->phi_fixups);
}
971
/* Deserialize a phi instruction. Source pointers are temporarily the
 * raw ids from the blob; read_fixup_phis() resolves them once all
 * objects exist.
 */
static nir_phi_instr *
read_phi(read_ctx *ctx, nir_block *blk, union packed_instr header)
{
   nir_phi_instr *phi = nir_phi_instr_create(ctx->nir);

   read_dest(ctx, &phi->dest, &phi->instr, header);

   /* For similar reasons as before, we just store the index directly into the
    * pointer, and let a later pass resolve the phi sources.
    *
    * In order to ensure that the copied sources (which are just the indices
    * from the blob for now) don't get inserted into the old shader's use-def
    * lists, we have to add the phi instruction *before* we set up its
    * sources.
    */
   nir_instr_insert_after_block(blk, &phi->instr);

   for (unsigned i = 0; i < header.phi.num_srcs; i++) {
      nir_phi_src *src = ralloc(phi, nir_phi_src);

      src->src.is_ssa = true;
      src->src.ssa = (nir_ssa_def *)(uintptr_t) blob_read_uint32(ctx->blob);
      src->pred = (nir_block *)(uintptr_t) blob_read_uint32(ctx->blob);

      /* Since we're not letting nir_insert_instr handle use/def stuff for us,
       * we have to set the parent_instr manually. It doesn't really matter
       * when we do it, so we might as well do it here.
       */
      src->src.parent_instr = &phi->instr;

      /* Stash it in the list of phi sources. We'll walk this list and fix up
       * sources at the very end of read_function_impl.
       */
      list_add(&src->src.use_link, &ctx->phi_srcs);

      exec_list_push_tail(&phi->srcs, &src->node);
   }

   return phi;
}
1012
/* Resolve the deferred phi sources stashed by read_phi(): turn the raw
 * ids back into real pointers and link each source into its SSA def's
 * use list.
 */
static void
read_fixup_phis(read_ctx *ctx)
{
   list_for_each_entry_safe(nir_phi_src, src, &ctx->phi_srcs, src.use_link) {
      src->pred = read_lookup_object(ctx, (uintptr_t)src->pred);
      src->src.ssa = read_lookup_object(ctx, (uintptr_t)src->src.ssa);

      /* Remove from this list */
      list_del(&src->src.use_link);

      list_addtail(&src->src.use_link, &src->src.ssa->uses);
   }
   assert(list_is_empty(&ctx->phi_srcs));
}
1027
/* Serialize a jump instruction: everything fits in the packed header. */
static void
write_jump(write_ctx *ctx, const nir_jump_instr *jmp)
{
   /* 2 bits for the jump type in packed_instr.jump. */
   assert(jmp->type < 4);

   union packed_instr header;
   header.u32 = 0;

   header.jump.instr_type = jmp->instr.type;
   header.jump.type = jmp->type;

   blob_write_uint32(ctx->blob, header.u32);
}
1041
1042 static nir_jump_instr *
1043 read_jump(read_ctx *ctx, union packed_instr header)
1044 {
1045 nir_jump_instr *jmp = nir_jump_instr_create(ctx->nir, header.jump.type);
1046 return jmp;
1047 }
1048
/* Serialize a call instruction: callee id, then each parameter source.
 * The parameter count is implied by the callee's signature.
 */
static void
write_call(write_ctx *ctx, const nir_call_instr *call)
{
   blob_write_uint32(ctx->blob, write_lookup_object(ctx, call->callee));

   for (unsigned i = 0; i < call->num_params; i++)
      write_src(ctx, &call->params[i]);
}
1057
/* Deserialize a call instruction written by write_call(). */
static nir_call_instr *
read_call(read_ctx *ctx)
{
   nir_function *callee = read_object(ctx);
   nir_call_instr *call = nir_call_instr_create(ctx->nir, callee);

   for (unsigned i = 0; i < call->num_params; i++)
      read_src(ctx, &call->params[i], call);

   return call;
}
1069
/* Dispatch serialization by instruction type. Most writers emit their
 * own packed header; calls have no packed form, so the bare type is
 * written here before the call payload.
 */
static void
write_instr(write_ctx *ctx, const nir_instr *instr)
{
   /* We have only 4 bits for the instruction type. */
   assert(instr->type < 16);

   switch (instr->type) {
   case nir_instr_type_alu:
      write_alu(ctx, nir_instr_as_alu(instr));
      break;
   case nir_instr_type_deref:
      write_deref(ctx, nir_instr_as_deref(instr));
      break;
   case nir_instr_type_intrinsic:
      write_intrinsic(ctx, nir_instr_as_intrinsic(instr));
      break;
   case nir_instr_type_load_const:
      write_load_const(ctx, nir_instr_as_load_const(instr));
      break;
   case nir_instr_type_ssa_undef:
      write_ssa_undef(ctx, nir_instr_as_ssa_undef(instr));
      break;
   case nir_instr_type_tex:
      write_tex(ctx, nir_instr_as_tex(instr));
      break;
   case nir_instr_type_phi:
      write_phi(ctx, nir_instr_as_phi(instr));
      break;
   case nir_instr_type_jump:
      write_jump(ctx, nir_instr_as_jump(instr));
      break;
   case nir_instr_type_call:
      /* The reader only looks at the low 4 bits of this word. */
      blob_write_uint32(ctx->blob, instr->type);
      write_call(ctx, nir_instr_as_call(instr));
      break;
   case nir_instr_type_parallel_copy:
      unreachable("Cannot write parallel copies");
   default:
      unreachable("bad instr type");
   }
}
1111
/* Deserialize one instruction into @block. Reads the packed header dword
 * first and dispatches on its type field; each read_* helper consumes the
 * rest of that instruction's encoding.
 */
static void
read_instr(read_ctx *ctx, nir_block *block)
{
   /* The packed header must stay exactly one dword for the format. */
   STATIC_ASSERT(sizeof(union packed_instr) == 4);
   union packed_instr header;
   header.u32 = blob_read_uint32(ctx->blob);
   nir_instr *instr;

   switch (header.any.instr_type) {
   case nir_instr_type_alu:
      instr = &read_alu(ctx, header)->instr;
      break;
   case nir_instr_type_deref:
      instr = &read_deref(ctx, header)->instr;
      break;
   case nir_instr_type_intrinsic:
      instr = &read_intrinsic(ctx, header)->instr;
      break;
   case nir_instr_type_load_const:
      instr = &read_load_const(ctx, header)->instr;
      break;
   case nir_instr_type_ssa_undef:
      instr = &read_ssa_undef(ctx, header)->instr;
      break;
   case nir_instr_type_tex:
      instr = &read_tex(ctx, header)->instr;
      break;
   case nir_instr_type_phi:
      /* Phi instructions are a bit of a special case when reading because we
       * don't want inserting the instruction to automatically handle use/defs
       * for us.  Instead, we need to wait until all the blocks/instructions
       * are read so that we can set their sources up.
       */
      read_phi(ctx, block, header);
      return;
   case nir_instr_type_jump:
      instr = &read_jump(ctx, header)->instr;
      break;
   case nir_instr_type_call:
      instr = &read_call(ctx)->instr;
      break;
   case nir_instr_type_parallel_copy:
      unreachable("Cannot read parallel copies");
   default:
      unreachable("bad instr type");
   }

   nir_instr_insert_after_block(block, instr);
}
1161
1162 static void
1163 write_block(write_ctx *ctx, const nir_block *block)
1164 {
1165 write_add_object(ctx, block);
1166 blob_write_uint32(ctx->blob, exec_list_length(&block->instr_list));
1167 nir_foreach_instr(instr, block)
1168 write_instr(ctx, instr);
1169 }
1170
1171 static void
1172 read_block(read_ctx *ctx, struct exec_list *cf_list)
1173 {
1174 /* Don't actually create a new block. Just use the one from the tail of
1175 * the list. NIR guarantees that the tail of the list is a block and that
1176 * no two blocks are side-by-side in the IR; It should be empty.
1177 */
1178 nir_block *block =
1179 exec_node_data(nir_block, exec_list_get_tail(cf_list), cf_node.node);
1180
1181 read_add_object(ctx, block);
1182 unsigned num_instrs = blob_read_uint32(ctx->blob);
1183 for (unsigned i = 0; i < num_instrs; i++) {
1184 read_instr(ctx, block);
1185 }
1186 }
1187
1188 static void
1189 write_cf_list(write_ctx *ctx, const struct exec_list *cf_list);
1190
1191 static void
1192 read_cf_list(read_ctx *ctx, struct exec_list *cf_list);
1193
1194 static void
1195 write_if(write_ctx *ctx, nir_if *nif)
1196 {
1197 write_src(ctx, &nif->condition);
1198
1199 write_cf_list(ctx, &nif->then_list);
1200 write_cf_list(ctx, &nif->else_list);
1201 }
1202
1203 static void
1204 read_if(read_ctx *ctx, struct exec_list *cf_list)
1205 {
1206 nir_if *nif = nir_if_create(ctx->nir);
1207
1208 read_src(ctx, &nif->condition, nif);
1209
1210 nir_cf_node_insert_end(cf_list, &nif->cf_node);
1211
1212 read_cf_list(ctx, &nif->then_list);
1213 read_cf_list(ctx, &nif->else_list);
1214 }
1215
1216 static void
1217 write_loop(write_ctx *ctx, nir_loop *loop)
1218 {
1219 write_cf_list(ctx, &loop->body);
1220 }
1221
1222 static void
1223 read_loop(read_ctx *ctx, struct exec_list *cf_list)
1224 {
1225 nir_loop *loop = nir_loop_create(ctx->nir);
1226
1227 nir_cf_node_insert_end(cf_list, &loop->cf_node);
1228
1229 read_cf_list(ctx, &loop->body);
1230 }
1231
1232 static void
1233 write_cf_node(write_ctx *ctx, nir_cf_node *cf)
1234 {
1235 blob_write_uint32(ctx->blob, cf->type);
1236
1237 switch (cf->type) {
1238 case nir_cf_node_block:
1239 write_block(ctx, nir_cf_node_as_block(cf));
1240 break;
1241 case nir_cf_node_if:
1242 write_if(ctx, nir_cf_node_as_if(cf));
1243 break;
1244 case nir_cf_node_loop:
1245 write_loop(ctx, nir_cf_node_as_loop(cf));
1246 break;
1247 default:
1248 unreachable("bad cf type");
1249 }
1250 }
1251
1252 static void
1253 read_cf_node(read_ctx *ctx, struct exec_list *list)
1254 {
1255 nir_cf_node_type type = blob_read_uint32(ctx->blob);
1256
1257 switch (type) {
1258 case nir_cf_node_block:
1259 read_block(ctx, list);
1260 break;
1261 case nir_cf_node_if:
1262 read_if(ctx, list);
1263 break;
1264 case nir_cf_node_loop:
1265 read_loop(ctx, list);
1266 break;
1267 default:
1268 unreachable("bad cf type");
1269 }
1270 }
1271
1272 static void
1273 write_cf_list(write_ctx *ctx, const struct exec_list *cf_list)
1274 {
1275 blob_write_uint32(ctx->blob, exec_list_length(cf_list));
1276 foreach_list_typed(nir_cf_node, cf, node, cf_list) {
1277 write_cf_node(ctx, cf);
1278 }
1279 }
1280
1281 static void
1282 read_cf_list(read_ctx *ctx, struct exec_list *cf_list)
1283 {
1284 uint32_t num_cf_nodes = blob_read_uint32(ctx->blob);
1285 for (unsigned i = 0; i < num_cf_nodes; i++)
1286 read_cf_node(ctx, cf_list);
1287 }
1288
1289 static void
1290 write_function_impl(write_ctx *ctx, const nir_function_impl *fi)
1291 {
1292 write_var_list(ctx, &fi->locals);
1293 write_reg_list(ctx, &fi->registers);
1294 blob_write_uint32(ctx->blob, fi->reg_alloc);
1295
1296 write_cf_list(ctx, &fi->body);
1297 write_fixup_phis(ctx);
1298 }
1299
1300 static nir_function_impl *
1301 read_function_impl(read_ctx *ctx, nir_function *fxn)
1302 {
1303 nir_function_impl *fi = nir_function_impl_create_bare(ctx->nir);
1304 fi->function = fxn;
1305
1306 read_var_list(ctx, &fi->locals);
1307 read_reg_list(ctx, &fi->registers);
1308 fi->reg_alloc = blob_read_uint32(ctx->blob);
1309
1310 read_cf_list(ctx, &fi->body);
1311 read_fixup_phis(ctx);
1312
1313 fi->valid_metadata = 0;
1314
1315 return fi;
1316 }
1317
1318 static void
1319 write_function(write_ctx *ctx, const nir_function *fxn)
1320 {
1321 uint32_t flags = fxn->is_entrypoint;
1322 if (fxn->name)
1323 flags |= 0x2;
1324 if (fxn->impl)
1325 flags |= 0x4;
1326 blob_write_uint32(ctx->blob, flags);
1327 if (fxn->name)
1328 blob_write_string(ctx->blob, fxn->name);
1329
1330 write_add_object(ctx, fxn);
1331
1332 blob_write_uint32(ctx->blob, fxn->num_params);
1333 for (unsigned i = 0; i < fxn->num_params; i++) {
1334 uint32_t val =
1335 ((uint32_t)fxn->params[i].num_components) |
1336 ((uint32_t)fxn->params[i].bit_size) << 8;
1337 blob_write_uint32(ctx->blob, val);
1338 }
1339
1340 /* At first glance, it looks like we should write the function_impl here.
1341 * However, call instructions need to be able to reference at least the
1342 * function and those will get processed as we write the function_impls.
1343 * We stop here and write function_impls as a second pass.
1344 */
1345 }
1346
1347 static void
1348 read_function(read_ctx *ctx)
1349 {
1350 uint32_t flags = blob_read_uint32(ctx->blob);
1351 bool has_name = flags & 0x2;
1352 char *name = has_name ? blob_read_string(ctx->blob) : NULL;
1353
1354 nir_function *fxn = nir_function_create(ctx->nir, name);
1355
1356 read_add_object(ctx, fxn);
1357
1358 fxn->num_params = blob_read_uint32(ctx->blob);
1359 fxn->params = ralloc_array(fxn, nir_parameter, fxn->num_params);
1360 for (unsigned i = 0; i < fxn->num_params; i++) {
1361 uint32_t val = blob_read_uint32(ctx->blob);
1362 fxn->params[i].num_components = val & 0xff;
1363 fxn->params[i].bit_size = (val >> 8) & 0xff;
1364 }
1365
1366 fxn->is_entrypoint = flags & 0x1;
1367 if (flags & 0x4)
1368 fxn->impl = NIR_SERIALIZE_FUNC_HAS_IMPL;
1369 }
1370
/**
 * Serialize NIR into a binary blob.
 *
 * \param strip  Don't serialize information only useful for debugging,
 *               such as variable names, making cache hits from similar
 *               shaders more likely.
 */
void
nir_serialize(struct blob *blob, const nir_shader *nir, bool strip)
{
   write_ctx ctx = {0};
   ctx.remap_table = _mesa_pointer_hash_table_create(NULL);
   ctx.blob = blob;
   ctx.nir = nir;
   ctx.strip = strip;
   util_dynarray_init(&ctx.phi_fixups, NULL);

   /* Reserve a dword for the final object count; it is only known after
    * everything has been written and is patched in at the end so the
    * reader can size its index table up front.
    */
   size_t idx_size_offset = blob_reserve_uint32(blob);

   /* shader_info is written by value, so NULL out the name/label pointers
    * and serialize those strings separately (or not at all when stripping).
    */
   struct shader_info info = nir->info;
   uint32_t strings = 0;
   if (!strip && info.name)
      strings |= 0x1;
   if (!strip && info.label)
      strings |= 0x2;
   blob_write_uint32(blob, strings);
   if (!strip && info.name)
      blob_write_string(blob, info.name);
   if (!strip && info.label)
      blob_write_string(blob, info.label);
   info.name = info.label = NULL;
   blob_write_bytes(blob, (uint8_t *) &info, sizeof(info));

   write_var_list(&ctx, &nir->uniforms);
   write_var_list(&ctx, &nir->inputs);
   write_var_list(&ctx, &nir->outputs);
   write_var_list(&ctx, &nir->shared);
   write_var_list(&ctx, &nir->globals);
   write_var_list(&ctx, &nir->system_values);

   blob_write_uint32(blob, nir->num_inputs);
   blob_write_uint32(blob, nir->num_uniforms);
   blob_write_uint32(blob, nir->num_outputs);
   blob_write_uint32(blob, nir->num_shared);
   blob_write_uint32(blob, nir->scratch_size);

   /* Write all function declarations first so call instructions inside
    * any function_impl can reference their callees by index; the impls
    * themselves are written in a second pass below.
    */
   blob_write_uint32(blob, exec_list_length(&nir->functions));
   nir_foreach_function(fxn, nir) {
      write_function(&ctx, fxn);
   }

   nir_foreach_function(fxn, nir) {
      if (fxn->impl)
         write_function_impl(&ctx, fxn->impl);
   }

   blob_write_uint32(blob, nir->constant_data_size);
   if (nir->constant_data_size > 0)
      blob_write_bytes(blob, nir->constant_data, nir->constant_data_size);

   /* Patch the reserved dword with the number of objects we assigned. */
   *(uint32_t *)(blob->data + idx_size_offset) = ctx.next_idx;

   _mesa_hash_table_destroy(ctx.remap_table, NULL);
   util_dynarray_fini(&ctx.phi_fixups);
}
1436
1437 nir_shader *
1438 nir_deserialize(void *mem_ctx,
1439 const struct nir_shader_compiler_options *options,
1440 struct blob_reader *blob)
1441 {
1442 read_ctx ctx = {0};
1443 ctx.blob = blob;
1444 list_inithead(&ctx.phi_srcs);
1445 ctx.idx_table_len = blob_read_uint32(blob);
1446 ctx.idx_table = calloc(ctx.idx_table_len, sizeof(uintptr_t));
1447
1448 uint32_t strings = blob_read_uint32(blob);
1449 char *name = (strings & 0x1) ? blob_read_string(blob) : NULL;
1450 char *label = (strings & 0x2) ? blob_read_string(blob) : NULL;
1451
1452 struct shader_info info;
1453 blob_copy_bytes(blob, (uint8_t *) &info, sizeof(info));
1454
1455 ctx.nir = nir_shader_create(mem_ctx, info.stage, options, NULL);
1456
1457 info.name = name ? ralloc_strdup(ctx.nir, name) : NULL;
1458 info.label = label ? ralloc_strdup(ctx.nir, label) : NULL;
1459
1460 ctx.nir->info = info;
1461
1462 read_var_list(&ctx, &ctx.nir->uniforms);
1463 read_var_list(&ctx, &ctx.nir->inputs);
1464 read_var_list(&ctx, &ctx.nir->outputs);
1465 read_var_list(&ctx, &ctx.nir->shared);
1466 read_var_list(&ctx, &ctx.nir->globals);
1467 read_var_list(&ctx, &ctx.nir->system_values);
1468
1469 ctx.nir->num_inputs = blob_read_uint32(blob);
1470 ctx.nir->num_uniforms = blob_read_uint32(blob);
1471 ctx.nir->num_outputs = blob_read_uint32(blob);
1472 ctx.nir->num_shared = blob_read_uint32(blob);
1473 ctx.nir->scratch_size = blob_read_uint32(blob);
1474
1475 unsigned num_functions = blob_read_uint32(blob);
1476 for (unsigned i = 0; i < num_functions; i++)
1477 read_function(&ctx);
1478
1479 nir_foreach_function(fxn, ctx.nir) {
1480 if (fxn->impl == NIR_SERIALIZE_FUNC_HAS_IMPL)
1481 fxn->impl = read_function_impl(&ctx, fxn);
1482 }
1483
1484 ctx.nir->constant_data_size = blob_read_uint32(blob);
1485 if (ctx.nir->constant_data_size > 0) {
1486 ctx.nir->constant_data =
1487 ralloc_size(ctx.nir, ctx.nir->constant_data_size);
1488 blob_copy_bytes(blob, ctx.nir->constant_data,
1489 ctx.nir->constant_data_size);
1490 }
1491
1492 free(ctx.idx_table);
1493
1494 return ctx.nir;
1495 }
1496
/* Round-trip @shader through serialization in place: serialize it, free
 * all of its ralloc children, deserialize a fresh copy, and replace the
 * original's contents with the copy.  Used to exercise the serializer.
 */
void
nir_shader_serialize_deserialize(nir_shader *shader)
{
   const struct nir_shader_compiler_options *options = shader->options;

   struct blob writer;
   blob_init(&writer);
   nir_serialize(&writer, shader, false);

   /* Delete all of dest's ralloc children but leave dest alone */
   void *dead_ctx = ralloc_context(NULL);
   ralloc_adopt(dead_ctx, shader);
   ralloc_free(dead_ctx);

   dead_ctx = ralloc_context(NULL);

   struct blob_reader reader;
   blob_reader_init(&reader, writer.data, writer.size);
   nir_shader *copy = nir_deserialize(dead_ctx, options, &reader);

   /* Only finish the blob after deserializing: the reader points into
    * writer.data.
    */
   blob_finish(&writer);

   nir_shader_replace(shader, copy);
   ralloc_free(dead_ctx);
}