/*
 * Copyright © 2017 Connor Abbott
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir_serialize.h"
#include "nir_control_flow.h"
#include "util/u_dynarray.h"
#include "util/u_math.h"

#define NIR_SERIALIZE_FUNC_HAS_IMPL ((void *)(intptr_t)1)
#define MAX_OBJECT_IDS (1 << 30)

typedef struct {
   size_t blob_offset;
   nir_ssa_def *src;
   nir_block *block;
} write_phi_fixup;

typedef struct {
   const nir_shader *nir;

   struct blob *blob;

   /* maps pointer to index */
   struct hash_table *remap_table;

   /* the next index to assign to a NIR in-memory object */
   uint32_t next_idx;

   /* Array of write_phi_fixup structs representing phi sources that need to
    * be resolved in the second pass.
    */
   struct util_dynarray phi_fixups;

   /* Don't write optional data such as variable names. */
   bool strip;
} write_ctx;

typedef struct {
   nir_shader *nir;

   struct blob_reader *blob;

   /* the next index to assign to a NIR in-memory object */
   uint32_t next_idx;

   /* The length of the index -> object table */
   uint32_t idx_table_len;

   /* map from index to deserialized pointer */
   void **idx_table;

   /* List of phi sources. */
   struct list_head phi_srcs;

} read_ctx;

static void
write_add_object(write_ctx *ctx, const void *obj)
{
   uint32_t index = ctx->next_idx++;
   assert(index != MAX_OBJECT_IDS);
   _mesa_hash_table_insert(ctx->remap_table, obj, (void *)(uintptr_t) index);
}

static uint32_t
write_lookup_object(write_ctx *ctx, const void *obj)
{
   struct hash_entry *entry = _mesa_hash_table_search(ctx->remap_table, obj);
   assert(entry);
   return (uint32_t)(uintptr_t) entry->data;
}

static void
write_object(write_ctx *ctx, const void *obj)
{
   blob_write_uint32(ctx->blob, write_lookup_object(ctx, obj));
}

static void
read_add_object(read_ctx *ctx, void *obj)
{
   assert(ctx->next_idx < ctx->idx_table_len);
   ctx->idx_table[ctx->next_idx++] = obj;
}

static void *
read_lookup_object(read_ctx *ctx, uint32_t idx)
{
   assert(idx < ctx->idx_table_len);
   return ctx->idx_table[idx];
}

static void *
read_object(read_ctx *ctx)
{
   return read_lookup_object(ctx, blob_read_uint32(ctx->blob));
}

static uint32_t
encode_bit_size_3bits(uint8_t bit_size)
{
   /* Encode values of 0, 1, 2, 4, 8, 16, 32, 64 in 3 bits. */
   assert(bit_size <= 64 && util_is_power_of_two_or_zero(bit_size));
   if (bit_size)
      return util_logbase2(bit_size) + 1;
   return 0;
}

static uint8_t
decode_bit_size_3bits(uint8_t bit_size)
{
   if (bit_size)
      return 1 << (bit_size - 1);
   return 0;
}

static uint8_t
encode_num_components_in_3bits(uint8_t num_components)
{
   if (num_components <= 4)
      return num_components;
   if (num_components == 8)
      return 5;
   if (num_components == 16)
      return 6;

   unreachable("invalid number in num_components");
   return 0;
}

static uint8_t
decode_num_components_in_3bits(uint8_t value)
{
   if (value <= 4)
      return value;
   if (value == 5)
      return 8;
   if (value == 6)
      return 16;

   unreachable("invalid num_components encoding");
   return 0;
}

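/* Worked examples for the 3-bit encodings above:
 *
 *    bit_size:       0 -> 0,  1 -> 1,  8 -> 4,  32 -> 6,  64 -> 7
 *    num_components: 0..4 -> as-is,  8 -> 5,  16 -> 6
 *
 * Each pair is its own inverse: decode(encode(x)) == x for every legal
 * input, which is what lets dests and load_consts round-trip losslessly.
 */
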
static void
write_constant(write_ctx *ctx, const nir_constant *c)
{
   blob_write_bytes(ctx->blob, c->values, sizeof(c->values));
   blob_write_uint32(ctx->blob, c->num_elements);
   for (unsigned i = 0; i < c->num_elements; i++)
      write_constant(ctx, c->elements[i]);
}

static nir_constant *
read_constant(read_ctx *ctx, nir_variable *nvar)
{
   nir_constant *c = ralloc(nvar, nir_constant);

   blob_copy_bytes(ctx->blob, (uint8_t *)c->values, sizeof(c->values));
   c->num_elements = blob_read_uint32(ctx->blob);
   c->elements = ralloc_array(nvar, nir_constant *, c->num_elements);
   for (unsigned i = 0; i < c->num_elements; i++)
      c->elements[i] = read_constant(ctx, nvar);

   return c;
}

union packed_var {
   uint32_t u32;
   struct {
      unsigned has_name:1;
      unsigned has_constant_initializer:1;
      unsigned has_interface_type:1;
      unsigned num_state_slots:13;
      unsigned num_members:16;
   } u;
};

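/* A variable is written as: its encoded GLSL type, one packed_var dword,
 * the optional name string, the raw nir_variable_data, any state slots,
 * the optional constant initializer, the optional interface type and any
 * per-member data. read_variable consumes the same fields in the same
 * order, keyed off the packed_var flags.
 */
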
static void
write_variable(write_ctx *ctx, const nir_variable *var)
{
   write_add_object(ctx, var);
   encode_type_to_blob(ctx->blob, var->type);

   assert(var->num_state_slots < (1 << 13));
   assert(var->num_members < (1 << 16));

   STATIC_ASSERT(sizeof(union packed_var) == 4);
   union packed_var flags;
   flags.u32 = 0;

   flags.u.has_name = !ctx->strip && var->name;
   flags.u.has_constant_initializer = !!(var->constant_initializer);
   flags.u.has_interface_type = !!(var->interface_type);
   flags.u.num_state_slots = var->num_state_slots;
   flags.u.num_members = var->num_members;

   blob_write_uint32(ctx->blob, flags.u32);

   if (flags.u.has_name)
      blob_write_string(ctx->blob, var->name);

   struct nir_variable_data data = var->data;

   /* When stripping, we assume the location is no longer needed, which is
    * typically the case once shaders have been linked.
    */
   if (ctx->strip &&
       data.mode != nir_var_shader_in &&
       data.mode != nir_var_shader_out)
      data.location = 0;

   blob_write_bytes(ctx->blob, &data, sizeof(data));

   for (unsigned i = 0; i < var->num_state_slots; i++) {
      blob_write_bytes(ctx->blob, &var->state_slots[i],
                       sizeof(var->state_slots[i]));
   }
   if (var->constant_initializer)
      write_constant(ctx, var->constant_initializer);
   if (var->interface_type)
      encode_type_to_blob(ctx->blob, var->interface_type);
   if (var->num_members > 0) {
      blob_write_bytes(ctx->blob, (uint8_t *) var->members,
                       var->num_members * sizeof(*var->members));
   }
}

static nir_variable *
read_variable(read_ctx *ctx)
{
   nir_variable *var = rzalloc(ctx->nir, nir_variable);
   read_add_object(ctx, var);

   var->type = decode_type_from_blob(ctx->blob);

   union packed_var flags;
   flags.u32 = blob_read_uint32(ctx->blob);

   if (flags.u.has_name) {
      const char *name = blob_read_string(ctx->blob);
      var->name = ralloc_strdup(var, name);
   } else {
      var->name = NULL;
   }
   blob_copy_bytes(ctx->blob, (uint8_t *) &var->data, sizeof(var->data));
   var->num_state_slots = flags.u.num_state_slots;
   if (var->num_state_slots != 0) {
      var->state_slots = ralloc_array(var, nir_state_slot,
                                      var->num_state_slots);
      for (unsigned i = 0; i < var->num_state_slots; i++) {
         blob_copy_bytes(ctx->blob, &var->state_slots[i],
                         sizeof(var->state_slots[i]));
      }
   }
   if (flags.u.has_constant_initializer)
      var->constant_initializer = read_constant(ctx, var);
   else
      var->constant_initializer = NULL;
   if (flags.u.has_interface_type)
      var->interface_type = decode_type_from_blob(ctx->blob);
   else
      var->interface_type = NULL;
   var->num_members = flags.u.num_members;
   if (var->num_members > 0) {
      var->members = ralloc_array(var, struct nir_variable_data,
                                  var->num_members);
      blob_copy_bytes(ctx->blob, (uint8_t *) var->members,
                      var->num_members * sizeof(*var->members));
   }

   return var;
}

static void
write_var_list(write_ctx *ctx, const struct exec_list *src)
{
   blob_write_uint32(ctx->blob, exec_list_length(src));
   foreach_list_typed(nir_variable, var, node, src) {
      write_variable(ctx, var);
   }
}

static void
read_var_list(read_ctx *ctx, struct exec_list *dst)
{
   exec_list_make_empty(dst);
   unsigned num_vars = blob_read_uint32(ctx->blob);
   for (unsigned i = 0; i < num_vars; i++) {
      nir_variable *var = read_variable(ctx);
      exec_list_push_tail(dst, &var->node);
   }
}

static void
write_register(write_ctx *ctx, const nir_register *reg)
{
   write_add_object(ctx, reg);
   blob_write_uint32(ctx->blob, reg->num_components);
   blob_write_uint32(ctx->blob, reg->bit_size);
   blob_write_uint32(ctx->blob, reg->num_array_elems);
   blob_write_uint32(ctx->blob, reg->index);
   blob_write_uint32(ctx->blob, !ctx->strip && reg->name);
   if (!ctx->strip && reg->name)
      blob_write_string(ctx->blob, reg->name);
}

static nir_register *
read_register(read_ctx *ctx)
{
   nir_register *reg = ralloc(ctx->nir, nir_register);
   read_add_object(ctx, reg);
   reg->num_components = blob_read_uint32(ctx->blob);
   reg->bit_size = blob_read_uint32(ctx->blob);
   reg->num_array_elems = blob_read_uint32(ctx->blob);
   reg->index = blob_read_uint32(ctx->blob);
   bool has_name = blob_read_uint32(ctx->blob);
   if (has_name) {
      const char *name = blob_read_string(ctx->blob);
      reg->name = ralloc_strdup(reg, name);
   } else {
      reg->name = NULL;
   }

   list_inithead(&reg->uses);
   list_inithead(&reg->defs);
   list_inithead(&reg->if_uses);

   return reg;
}

static void
write_reg_list(write_ctx *ctx, const struct exec_list *src)
{
   blob_write_uint32(ctx->blob, exec_list_length(src));
   foreach_list_typed(nir_register, reg, node, src)
      write_register(ctx, reg);
}

static void
read_reg_list(read_ctx *ctx, struct exec_list *dst)
{
   exec_list_make_empty(dst);
   unsigned num_regs = blob_read_uint32(ctx->blob);
   for (unsigned i = 0; i < num_regs; i++) {
      nir_register *reg = read_register(ctx);
      exec_list_push_tail(dst, &reg->node);
   }
}

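/* Packed source layout (written by write_src, consumed by read_src):
 *
 *    bit 0      : is_ssa
 *    bit 1      : register source has an indirect (unused for SSA)
 *    bits 2..31 : remap-table index of the SSA def or register
 *
 * e.g. an SSA def with remap index 5 is stored as (5 << 2) | 1 == 21.
 * Register sources are followed by the base offset and, if present, the
 * indirect source.
 */
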
static void
write_src(write_ctx *ctx, const nir_src *src)
{
   /* Since sources are very frequent, we try to save some space when storing
    * them. In particular, we store whether the source is a register and
    * whether the register has an indirect index in the low two bits. We can
    * assume that the high two bits of the index are zero, since otherwise our
    * address space would've been exhausted allocating the remap table!
    */
   if (src->is_ssa) {
      uint32_t idx = write_lookup_object(ctx, src->ssa) << 2;
      idx |= 1;
      blob_write_uint32(ctx->blob, idx);
   } else {
      uint32_t idx = write_lookup_object(ctx, src->reg.reg) << 2;
      if (src->reg.indirect)
         idx |= 2;
      blob_write_uint32(ctx->blob, idx);
      blob_write_uint32(ctx->blob, src->reg.base_offset);
      if (src->reg.indirect) {
         write_src(ctx, src->reg.indirect);
      }
   }
}

static void
read_src(read_ctx *ctx, nir_src *src, void *mem_ctx)
{
   uint32_t val = blob_read_uint32(ctx->blob);
   uint32_t idx = val >> 2;
   src->is_ssa = val & 0x1;
   if (src->is_ssa) {
      src->ssa = read_lookup_object(ctx, idx);
   } else {
      bool is_indirect = val & 0x2;
      src->reg.reg = read_lookup_object(ctx, idx);
      src->reg.base_offset = blob_read_uint32(ctx->blob);
      if (is_indirect) {
         src->reg.indirect = ralloc(mem_ctx, nir_src);
         read_src(ctx, src->reg.indirect, mem_ctx);
      } else {
         src->reg.indirect = NULL;
      }
   }
}

union packed_dest {
   uint8_t u8;
   struct {
      uint8_t is_ssa:1;
      uint8_t has_name:1;
      uint8_t num_components:3;
      uint8_t bit_size:3;
   } ssa;
   struct {
      uint8_t is_ssa:1;
      uint8_t is_indirect:1;
      uint8_t _pad:6;
   } reg;
};

union packed_instr {
   uint32_t u32;
   struct {
      unsigned instr_type:4; /* always present */
      unsigned _pad:20;
      unsigned dest:8;       /* always last */
   } any;
   struct {
      unsigned instr_type:4;
      unsigned exact:1;
      unsigned no_signed_wrap:1;
      unsigned no_unsigned_wrap:1;
      unsigned saturate:1;
      unsigned writemask:4;
      unsigned op:9;
      unsigned _pad:3;
      unsigned dest:8;
   } alu;
   struct {
      unsigned instr_type:4;
      unsigned deref_type:3;
      unsigned mode:10;
      unsigned _pad:7;
      unsigned dest:8;
   } deref;
   struct {
      unsigned instr_type:4;
      unsigned intrinsic:9;
      unsigned num_components:3;
      unsigned _pad:8;
      unsigned dest:8;
   } intrinsic;
   struct {
      unsigned instr_type:4;
      unsigned last_component:4;
      unsigned bit_size:3;
      unsigned _pad:21;
   } load_const;
   struct {
      unsigned instr_type:4;
      unsigned last_component:4;
      unsigned bit_size:3;
      unsigned _pad:21;
   } undef;
   struct {
      unsigned instr_type:4;
      unsigned num_srcs:4;
      unsigned op:4;
      unsigned texture_array_size:12;
      unsigned dest:8;
   } tex;
   struct {
      unsigned instr_type:4;
      unsigned num_srcs:20;
      unsigned dest:8;
   } phi;
   struct {
      unsigned instr_type:4;
      unsigned type:2;
      unsigned _pad:26;
   } jump;
};

/* write_dest packs the destination into the last 8 bits of the instruction
 * header (header.any.dest) and then writes the whole header; the low 24 bits
 * hold the instruction-specific fields the caller already filled in.
 */
static void
write_dest(write_ctx *ctx, const nir_dest *dst, union packed_instr header)
{
   STATIC_ASSERT(sizeof(union packed_dest) == 1);
   union packed_dest dest;
   dest.u8 = 0;

   dest.ssa.is_ssa = dst->is_ssa;
   if (dst->is_ssa) {
      dest.ssa.has_name = !ctx->strip && dst->ssa.name;
      dest.ssa.num_components =
         encode_num_components_in_3bits(dst->ssa.num_components);
      dest.ssa.bit_size = encode_bit_size_3bits(dst->ssa.bit_size);
   } else {
      dest.reg.is_indirect = !!(dst->reg.indirect);
   }

   header.any.dest = dest.u8;
   blob_write_uint32(ctx->blob, header.u32);

   if (dst->is_ssa) {
      write_add_object(ctx, &dst->ssa);
      if (dest.ssa.has_name)
         blob_write_string(ctx->blob, dst->ssa.name);
   } else {
      blob_write_uint32(ctx->blob, write_lookup_object(ctx, dst->reg.reg));
      blob_write_uint32(ctx->blob, dst->reg.base_offset);
      if (dst->reg.indirect)
         write_src(ctx, dst->reg.indirect);
   }
}

static void
read_dest(read_ctx *ctx, nir_dest *dst, nir_instr *instr,
          union packed_instr header)
{
   union packed_dest dest;
   dest.u8 = header.any.dest;

   if (dest.ssa.is_ssa) {
      unsigned bit_size = decode_bit_size_3bits(dest.ssa.bit_size);
      unsigned num_components =
         decode_num_components_in_3bits(dest.ssa.num_components);
      char *name = dest.ssa.has_name ? blob_read_string(ctx->blob) : NULL;
      nir_ssa_dest_init(instr, dst, num_components, bit_size, name);
      read_add_object(ctx, &dst->ssa);
   } else {
      dst->reg.reg = read_object(ctx);
      dst->reg.base_offset = blob_read_uint32(ctx->blob);
      if (dest.reg.is_indirect) {
         dst->reg.indirect = ralloc(instr, nir_src);
         read_src(ctx, dst->reg.indirect, instr);
      }
   }
}

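/* Each ALU source is written as the packed source followed by one flag
 * dword: bit 0 = negate, bit 1 = abs, and bits [2 + 2j, 3 + 2j] hold
 * swizzle[j] for j = 0..3 (two bits per component, so only up-to-vec4
 * swizzles are representable here).
 */
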
static void
write_alu(write_ctx *ctx, const nir_alu_instr *alu)
{
   /* 9 bits for nir_op */
   STATIC_ASSERT(nir_num_opcodes <= 512);
   union packed_instr header;
   header.u32 = 0;

   header.alu.instr_type = alu->instr.type;
   header.alu.exact = alu->exact;
   header.alu.no_signed_wrap = alu->no_signed_wrap;
   header.alu.no_unsigned_wrap = alu->no_unsigned_wrap;
   header.alu.saturate = alu->dest.saturate;
   header.alu.writemask = alu->dest.write_mask;
   header.alu.op = alu->op;

   write_dest(ctx, &alu->dest.dest, header);

   for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
      write_src(ctx, &alu->src[i].src);
      uint32_t flags = alu->src[i].negate;
      flags |= alu->src[i].abs << 1;
      for (unsigned j = 0; j < 4; j++)
         flags |= alu->src[i].swizzle[j] << (2 + 2 * j);
      blob_write_uint32(ctx->blob, flags);
   }
}

static nir_alu_instr *
read_alu(read_ctx *ctx, union packed_instr header)
{
   nir_alu_instr *alu = nir_alu_instr_create(ctx->nir, header.alu.op);

   alu->exact = header.alu.exact;
   alu->no_signed_wrap = header.alu.no_signed_wrap;
   alu->no_unsigned_wrap = header.alu.no_unsigned_wrap;
   alu->dest.saturate = header.alu.saturate;
   alu->dest.write_mask = header.alu.writemask;

   read_dest(ctx, &alu->dest.dest, &alu->instr, header);

   for (unsigned i = 0; i < nir_op_infos[header.alu.op].num_inputs; i++) {
      read_src(ctx, &alu->src[i].src, &alu->instr);
      uint32_t flags = blob_read_uint32(ctx->blob);
      alu->src[i].negate = flags & 1;
      alu->src[i].abs = flags & 2;
      for (unsigned j = 0; j < 4; j++)
         alu->src[i].swizzle[j] = (flags >> (2 * j + 2)) & 3;
   }

   return alu;
}

static void
write_deref(write_ctx *ctx, const nir_deref_instr *deref)
{
   assert(deref->deref_type < 8);
   assert(deref->mode < (1 << 10));

   union packed_instr header;
   header.u32 = 0;

   header.deref.instr_type = deref->instr.type;
   header.deref.deref_type = deref->deref_type;
   header.deref.mode = deref->mode;

   write_dest(ctx, &deref->dest, header);
   encode_type_to_blob(ctx->blob, deref->type);

   if (deref->deref_type == nir_deref_type_var) {
      write_object(ctx, deref->var);
      return;
   }

   write_src(ctx, &deref->parent);

   switch (deref->deref_type) {
   case nir_deref_type_struct:
      blob_write_uint32(ctx->blob, deref->strct.index);
      break;

   case nir_deref_type_array:
   case nir_deref_type_ptr_as_array:
      write_src(ctx, &deref->arr.index);
      break;

   case nir_deref_type_cast:
      blob_write_uint32(ctx->blob, deref->cast.ptr_stride);
      break;

   case nir_deref_type_array_wildcard:
      /* Nothing to do */
      break;

   default:
      unreachable("Invalid deref type");
   }
}

static nir_deref_instr *
read_deref(read_ctx *ctx, union packed_instr header)
{
   nir_deref_type deref_type = header.deref.deref_type;
   nir_deref_instr *deref = nir_deref_instr_create(ctx->nir, deref_type);

   read_dest(ctx, &deref->dest, &deref->instr, header);

   deref->mode = header.deref.mode;
   deref->type = decode_type_from_blob(ctx->blob);

   if (deref_type == nir_deref_type_var) {
      deref->var = read_object(ctx);
      return deref;
   }

   read_src(ctx, &deref->parent, &deref->instr);

   switch (deref->deref_type) {
   case nir_deref_type_struct:
      deref->strct.index = blob_read_uint32(ctx->blob);
      break;

   case nir_deref_type_array:
   case nir_deref_type_ptr_as_array:
      read_src(ctx, &deref->arr.index, &deref->instr);
      break;

   case nir_deref_type_cast:
      deref->cast.ptr_stride = blob_read_uint32(ctx->blob);
      break;

   case nir_deref_type_array_wildcard:
      /* Nothing to do */
      break;

   default:
      unreachable("Invalid deref type");
   }

   return deref;
}

static void
write_intrinsic(write_ctx *ctx, const nir_intrinsic_instr *intrin)
{
   /* 9 bits for nir_intrinsic_op */
   STATIC_ASSERT(nir_num_intrinsics <= 512);
   unsigned num_srcs = nir_intrinsic_infos[intrin->intrinsic].num_srcs;
   unsigned num_indices = nir_intrinsic_infos[intrin->intrinsic].num_indices;
   assert(intrin->intrinsic < 512);

   union packed_instr header;
   header.u32 = 0;

   header.intrinsic.instr_type = intrin->instr.type;
   header.intrinsic.intrinsic = intrin->intrinsic;
   header.intrinsic.num_components =
      encode_num_components_in_3bits(intrin->num_components);

   if (nir_intrinsic_infos[intrin->intrinsic].has_dest)
      write_dest(ctx, &intrin->dest, header);
   else
      blob_write_uint32(ctx->blob, header.u32);

   for (unsigned i = 0; i < num_srcs; i++)
      write_src(ctx, &intrin->src[i]);

   for (unsigned i = 0; i < num_indices; i++)
      blob_write_uint32(ctx->blob, intrin->const_index[i]);
}

static nir_intrinsic_instr *
read_intrinsic(read_ctx *ctx, union packed_instr header)
{
   nir_intrinsic_op op = header.intrinsic.intrinsic;
   nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(ctx->nir, op);

   unsigned num_srcs = nir_intrinsic_infos[op].num_srcs;
   unsigned num_indices = nir_intrinsic_infos[op].num_indices;

   intrin->num_components =
      decode_num_components_in_3bits(header.intrinsic.num_components);

   if (nir_intrinsic_infos[op].has_dest)
      read_dest(ctx, &intrin->dest, &intrin->instr, header);

   for (unsigned i = 0; i < num_srcs; i++)
      read_src(ctx, &intrin->src[i], &intrin->instr);

   for (unsigned i = 0; i < num_indices; i++)
      intrin->const_index[i] = blob_read_uint32(ctx->blob);

   return intrin;
}

static void
write_load_const(write_ctx *ctx, const nir_load_const_instr *lc)
{
   assert(lc->def.num_components >= 1 && lc->def.num_components <= 16);
   union packed_instr header;
   header.u32 = 0;

   header.load_const.instr_type = lc->instr.type;
   header.load_const.last_component = lc->def.num_components - 1;
   header.load_const.bit_size = encode_bit_size_3bits(lc->def.bit_size);

   blob_write_uint32(ctx->blob, header.u32);
   blob_write_bytes(ctx->blob, lc->value, sizeof(*lc->value) * lc->def.num_components);
   write_add_object(ctx, &lc->def);
}

static nir_load_const_instr *
read_load_const(read_ctx *ctx, union packed_instr header)
{
   nir_load_const_instr *lc =
      nir_load_const_instr_create(ctx->nir, header.load_const.last_component + 1,
                                  decode_bit_size_3bits(header.load_const.bit_size));

   blob_copy_bytes(ctx->blob, lc->value, sizeof(*lc->value) * lc->def.num_components);
   read_add_object(ctx, &lc->def);
   return lc;
}

static void
write_ssa_undef(write_ctx *ctx, const nir_ssa_undef_instr *undef)
{
   assert(undef->def.num_components >= 1 && undef->def.num_components <= 16);

   union packed_instr header;
   header.u32 = 0;

   header.undef.instr_type = undef->instr.type;
   header.undef.last_component = undef->def.num_components - 1;
   header.undef.bit_size = encode_bit_size_3bits(undef->def.bit_size);

   blob_write_uint32(ctx->blob, header.u32);
   write_add_object(ctx, &undef->def);
}

static nir_ssa_undef_instr *
read_ssa_undef(read_ctx *ctx, union packed_instr header)
{
   nir_ssa_undef_instr *undef =
      nir_ssa_undef_instr_create(ctx->nir, header.undef.last_component + 1,
                                 decode_bit_size_3bits(header.undef.bit_size));

   read_add_object(ctx, &undef->def);
   return undef;
}

union packed_tex_data {
   uint32_t u32;
   struct {
      enum glsl_sampler_dim sampler_dim:4;
      nir_alu_type dest_type:8;
      unsigned coord_components:3;
      unsigned is_array:1;
      unsigned is_shadow:1;
      unsigned is_new_style_shadow:1;
      unsigned component:2;
      unsigned unused:10; /* Mark unused for valgrind. */
   } u;
};

static void
write_tex(write_ctx *ctx, const nir_tex_instr *tex)
{
   assert(tex->num_srcs < 16);
   assert(tex->op < 16);
   assert(tex->texture_array_size < 1024);

   union packed_instr header;
   header.u32 = 0;

   header.tex.instr_type = tex->instr.type;
   header.tex.num_srcs = tex->num_srcs;
   header.tex.op = tex->op;
   header.tex.texture_array_size = tex->texture_array_size;

   write_dest(ctx, &tex->dest, header);

   blob_write_uint32(ctx->blob, tex->texture_index);
   blob_write_uint32(ctx->blob, tex->sampler_index);
   if (tex->op == nir_texop_tg4)
      blob_write_bytes(ctx->blob, tex->tg4_offsets, sizeof(tex->tg4_offsets));

   STATIC_ASSERT(sizeof(union packed_tex_data) == sizeof(uint32_t));
   union packed_tex_data packed = {
      .u.sampler_dim = tex->sampler_dim,
      .u.dest_type = tex->dest_type,
      .u.coord_components = tex->coord_components,
      .u.is_array = tex->is_array,
      .u.is_shadow = tex->is_shadow,
      .u.is_new_style_shadow = tex->is_new_style_shadow,
      .u.component = tex->component,
   };
   blob_write_uint32(ctx->blob, packed.u32);

   for (unsigned i = 0; i < tex->num_srcs; i++) {
      blob_write_uint32(ctx->blob, tex->src[i].src_type);
      write_src(ctx, &tex->src[i].src);
   }
}

static nir_tex_instr *
read_tex(read_ctx *ctx, union packed_instr header)
{
   nir_tex_instr *tex = nir_tex_instr_create(ctx->nir, header.tex.num_srcs);

   read_dest(ctx, &tex->dest, &tex->instr, header);

   tex->op = header.tex.op;
   tex->texture_index = blob_read_uint32(ctx->blob);
   tex->texture_array_size = header.tex.texture_array_size;
   tex->sampler_index = blob_read_uint32(ctx->blob);
   if (tex->op == nir_texop_tg4)
      blob_copy_bytes(ctx->blob, tex->tg4_offsets, sizeof(tex->tg4_offsets));

   union packed_tex_data packed;
   packed.u32 = blob_read_uint32(ctx->blob);
   tex->sampler_dim = packed.u.sampler_dim;
   tex->dest_type = packed.u.dest_type;
   tex->coord_components = packed.u.coord_components;
   tex->is_array = packed.u.is_array;
   tex->is_shadow = packed.u.is_shadow;
   tex->is_new_style_shadow = packed.u.is_new_style_shadow;
   tex->component = packed.u.component;

   for (unsigned i = 0; i < tex->num_srcs; i++) {
      tex->src[i].src_type = blob_read_uint32(ctx->blob);
      read_src(ctx, &tex->src[i].src, &tex->instr);
   }

   return tex;
}

static void
write_phi(write_ctx *ctx, const nir_phi_instr *phi)
{
   union packed_instr header;
   header.u32 = 0;

   header.phi.instr_type = phi->instr.type;
   header.phi.num_srcs = exec_list_length(&phi->srcs);

   /* Phi nodes are special, since they may reference SSA definitions and
    * basic blocks that don't exist yet. We leave two empty uint32_t's here,
    * and then store enough information so that a later fixup pass can fill
    * them in correctly.
    */
   write_dest(ctx, &phi->dest, header);

   nir_foreach_phi_src(src, phi) {
      assert(src->src.is_ssa);
      size_t blob_offset = blob_reserve_uint32(ctx->blob);
      ASSERTED size_t blob_offset2 = blob_reserve_uint32(ctx->blob);
      assert(blob_offset + sizeof(uint32_t) == blob_offset2);
      write_phi_fixup fixup = {
         .blob_offset = blob_offset,
         .src = src->src.ssa,
         .block = src->pred,
      };
      util_dynarray_append(&ctx->phi_fixups, write_phi_fixup, fixup);
   }
}

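/* Called at the end of write_function_impl: now that every SSA def and
 * block in the impl has an index, patch the uint32 pairs that write_phi
 * reserved with the real indices.
 */
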
static void
write_fixup_phis(write_ctx *ctx)
{
   util_dynarray_foreach(&ctx->phi_fixups, write_phi_fixup, fixup) {
      uint32_t *blob_ptr = (uint32_t *)(ctx->blob->data + fixup->blob_offset);
      blob_ptr[0] = write_lookup_object(ctx, fixup->src);
      blob_ptr[1] = write_lookup_object(ctx, fixup->block);
   }

   util_dynarray_clear(&ctx->phi_fixups);
}

static nir_phi_instr *
read_phi(read_ctx *ctx, nir_block *blk, union packed_instr header)
{
   nir_phi_instr *phi = nir_phi_instr_create(ctx->nir);

   read_dest(ctx, &phi->dest, &phi->instr, header);

   /* For similar reasons as before, we just store the index directly into the
    * pointer, and let a later pass resolve the phi sources.
    *
    * In order to ensure that the copied sources (which are just the indices
    * from the blob for now) don't get inserted into the old shader's use-def
    * lists, we have to add the phi instruction *before* we set up its
    * sources.
    */
   nir_instr_insert_after_block(blk, &phi->instr);

   for (unsigned i = 0; i < header.phi.num_srcs; i++) {
      nir_phi_src *src = ralloc(phi, nir_phi_src);

      src->src.is_ssa = true;
      src->src.ssa = (nir_ssa_def *)(uintptr_t) blob_read_uint32(ctx->blob);
      src->pred = (nir_block *)(uintptr_t) blob_read_uint32(ctx->blob);

      /* Since we're not letting nir_insert_instr handle use/def stuff for us,
       * we have to set the parent_instr manually. It doesn't really matter
       * when we do it, so we might as well do it here.
       */
      src->src.parent_instr = &phi->instr;

      /* Stash it in the list of phi sources. We'll walk this list and fix up
       * sources at the very end of read_function_impl.
       */
      list_add(&src->src.use_link, &ctx->phi_srcs);

      exec_list_push_tail(&phi->srcs, &src->node);
   }

   return phi;
}

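/* Called at the end of read_function_impl: turn the stashed indices back
 * into real pointers and move each phi source onto its SSA def's use list.
 */
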
static void
read_fixup_phis(read_ctx *ctx)
{
   list_for_each_entry_safe(nir_phi_src, src, &ctx->phi_srcs, src.use_link) {
      src->pred = read_lookup_object(ctx, (uintptr_t)src->pred);
      src->src.ssa = read_lookup_object(ctx, (uintptr_t)src->src.ssa);

      /* Remove from this list */
      list_del(&src->src.use_link);

      list_addtail(&src->src.use_link, &src->src.ssa->uses);
   }
   assert(list_is_empty(&ctx->phi_srcs));
}

static void
write_jump(write_ctx *ctx, const nir_jump_instr *jmp)
{
   assert(jmp->type < 4);

   union packed_instr header;
   header.u32 = 0;

   header.jump.instr_type = jmp->instr.type;
   header.jump.type = jmp->type;

   blob_write_uint32(ctx->blob, header.u32);
}

static nir_jump_instr *
read_jump(read_ctx *ctx, union packed_instr header)
{
   nir_jump_instr *jmp = nir_jump_instr_create(ctx->nir, header.jump.type);
   return jmp;
}

static void
write_call(write_ctx *ctx, const nir_call_instr *call)
{
   blob_write_uint32(ctx->blob, write_lookup_object(ctx, call->callee));

   for (unsigned i = 0; i < call->num_params; i++)
      write_src(ctx, &call->params[i]);
}

static nir_call_instr *
read_call(read_ctx *ctx)
{
   nir_function *callee = read_object(ctx);
   nir_call_instr *call = nir_call_instr_create(ctx->nir, callee);

   for (unsigned i = 0; i < call->num_params; i++)
      read_src(ctx, &call->params[i], call);

   return call;
}

static void
write_instr(write_ctx *ctx, const nir_instr *instr)
{
   /* We have only 4 bits for the instruction type. */
   assert(instr->type < 16);

   switch (instr->type) {
   case nir_instr_type_alu:
      write_alu(ctx, nir_instr_as_alu(instr));
      break;
   case nir_instr_type_deref:
      write_deref(ctx, nir_instr_as_deref(instr));
      break;
   case nir_instr_type_intrinsic:
      write_intrinsic(ctx, nir_instr_as_intrinsic(instr));
      break;
   case nir_instr_type_load_const:
      write_load_const(ctx, nir_instr_as_load_const(instr));
      break;
   case nir_instr_type_ssa_undef:
      write_ssa_undef(ctx, nir_instr_as_ssa_undef(instr));
      break;
   case nir_instr_type_tex:
      write_tex(ctx, nir_instr_as_tex(instr));
      break;
   case nir_instr_type_phi:
      write_phi(ctx, nir_instr_as_phi(instr));
      break;
   case nir_instr_type_jump:
      write_jump(ctx, nir_instr_as_jump(instr));
      break;
   case nir_instr_type_call:
      /* Calls have no packed header, so write the type as a bare uint32. */
      blob_write_uint32(ctx->blob, instr->type);
      write_call(ctx, nir_instr_as_call(instr));
      break;
   case nir_instr_type_parallel_copy:
      unreachable("Cannot write parallel copies");
   default:
      unreachable("bad instr type");
   }
}

static void
read_instr(read_ctx *ctx, nir_block *block)
{
   STATIC_ASSERT(sizeof(union packed_instr) == 4);
   union packed_instr header;
   header.u32 = blob_read_uint32(ctx->blob);
   nir_instr *instr;

   switch (header.any.instr_type) {
   case nir_instr_type_alu:
      instr = &read_alu(ctx, header)->instr;
      break;
   case nir_instr_type_deref:
      instr = &read_deref(ctx, header)->instr;
      break;
   case nir_instr_type_intrinsic:
      instr = &read_intrinsic(ctx, header)->instr;
      break;
   case nir_instr_type_load_const:
      instr = &read_load_const(ctx, header)->instr;
      break;
   case nir_instr_type_ssa_undef:
      instr = &read_ssa_undef(ctx, header)->instr;
      break;
   case nir_instr_type_tex:
      instr = &read_tex(ctx, header)->instr;
      break;
   case nir_instr_type_phi:
      /* Phi instructions are a bit of a special case when reading because we
       * don't want inserting the instruction to automatically handle use/defs
       * for us. Instead, we need to wait until all the blocks/instructions
       * are read so that we can set their sources up.
       */
      read_phi(ctx, block, header);
      return;
   case nir_instr_type_jump:
      instr = &read_jump(ctx, header)->instr;
      break;
   case nir_instr_type_call:
      instr = &read_call(ctx)->instr;
      break;
   case nir_instr_type_parallel_copy:
      unreachable("Cannot read parallel copies");
   default:
      unreachable("bad instr type");
   }

   nir_instr_insert_after_block(block, instr);
}

static void
write_block(write_ctx *ctx, const nir_block *block)
{
   write_add_object(ctx, block);
   blob_write_uint32(ctx->blob, exec_list_length(&block->instr_list));
   nir_foreach_instr(instr, block)
      write_instr(ctx, instr);
}

static void
read_block(read_ctx *ctx, struct exec_list *cf_list)
{
   /* Don't actually create a new block. Just use the one from the tail of
    * the list. NIR guarantees that the tail of the list is a block and that
    * no two blocks are side-by-side in the IR; the tail block should be
    * empty.
    */
   nir_block *block =
      exec_node_data(nir_block, exec_list_get_tail(cf_list), cf_node.node);

   read_add_object(ctx, block);
   unsigned num_instrs = blob_read_uint32(ctx->blob);
   for (unsigned i = 0; i < num_instrs; i++) {
      read_instr(ctx, block);
   }
}

static void
write_cf_list(write_ctx *ctx, const struct exec_list *cf_list);

static void
read_cf_list(read_ctx *ctx, struct exec_list *cf_list);

static void
write_if(write_ctx *ctx, nir_if *nif)
{
   write_src(ctx, &nif->condition);

   write_cf_list(ctx, &nif->then_list);
   write_cf_list(ctx, &nif->else_list);
}

static void
read_if(read_ctx *ctx, struct exec_list *cf_list)
{
   nir_if *nif = nir_if_create(ctx->nir);

   read_src(ctx, &nif->condition, nif);

   nir_cf_node_insert_end(cf_list, &nif->cf_node);

   read_cf_list(ctx, &nif->then_list);
   read_cf_list(ctx, &nif->else_list);
}

static void
write_loop(write_ctx *ctx, nir_loop *loop)
{
   write_cf_list(ctx, &loop->body);
}

static void
read_loop(read_ctx *ctx, struct exec_list *cf_list)
{
   nir_loop *loop = nir_loop_create(ctx->nir);

   nir_cf_node_insert_end(cf_list, &loop->cf_node);

   read_cf_list(ctx, &loop->body);
}

static void
write_cf_node(write_ctx *ctx, nir_cf_node *cf)
{
   blob_write_uint32(ctx->blob, cf->type);

   switch (cf->type) {
   case nir_cf_node_block:
      write_block(ctx, nir_cf_node_as_block(cf));
      break;
   case nir_cf_node_if:
      write_if(ctx, nir_cf_node_as_if(cf));
      break;
   case nir_cf_node_loop:
      write_loop(ctx, nir_cf_node_as_loop(cf));
      break;
   default:
      unreachable("bad cf type");
   }
}

static void
read_cf_node(read_ctx *ctx, struct exec_list *list)
{
   nir_cf_node_type type = blob_read_uint32(ctx->blob);

   switch (type) {
   case nir_cf_node_block:
      read_block(ctx, list);
      break;
   case nir_cf_node_if:
      read_if(ctx, list);
      break;
   case nir_cf_node_loop:
      read_loop(ctx, list);
      break;
   default:
      unreachable("bad cf type");
   }
}

static void
write_cf_list(write_ctx *ctx, const struct exec_list *cf_list)
{
   blob_write_uint32(ctx->blob, exec_list_length(cf_list));
   foreach_list_typed(nir_cf_node, cf, node, cf_list) {
      write_cf_node(ctx, cf);
   }
}

static void
read_cf_list(read_ctx *ctx, struct exec_list *cf_list)
{
   uint32_t num_cf_nodes = blob_read_uint32(ctx->blob);
   for (unsigned i = 0; i < num_cf_nodes; i++)
      read_cf_node(ctx, cf_list);
}

static void
write_function_impl(write_ctx *ctx, const nir_function_impl *fi)
{
   write_var_list(ctx, &fi->locals);
   write_reg_list(ctx, &fi->registers);
   blob_write_uint32(ctx->blob, fi->reg_alloc);

   write_cf_list(ctx, &fi->body);
   write_fixup_phis(ctx);
}

static nir_function_impl *
read_function_impl(read_ctx *ctx, nir_function *fxn)
{
   nir_function_impl *fi = nir_function_impl_create_bare(ctx->nir);
   fi->function = fxn;

   read_var_list(ctx, &fi->locals);
   read_reg_list(ctx, &fi->registers);
   fi->reg_alloc = blob_read_uint32(ctx->blob);

   read_cf_list(ctx, &fi->body);
   read_fixup_phis(ctx);

   fi->valid_metadata = 0;

   return fi;
}

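/* Function flags dword: bit 0 = is_entrypoint, bit 1 = has a name,
 * bit 2 = has an impl.
 */
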
static void
write_function(write_ctx *ctx, const nir_function *fxn)
{
   uint32_t flags = fxn->is_entrypoint;
   if (fxn->name)
      flags |= 0x2;
   if (fxn->impl)
      flags |= 0x4;
   blob_write_uint32(ctx->blob, flags);
   if (fxn->name)
      blob_write_string(ctx->blob, fxn->name);

   write_add_object(ctx, fxn);

   blob_write_uint32(ctx->blob, fxn->num_params);
   for (unsigned i = 0; i < fxn->num_params; i++) {
      uint32_t val =
         ((uint32_t)fxn->params[i].num_components) |
         ((uint32_t)fxn->params[i].bit_size) << 8;
      blob_write_uint32(ctx->blob, val);
   }

   /* At first glance, it looks like we should write the function_impl here.
    * However, call instructions reference their callee through the remap
    * table, and calls are serialized as we write the function_impls, so
    * every function needs an index first. We stop here and write
    * function_impls in a second pass.
    */
}

static void
read_function(read_ctx *ctx)
{
   uint32_t flags = blob_read_uint32(ctx->blob);
   bool has_name = flags & 0x2;
   char *name = has_name ? blob_read_string(ctx->blob) : NULL;

   nir_function *fxn = nir_function_create(ctx->nir, name);

   read_add_object(ctx, fxn);

   fxn->num_params = blob_read_uint32(ctx->blob);
   fxn->params = ralloc_array(fxn, nir_parameter, fxn->num_params);
   for (unsigned i = 0; i < fxn->num_params; i++) {
      uint32_t val = blob_read_uint32(ctx->blob);
      fxn->params[i].num_components = val & 0xff;
      fxn->params[i].bit_size = (val >> 8) & 0xff;
   }

   fxn->is_entrypoint = flags & 0x1;
   if (flags & 0x4)
      fxn->impl = NIR_SERIALIZE_FUNC_HAS_IMPL;
}

/**
 * Serialize NIR into a binary blob.
 *
 * \param strip  Don't serialize information only useful for debugging,
 *               such as variable names, making cache hits from similar
 *               shaders more likely.
 */
void
nir_serialize(struct blob *blob, const nir_shader *nir, bool strip)
{
   write_ctx ctx = {0};
   ctx.remap_table = _mesa_pointer_hash_table_create(NULL);
   ctx.blob = blob;
   ctx.nir = nir;
   ctx.strip = strip;
   util_dynarray_init(&ctx.phi_fixups, NULL);

   /* Reserve space for the index-table size; it's patched in at the end,
    * once we know how many objects were assigned indices.
    */
   size_t idx_size_offset = blob_reserve_uint32(blob);

   struct shader_info info = nir->info;
   uint32_t strings = 0;
   if (!strip && info.name)
      strings |= 0x1;
   if (!strip && info.label)
      strings |= 0x2;
   blob_write_uint32(blob, strings);
   if (!strip && info.name)
      blob_write_string(blob, info.name);
   if (!strip && info.label)
      blob_write_string(blob, info.label);
   info.name = info.label = NULL;
   blob_write_bytes(blob, (uint8_t *) &info, sizeof(info));

   write_var_list(&ctx, &nir->uniforms);
   write_var_list(&ctx, &nir->inputs);
   write_var_list(&ctx, &nir->outputs);
   write_var_list(&ctx, &nir->shared);
   write_var_list(&ctx, &nir->globals);
   write_var_list(&ctx, &nir->system_values);

   blob_write_uint32(blob, nir->num_inputs);
   blob_write_uint32(blob, nir->num_uniforms);
   blob_write_uint32(blob, nir->num_outputs);
   blob_write_uint32(blob, nir->num_shared);
   blob_write_uint32(blob, nir->scratch_size);

   blob_write_uint32(blob, exec_list_length(&nir->functions));
   nir_foreach_function(fxn, nir) {
      write_function(&ctx, fxn);
   }

   nir_foreach_function(fxn, nir) {
      if (fxn->impl)
         write_function_impl(&ctx, fxn->impl);
   }

   blob_write_uint32(blob, nir->constant_data_size);
   if (nir->constant_data_size > 0)
      blob_write_bytes(blob, nir->constant_data, nir->constant_data_size);

   *(uint32_t *)(blob->data + idx_size_offset) = ctx.next_idx;

   _mesa_hash_table_destroy(ctx.remap_table, NULL);
   util_dynarray_fini(&ctx.phi_fixups);
}

nir_shader *
nir_deserialize(void *mem_ctx,
                const struct nir_shader_compiler_options *options,
                struct blob_reader *blob)
{
   read_ctx ctx = {0};
   ctx.blob = blob;
   list_inithead(&ctx.phi_srcs);
   ctx.idx_table_len = blob_read_uint32(blob);
   ctx.idx_table = calloc(ctx.idx_table_len, sizeof(uintptr_t));

   uint32_t strings = blob_read_uint32(blob);
   char *name = (strings & 0x1) ? blob_read_string(blob) : NULL;
   char *label = (strings & 0x2) ? blob_read_string(blob) : NULL;

   struct shader_info info;
   blob_copy_bytes(blob, (uint8_t *) &info, sizeof(info));

   ctx.nir = nir_shader_create(mem_ctx, info.stage, options, NULL);

   info.name = name ? ralloc_strdup(ctx.nir, name) : NULL;
   info.label = label ? ralloc_strdup(ctx.nir, label) : NULL;

   ctx.nir->info = info;

   read_var_list(&ctx, &ctx.nir->uniforms);
   read_var_list(&ctx, &ctx.nir->inputs);
   read_var_list(&ctx, &ctx.nir->outputs);
   read_var_list(&ctx, &ctx.nir->shared);
   read_var_list(&ctx, &ctx.nir->globals);
   read_var_list(&ctx, &ctx.nir->system_values);

   ctx.nir->num_inputs = blob_read_uint32(blob);
   ctx.nir->num_uniforms = blob_read_uint32(blob);
   ctx.nir->num_outputs = blob_read_uint32(blob);
   ctx.nir->num_shared = blob_read_uint32(blob);
   ctx.nir->scratch_size = blob_read_uint32(blob);

   unsigned num_functions = blob_read_uint32(blob);
   for (unsigned i = 0; i < num_functions; i++)
      read_function(&ctx);

   nir_foreach_function(fxn, ctx.nir) {
      if (fxn->impl == NIR_SERIALIZE_FUNC_HAS_IMPL)
         fxn->impl = read_function_impl(&ctx, fxn);
   }

   ctx.nir->constant_data_size = blob_read_uint32(blob);
   if (ctx.nir->constant_data_size > 0) {
      ctx.nir->constant_data =
         ralloc_size(ctx.nir, ctx.nir->constant_data_size);
      blob_copy_bytes(blob, ctx.nir->constant_data,
                      ctx.nir->constant_data_size);
   }

   free(ctx.idx_table);

   return ctx.nir;
}

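/* Round-trip a shader through serialization in place, replacing its
 * contents with the freshly deserialized copy.
 */
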
void
nir_shader_serialize_deserialize(nir_shader *shader)
{
   const struct nir_shader_compiler_options *options = shader->options;

   struct blob writer;
   blob_init(&writer);
   nir_serialize(&writer, shader, false);

   /* Delete all of the shader's ralloc children but leave the shader
    * itself alone.
    */
   void *dead_ctx = ralloc_context(NULL);
   ralloc_adopt(dead_ctx, shader);
   ralloc_free(dead_ctx);

   dead_ctx = ralloc_context(NULL);

   struct blob_reader reader;
   blob_reader_init(&reader, writer.data, writer.size);
   nir_shader *copy = nir_deserialize(dead_ctx, options, &reader);

   blob_finish(&writer);

   nir_shader_replace(shader, copy);
   ralloc_free(dead_ctx);
}