nir/serialize: do ctx = {0} instead of manual initializations
[mesa.git] / src / compiler / nir / nir_serialize.c
1 /*
2 * Copyright © 2017 Connor Abbott
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "nir_serialize.h"
25 #include "nir_control_flow.h"
26 #include "util/u_dynarray.h"
27
28 #define NIR_SERIALIZE_FUNC_HAS_IMPL ((void *)(intptr_t)1)
29 #define MAX_OBJECT_IDS (1 << 30)
30
/* Records where in the blob a phi source's two reserved uint32s live, plus
 * the SSA def and predecessor block whose indices get patched in by
 * write_fixup_phis() once the whole function body has been written.
 */
typedef struct {
   size_t blob_offset;   /* byte offset of the two reserved uint32 slots */
   nir_ssa_def *src;     /* SSA def referenced by this phi source */
   nir_block *block;     /* predecessor block of this phi source */
} write_phi_fixup;
36
/* State carried through one serialization (nir -> blob) pass. */
typedef struct {
   const nir_shader *nir;

   /* destination blob being appended to */
   struct blob *blob;

   /* maps pointer to index */
   struct hash_table *remap_table;

   /* the next index to assign to a NIR in-memory object */
   uint32_t next_idx;

   /* Array of write_phi_fixup structs representing phi sources that need to
    * be resolved in the second pass.
    */
   struct util_dynarray phi_fixups;

   /* Don't write optional data such as variable names. */
   bool strip;
} write_ctx;
56
/* State carried through one deserialization (blob -> nir) pass. */
typedef struct {
   /* shader being built */
   nir_shader *nir;

   /* source blob being consumed */
   struct blob_reader *blob;

   /* the next index to assign to a NIR in-memory object */
   uint32_t next_idx;

   /* The length of the index -> object table */
   uint32_t idx_table_len;

   /* map from index to deserialized pointer */
   void **idx_table;

   /* List of phi sources. Resolved by read_fixup_phis() after all blocks
    * and instructions have been read.
    */
   struct list_head phi_srcs;

} read_ctx;
75
76 static void
77 write_add_object(write_ctx *ctx, const void *obj)
78 {
79 uint32_t index = ctx->next_idx++;
80 assert(index != MAX_OBJECT_IDS);
81 _mesa_hash_table_insert(ctx->remap_table, obj, (void *)(uintptr_t) index);
82 }
83
84 static uint32_t
85 write_lookup_object(write_ctx *ctx, const void *obj)
86 {
87 struct hash_entry *entry = _mesa_hash_table_search(ctx->remap_table, obj);
88 assert(entry);
89 return (uint32_t)(uintptr_t) entry->data;
90 }
91
92 static void
93 write_object(write_ctx *ctx, const void *obj)
94 {
95 blob_write_uint32(ctx->blob, write_lookup_object(ctx, obj));
96 }
97
98 static void
99 read_add_object(read_ctx *ctx, void *obj)
100 {
101 assert(ctx->next_idx < ctx->idx_table_len);
102 ctx->idx_table[ctx->next_idx++] = obj;
103 }
104
105 static void *
106 read_lookup_object(read_ctx *ctx, uint32_t idx)
107 {
108 assert(idx < ctx->idx_table_len);
109 return ctx->idx_table[idx];
110 }
111
112 static void *
113 read_object(read_ctx *ctx)
114 {
115 return read_lookup_object(ctx, blob_read_uint32(ctx->blob));
116 }
117
/* Serialize a nir_constant: inline values first, then the element count,
 * then each child constant recursively. read_constant() consumes in the
 * same order.
 */
static void
write_constant(write_ctx *ctx, const nir_constant *c)
{
   blob_write_bytes(ctx->blob, c->values, sizeof(c->values));
   blob_write_uint32(ctx->blob, c->num_elements);
   for (unsigned i = 0; i < c->num_elements; i++)
      write_constant(ctx, c->elements[i]);
}
126
/* Deserialize a nir_constant written by write_constant(). The constant and
 * its children are ralloc'ed off \p nvar so they share the variable's
 * lifetime.
 */
static nir_constant *
read_constant(read_ctx *ctx, nir_variable *nvar)
{
   nir_constant *c = ralloc(nvar, nir_constant);

   blob_copy_bytes(ctx->blob, (uint8_t *)c->values, sizeof(c->values));
   c->num_elements = blob_read_uint32(ctx->blob);
   c->elements = ralloc_array(nvar, nir_constant *, c->num_elements);
   for (unsigned i = 0; i < c->num_elements; i++)
      c->elements[i] = read_constant(ctx, nvar);

   return c;
}
140
/* One-uint32 header for a serialized nir_variable. The bitfield widths are
 * part of the wire format; write_variable() asserts the counts fit.
 */
union packed_var {
   uint32_t u32;
   struct {
      unsigned has_name:1;                 /* a string follows the header */
      unsigned has_constant_initializer:1; /* a nir_constant follows */
      unsigned has_interface_type:1;       /* an encoded type follows */
      unsigned num_state_slots:13;
      unsigned num_members:16;
   } u;
};
151
/* Serialize a nir_variable: type, packed_var header, then the optional
 * trailers (name, data, state slots, initializer, interface type, members)
 * in exactly the order read_variable() consumes them.
 */
static void
write_variable(write_ctx *ctx, const nir_variable *var)
{
   write_add_object(ctx, var);
   encode_type_to_blob(ctx->blob, var->type);

   /* These counts must fit the packed_var bitfield widths. */
   assert(var->num_state_slots < (1 << 13));
   assert(var->num_members < (1 << 16));

   STATIC_ASSERT(sizeof(union packed_var) == 4);
   union packed_var flags;
   flags.u32 = 0;

   flags.u.has_name = !ctx->strip && var->name;
   flags.u.has_constant_initializer = !!(var->constant_initializer);
   flags.u.has_interface_type = !!(var->interface_type);
   flags.u.num_state_slots = var->num_state_slots;
   flags.u.num_members = var->num_members;

   blob_write_uint32(ctx->blob, flags.u32);

   if (flags.u.has_name)
      blob_write_string(ctx->blob, var->name);

   /* Local copy so we can scrub fields without touching the shader. */
   struct nir_variable_data data = var->data;

   /* When stripping, we expect that the location is no longer needed,
    * which is typically after shaders are linked.
    */
   if (ctx->strip &&
       data.mode != nir_var_shader_in &&
       data.mode != nir_var_shader_out)
      data.location = 0;

   blob_write_bytes(ctx->blob, &data, sizeof(data));

   for (unsigned i = 0; i < var->num_state_slots; i++) {
      blob_write_bytes(ctx->blob, &var->state_slots[i],
                       sizeof(var->state_slots[i]));
   }
   if (var->constant_initializer)
      write_constant(ctx, var->constant_initializer);
   if (var->interface_type)
      encode_type_to_blob(ctx->blob, var->interface_type);
   if (var->num_members > 0) {
      blob_write_bytes(ctx->blob, (uint8_t *) var->members,
                       var->num_members * sizeof(*var->members));
   }
}
201
/* Deserialize a nir_variable written by write_variable(); reads the same
 * fields in the same order the writer emitted them.
 */
static nir_variable *
read_variable(read_ctx *ctx)
{
   nir_variable *var = rzalloc(ctx->nir, nir_variable);
   read_add_object(ctx, var);

   var->type = decode_type_from_blob(ctx->blob);

   union packed_var flags;
   flags.u32 = blob_read_uint32(ctx->blob);

   if (flags.u.has_name) {
      const char *name = blob_read_string(ctx->blob);
      var->name = ralloc_strdup(var, name);
   } else {
      var->name = NULL;
   }
   blob_copy_bytes(ctx->blob, (uint8_t *) &var->data, sizeof(var->data));
   var->num_state_slots = flags.u.num_state_slots;
   if (var->num_state_slots != 0) {
      var->state_slots = ralloc_array(var, nir_state_slot,
                                      var->num_state_slots);
      for (unsigned i = 0; i < var->num_state_slots; i++) {
         blob_copy_bytes(ctx->blob, &var->state_slots[i],
                         sizeof(var->state_slots[i]));
      }
   }
   if (flags.u.has_constant_initializer)
      var->constant_initializer = read_constant(ctx, var);
   else
      var->constant_initializer = NULL;
   if (flags.u.has_interface_type)
      var->interface_type = decode_type_from_blob(ctx->blob);
   else
      var->interface_type = NULL;
   var->num_members = flags.u.num_members;
   if (var->num_members > 0) {
      var->members = ralloc_array(var, struct nir_variable_data,
                                  var->num_members);
      blob_copy_bytes(ctx->blob, (uint8_t *) var->members,
                      var->num_members * sizeof(*var->members));
   }

   return var;
}
247
248 static void
249 write_var_list(write_ctx *ctx, const struct exec_list *src)
250 {
251 blob_write_uint32(ctx->blob, exec_list_length(src));
252 foreach_list_typed(nir_variable, var, node, src) {
253 write_variable(ctx, var);
254 }
255 }
256
257 static void
258 read_var_list(read_ctx *ctx, struct exec_list *dst)
259 {
260 exec_list_make_empty(dst);
261 unsigned num_vars = blob_read_uint32(ctx->blob);
262 for (unsigned i = 0; i < num_vars; i++) {
263 nir_variable *var = read_variable(ctx);
264 exec_list_push_tail(dst, &var->node);
265 }
266 }
267
/* Serialize a nir_register; the optional name is preceded by a
 * has-name flag and skipped entirely when stripping.
 */
static void
write_register(write_ctx *ctx, const nir_register *reg)
{
   write_add_object(ctx, reg);
   blob_write_uint32(ctx->blob, reg->num_components);
   blob_write_uint32(ctx->blob, reg->bit_size);
   blob_write_uint32(ctx->blob, reg->num_array_elems);
   blob_write_uint32(ctx->blob, reg->index);
   blob_write_uint32(ctx->blob, !ctx->strip && reg->name);
   if (!ctx->strip && reg->name)
      blob_write_string(ctx->blob, reg->name);
}
280
/* Deserialize a nir_register written by write_register() and initialize
 * its use/def lists (which are runtime-only and not serialized).
 */
static nir_register *
read_register(read_ctx *ctx)
{
   nir_register *reg = ralloc(ctx->nir, nir_register);
   read_add_object(ctx, reg);
   reg->num_components = blob_read_uint32(ctx->blob);
   reg->bit_size = blob_read_uint32(ctx->blob);
   reg->num_array_elems = blob_read_uint32(ctx->blob);
   reg->index = blob_read_uint32(ctx->blob);
   bool has_name = blob_read_uint32(ctx->blob);
   if (has_name) {
      const char *name = blob_read_string(ctx->blob);
      reg->name = ralloc_strdup(reg, name);
   } else {
      reg->name = NULL;
   }

   /* Rebuilt as instructions referencing this register are read. */
   list_inithead(&reg->uses);
   list_inithead(&reg->defs);
   list_inithead(&reg->if_uses);

   return reg;
}
304
305 static void
306 write_reg_list(write_ctx *ctx, const struct exec_list *src)
307 {
308 blob_write_uint32(ctx->blob, exec_list_length(src));
309 foreach_list_typed(nir_register, reg, node, src)
310 write_register(ctx, reg);
311 }
312
313 static void
314 read_reg_list(read_ctx *ctx, struct exec_list *dst)
315 {
316 exec_list_make_empty(dst);
317 unsigned num_regs = blob_read_uint32(ctx->blob);
318 for (unsigned i = 0; i < num_regs; i++) {
319 nir_register *reg = read_register(ctx);
320 exec_list_push_tail(dst, &reg->node);
321 }
322 }
323
/* Serialize a nir_src. The object index is packed into the high 30 bits of
 * one uint32: bit 0 = is_ssa, bit 1 = register has indirect. Register
 * sources additionally emit base_offset and, if present, a recursive
 * indirect source.
 */
static void
write_src(write_ctx *ctx, const nir_src *src)
{
   /* Since sources are very frequent, we try to save some space when storing
    * them. In particular, we store whether the source is a register and
    * whether the register has an indirect index in the low two bits. We can
    * assume that the high two bits of the index are zero, since otherwise our
    * address space would've been exhausted allocating the remap table!
    */
   if (src->is_ssa) {
      uint32_t idx = write_lookup_object(ctx, src->ssa) << 2;
      idx |= 1;
      blob_write_uint32(ctx->blob, idx);
   } else {
      uint32_t idx = write_lookup_object(ctx, src->reg.reg) << 2;
      if (src->reg.indirect)
         idx |= 2;
      blob_write_uint32(ctx->blob, idx);
      blob_write_uint32(ctx->blob, src->reg.base_offset);
      if (src->reg.indirect) {
         write_src(ctx, src->reg.indirect);
      }
   }
}
348
/* Deserialize a nir_src written by write_src(). \p mem_ctx owns any
 * ralloc'ed indirect source.
 */
static void
read_src(read_ctx *ctx, nir_src *src, void *mem_ctx)
{
   uint32_t val = blob_read_uint32(ctx->blob);
   uint32_t idx = val >> 2;   /* object index lives in the high 30 bits */
   src->is_ssa = val & 0x1;
   if (src->is_ssa) {
      src->ssa = read_lookup_object(ctx, idx);
   } else {
      bool is_indirect = val & 0x2;
      src->reg.reg = read_lookup_object(ctx, idx);
      src->reg.base_offset = blob_read_uint32(ctx->blob);
      if (is_indirect) {
         src->reg.indirect = ralloc(mem_ctx, nir_src);
         read_src(ctx, src->reg.indirect, mem_ctx);
      } else {
         src->reg.indirect = NULL;
      }
   }
}
369
/* Serialize a nir_dest. One uint32 packs: bit 0 = is_ssa; for SSA dests
 * bit 1 = has-name, bits 2-4 = num_components, bits 5+ = bit_size; for
 * register dests bit 1 = has-indirect. SSA defs are registered so later
 * sources can reference them by index.
 */
static void
write_dest(write_ctx *ctx, const nir_dest *dst)
{
   uint32_t val = dst->is_ssa;
   if (dst->is_ssa) {
      val |= (!ctx->strip && dst->ssa.name) << 1;
      val |= dst->ssa.num_components << 2;
      val |= dst->ssa.bit_size << 5;
   } else {
      val |= !!(dst->reg.indirect) << 1;
   }
   blob_write_uint32(ctx->blob, val);
   if (dst->is_ssa) {
      write_add_object(ctx, &dst->ssa);
      if (!ctx->strip && dst->ssa.name)
         blob_write_string(ctx->blob, dst->ssa.name);
   } else {
      blob_write_uint32(ctx->blob, write_lookup_object(ctx, dst->reg.reg));
      blob_write_uint32(ctx->blob, dst->reg.base_offset);
      if (dst->reg.indirect)
         write_src(ctx, dst->reg.indirect);
   }
}
393
/* Deserialize a nir_dest written by write_dest(); \p instr owns the dest
 * and any ralloc'ed indirect source.
 */
static void
read_dest(read_ctx *ctx, nir_dest *dst, nir_instr *instr)
{
   uint32_t val = blob_read_uint32(ctx->blob);
   bool is_ssa = val & 0x1;
   if (is_ssa) {
      bool has_name = val & 0x2;
      unsigned num_components = (val >> 2) & 0x7;
      unsigned bit_size = val >> 5;
      char *name = has_name ? blob_read_string(ctx->blob) : NULL;
      nir_ssa_dest_init(instr, dst, num_components, bit_size, name);
      /* Register the new def at the same index the writer assigned. */
      read_add_object(ctx, &dst->ssa);
   } else {
      bool is_indirect = val & 0x2;
      dst->reg.reg = read_object(ctx);
      dst->reg.base_offset = blob_read_uint32(ctx->blob);
      if (is_indirect) {
         dst->reg.indirect = ralloc(instr, nir_src);
         read_src(ctx, dst->reg.indirect, instr);
      }
   }
}
416
/* Serialize an ALU instruction: op, a packed flags word (exact,
 * no_signed_wrap, no_unsigned_wrap, saturate, write_mask), the dest, then
 * per-source flags (negate, abs, four 2-bit swizzle components).
 */
static void
write_alu(write_ctx *ctx, const nir_alu_instr *alu)
{
   blob_write_uint32(ctx->blob, alu->op);
   uint32_t flags = alu->exact;
   flags |= alu->no_signed_wrap << 1;
   flags |= alu->no_unsigned_wrap << 2;
   flags |= alu->dest.saturate << 3;
   flags |= alu->dest.write_mask << 4;
   blob_write_uint32(ctx->blob, flags);

   write_dest(ctx, &alu->dest.dest);

   for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
      write_src(ctx, &alu->src[i].src);
      flags = alu->src[i].negate;
      flags |= alu->src[i].abs << 1;
      /* 2 bits per swizzle component — assumes at most 4 components. */
      for (unsigned j = 0; j < 4; j++)
         flags |= alu->src[i].swizzle[j] << (2 + 2 * j);
      blob_write_uint32(ctx->blob, flags);
   }
}
439
/* Deserialize an ALU instruction written by write_alu(). */
static nir_alu_instr *
read_alu(read_ctx *ctx)
{
   nir_op op = blob_read_uint32(ctx->blob);
   nir_alu_instr *alu = nir_alu_instr_create(ctx->nir, op);

   uint32_t flags = blob_read_uint32(ctx->blob);
   alu->exact = flags & 1;
   alu->no_signed_wrap = flags & 2;
   alu->no_unsigned_wrap = flags & 4;
   alu->dest.saturate = flags & 8;
   alu->dest.write_mask = flags >> 4;

   read_dest(ctx, &alu->dest.dest, &alu->instr);

   for (unsigned i = 0; i < nir_op_infos[op].num_inputs; i++) {
      read_src(ctx, &alu->src[i].src, &alu->instr);
      flags = blob_read_uint32(ctx->blob);
      alu->src[i].negate = flags & 1;
      alu->src[i].abs = flags & 2;
      /* Unpack the four 2-bit swizzle components. */
      for (unsigned j = 0; j < 4; j++)
         alu->src[i].swizzle[j] = (flags >> (2 * j + 2)) & 3;
   }

   return alu;
}
466
/* Serialize a deref instruction: type enum, mode, glsl type and dest are
 * common; var derefs reference their variable and stop there, all others
 * write the parent source plus a per-kind payload.
 */
static void
write_deref(write_ctx *ctx, const nir_deref_instr *deref)
{
   blob_write_uint32(ctx->blob, deref->deref_type);

   blob_write_uint32(ctx->blob, deref->mode);
   encode_type_to_blob(ctx->blob, deref->type);

   write_dest(ctx, &deref->dest);

   if (deref->deref_type == nir_deref_type_var) {
      write_object(ctx, deref->var);
      return;
   }

   write_src(ctx, &deref->parent);

   switch (deref->deref_type) {
   case nir_deref_type_struct:
      blob_write_uint32(ctx->blob, deref->strct.index);
      break;

   case nir_deref_type_array:
   case nir_deref_type_ptr_as_array:
      write_src(ctx, &deref->arr.index);
      break;

   case nir_deref_type_cast:
      blob_write_uint32(ctx->blob, deref->cast.ptr_stride);
      break;

   case nir_deref_type_array_wildcard:
      /* Nothing to do */
      break;

   default:
      unreachable("Invalid deref type");
   }
}
506
/* Deserialize a deref instruction written by write_deref(). */
static nir_deref_instr *
read_deref(read_ctx *ctx)
{
   nir_deref_type deref_type = blob_read_uint32(ctx->blob);
   nir_deref_instr *deref = nir_deref_instr_create(ctx->nir, deref_type);

   deref->mode = blob_read_uint32(ctx->blob);
   deref->type = decode_type_from_blob(ctx->blob);

   read_dest(ctx, &deref->dest, &deref->instr);

   /* Var derefs carry only a variable reference; no parent follows. */
   if (deref_type == nir_deref_type_var) {
      deref->var = read_object(ctx);
      return deref;
   }

   read_src(ctx, &deref->parent, &deref->instr);

   switch (deref->deref_type) {
   case nir_deref_type_struct:
      deref->strct.index = blob_read_uint32(ctx->blob);
      break;

   case nir_deref_type_array:
   case nir_deref_type_ptr_as_array:
      read_src(ctx, &deref->arr.index, &deref->instr);
      break;

   case nir_deref_type_cast:
      deref->cast.ptr_stride = blob_read_uint32(ctx->blob);
      break;

   case nir_deref_type_array_wildcard:
      /* Nothing to do */
      break;

   default:
      unreachable("Invalid deref type");
   }

   return deref;
}
549
/* Serialize an intrinsic. Source and index counts are not written — they
 * are derived from nir_intrinsic_infos[op] on both sides.
 */
static void
write_intrinsic(write_ctx *ctx, const nir_intrinsic_instr *intrin)
{
   blob_write_uint32(ctx->blob, intrin->intrinsic);

   unsigned num_srcs = nir_intrinsic_infos[intrin->intrinsic].num_srcs;
   unsigned num_indices = nir_intrinsic_infos[intrin->intrinsic].num_indices;

   blob_write_uint32(ctx->blob, intrin->num_components);

   if (nir_intrinsic_infos[intrin->intrinsic].has_dest)
      write_dest(ctx, &intrin->dest);

   for (unsigned i = 0; i < num_srcs; i++)
      write_src(ctx, &intrin->src[i]);

   for (unsigned i = 0; i < num_indices; i++)
      blob_write_uint32(ctx->blob, intrin->const_index[i]);
}
569
/* Deserialize an intrinsic written by write_intrinsic(), using
 * nir_intrinsic_infos[op] for the source and index counts.
 */
static nir_intrinsic_instr *
read_intrinsic(read_ctx *ctx)
{
   nir_intrinsic_op op = blob_read_uint32(ctx->blob);

   nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(ctx->nir, op);

   unsigned num_srcs = nir_intrinsic_infos[op].num_srcs;
   unsigned num_indices = nir_intrinsic_infos[op].num_indices;

   intrin->num_components = blob_read_uint32(ctx->blob);

   if (nir_intrinsic_infos[op].has_dest)
      read_dest(ctx, &intrin->dest, &intrin->instr);

   for (unsigned i = 0; i < num_srcs; i++)
      read_src(ctx, &intrin->src[i], &intrin->instr);

   for (unsigned i = 0; i < num_indices; i++)
      intrin->const_index[i] = blob_read_uint32(ctx->blob);

   return intrin;
}
593
/* Serialize a load_const: num_components in the low 3 bits, bit_size in
 * the rest, followed by the raw constant values.
 */
static void
write_load_const(write_ctx *ctx, const nir_load_const_instr *lc)
{
   uint32_t val = lc->def.num_components;
   val |= lc->def.bit_size << 3;
   blob_write_uint32(ctx->blob, val);
   blob_write_bytes(ctx->blob, lc->value, sizeof(*lc->value) * lc->def.num_components);
   write_add_object(ctx, &lc->def);
}
603
/* Deserialize a load_const written by write_load_const(). */
static nir_load_const_instr *
read_load_const(read_ctx *ctx)
{
   uint32_t val = blob_read_uint32(ctx->blob);

   /* low 3 bits: num_components; remaining bits: bit_size */
   nir_load_const_instr *lc =
      nir_load_const_instr_create(ctx->nir, val & 0x7, val >> 3);

   blob_copy_bytes(ctx->blob, lc->value, sizeof(*lc->value) * lc->def.num_components);
   read_add_object(ctx, &lc->def);
   return lc;
}
616
/* Serialize an ssa_undef: same packed num_components/bit_size word as
 * write_load_const(), with no value payload.
 */
static void
write_ssa_undef(write_ctx *ctx, const nir_ssa_undef_instr *undef)
{
   uint32_t val = undef->def.num_components;
   val |= undef->def.bit_size << 3;
   blob_write_uint32(ctx->blob, val);
   write_add_object(ctx, &undef->def);
}
625
/* Deserialize an ssa_undef written by write_ssa_undef(). */
static nir_ssa_undef_instr *
read_ssa_undef(read_ctx *ctx)
{
   uint32_t val = blob_read_uint32(ctx->blob);

   /* low 3 bits: num_components; remaining bits: bit_size */
   nir_ssa_undef_instr *undef =
      nir_ssa_undef_instr_create(ctx->nir, val & 0x7, val >> 3);

   read_add_object(ctx, &undef->def);
   return undef;
}
637
/* One-uint32 packing of the scalar nir_tex_instr fields; part of the wire
 * format, size is static-asserted in write_tex().
 */
union packed_tex_data {
   uint32_t u32;
   struct {
      enum glsl_sampler_dim sampler_dim:4;
      nir_alu_type dest_type:8;
      unsigned coord_components:3;
      unsigned is_array:1;
      unsigned is_shadow:1;
      unsigned is_new_style_shadow:1;
      unsigned component:2;
      unsigned unused:10; /* Mark unused for valgrind. */
   } u;
};
651
/* Serialize a texture instruction: scalar fields, tg4 offsets, the packed
 * flag word, the dest, then each source with its type tag.
 */
static void
write_tex(write_ctx *ctx, const nir_tex_instr *tex)
{
   blob_write_uint32(ctx->blob, tex->num_srcs);
   blob_write_uint32(ctx->blob, tex->op);
   blob_write_uint32(ctx->blob, tex->texture_index);
   blob_write_uint32(ctx->blob, tex->texture_array_size);
   blob_write_uint32(ctx->blob, tex->sampler_index);
   blob_write_bytes(ctx->blob, tex->tg4_offsets, sizeof(tex->tg4_offsets));

   STATIC_ASSERT(sizeof(union packed_tex_data) == sizeof(uint32_t));
   union packed_tex_data packed = {
      .u.sampler_dim = tex->sampler_dim,
      .u.dest_type = tex->dest_type,
      .u.coord_components = tex->coord_components,
      .u.is_array = tex->is_array,
      .u.is_shadow = tex->is_shadow,
      .u.is_new_style_shadow = tex->is_new_style_shadow,
      .u.component = tex->component,
   };
   blob_write_uint32(ctx->blob, packed.u32);

   write_dest(ctx, &tex->dest);
   for (unsigned i = 0; i < tex->num_srcs; i++) {
      blob_write_uint32(ctx->blob, tex->src[i].src_type);
      write_src(ctx, &tex->src[i].src);
   }
}
680
/* Deserialize a texture instruction written by write_tex(). */
static nir_tex_instr *
read_tex(read_ctx *ctx)
{
   unsigned num_srcs = blob_read_uint32(ctx->blob);
   nir_tex_instr *tex = nir_tex_instr_create(ctx->nir, num_srcs);

   tex->op = blob_read_uint32(ctx->blob);
   tex->texture_index = blob_read_uint32(ctx->blob);
   tex->texture_array_size = blob_read_uint32(ctx->blob);
   tex->sampler_index = blob_read_uint32(ctx->blob);
   blob_copy_bytes(ctx->blob, tex->tg4_offsets, sizeof(tex->tg4_offsets));

   /* Unpack the flag word written by write_tex(). */
   union packed_tex_data packed;
   packed.u32 = blob_read_uint32(ctx->blob);
   tex->sampler_dim = packed.u.sampler_dim;
   tex->dest_type = packed.u.dest_type;
   tex->coord_components = packed.u.coord_components;
   tex->is_array = packed.u.is_array;
   tex->is_shadow = packed.u.is_shadow;
   tex->is_new_style_shadow = packed.u.is_new_style_shadow;
   tex->component = packed.u.component;

   read_dest(ctx, &tex->dest, &tex->instr);
   for (unsigned i = 0; i < tex->num_srcs; i++) {
      tex->src[i].src_type = blob_read_uint32(ctx->blob);
      read_src(ctx, &tex->src[i].src, &tex->instr);
   }

   return tex;
}
711
/* Serialize a phi instruction. Each source's def/block indices are not
 * known yet, so two uint32 slots are reserved per source and a fixup record
 * is queued for write_fixup_phis() to patch later.
 */
static void
write_phi(write_ctx *ctx, const nir_phi_instr *phi)
{
   /* Phi nodes are special, since they may reference SSA definitions and
    * basic blocks that don't exist yet. We leave two empty uint32_t's here,
    * and then store enough information so that a later fixup pass can fill
    * them in correctly.
    */
   write_dest(ctx, &phi->dest);

   blob_write_uint32(ctx->blob, exec_list_length(&phi->srcs));

   nir_foreach_phi_src(src, phi) {
      assert(src->src.is_ssa);
      size_t blob_offset = blob_reserve_uint32(ctx->blob);
      /* The two reserved slots must be contiguous for the fixup pass. */
      ASSERTED size_t blob_offset2 = blob_reserve_uint32(ctx->blob);
      assert(blob_offset + sizeof(uint32_t) == blob_offset2);
      write_phi_fixup fixup = {
         .blob_offset = blob_offset,
         .src = src->src.ssa,
         .block = src->pred,
      };
      util_dynarray_append(&ctx->phi_fixups, write_phi_fixup, fixup);
   }
}
737
/* Second pass for phis: now that every SSA def and block in the function
 * has an index, patch the reserved slots left by write_phi().
 */
static void
write_fixup_phis(write_ctx *ctx)
{
   util_dynarray_foreach(&ctx->phi_fixups, write_phi_fixup, fixup) {
      uint32_t *blob_ptr = (uint32_t *)(ctx->blob->data + fixup->blob_offset);
      blob_ptr[0] = write_lookup_object(ctx, fixup->src);
      blob_ptr[1] = write_lookup_object(ctx, fixup->block);
   }

   util_dynarray_clear(&ctx->phi_fixups);
}
749
/* Deserialize a phi instruction. Source def/block pointers temporarily
 * hold blob indices; read_fixup_phis() resolves them once every block and
 * instruction has been read.
 */
static nir_phi_instr *
read_phi(read_ctx *ctx, nir_block *blk)
{
   nir_phi_instr *phi = nir_phi_instr_create(ctx->nir);

   read_dest(ctx, &phi->dest, &phi->instr);

   unsigned num_srcs = blob_read_uint32(ctx->blob);

   /* For similar reasons as before, we just store the index directly into the
    * pointer, and let a later pass resolve the phi sources.
    *
    * In order to ensure that the copied sources (which are just the indices
    * from the blob for now) don't get inserted into the old shader's use-def
    * lists, we have to add the phi instruction *before* we set up its
    * sources.
    */
   nir_instr_insert_after_block(blk, &phi->instr);

   for (unsigned i = 0; i < num_srcs; i++) {
      nir_phi_src *src = ralloc(phi, nir_phi_src);

      src->src.is_ssa = true;
      /* Stash blob indices in the pointers until read_fixup_phis(). */
      src->src.ssa = (nir_ssa_def *)(uintptr_t) blob_read_uint32(ctx->blob);
      src->pred = (nir_block *)(uintptr_t) blob_read_uint32(ctx->blob);

      /* Since we're not letting nir_insert_instr handle use/def stuff for us,
       * we have to set the parent_instr manually. It doesn't really matter
       * when we do it, so we might as well do it here.
       */
      src->src.parent_instr = &phi->instr;

      /* Stash it in the list of phi sources. We'll walk this list and fix up
       * sources at the very end of read_function_impl.
       */
      list_add(&src->src.use_link, &ctx->phi_srcs);

      exec_list_push_tail(&phi->srcs, &src->node);
   }

   return phi;
}
792
/* Resolve the stashed phi-source indices (see read_phi()) into real
 * pointers and link each source into its SSA def's use list.
 */
static void
read_fixup_phis(read_ctx *ctx)
{
   list_for_each_entry_safe(nir_phi_src, src, &ctx->phi_srcs, src.use_link) {
      src->pred = read_lookup_object(ctx, (uintptr_t)src->pred);
      src->src.ssa = read_lookup_object(ctx, (uintptr_t)src->src.ssa);

      /* Remove from this list */
      list_del(&src->src.use_link);

      list_addtail(&src->src.use_link, &src->src.ssa->uses);
   }
   assert(list_is_empty(&ctx->phi_srcs));
}
807
808 static void
809 write_jump(write_ctx *ctx, const nir_jump_instr *jmp)
810 {
811 blob_write_uint32(ctx->blob, jmp->type);
812 }
813
814 static nir_jump_instr *
815 read_jump(read_ctx *ctx)
816 {
817 nir_jump_type type = blob_read_uint32(ctx->blob);
818 nir_jump_instr *jmp = nir_jump_instr_create(ctx->nir, type);
819 return jmp;
820 }
821
822 static void
823 write_call(write_ctx *ctx, const nir_call_instr *call)
824 {
825 blob_write_uint32(ctx->blob, write_lookup_object(ctx, call->callee));
826
827 for (unsigned i = 0; i < call->num_params; i++)
828 write_src(ctx, &call->params[i]);
829 }
830
831 static nir_call_instr *
832 read_call(read_ctx *ctx)
833 {
834 nir_function *callee = read_object(ctx);
835 nir_call_instr *call = nir_call_instr_create(ctx->nir, callee);
836
837 for (unsigned i = 0; i < call->num_params; i++)
838 read_src(ctx, &call->params[i], call);
839
840 return call;
841 }
842
/* Serialize one instruction: a type tag followed by the type-specific
 * payload. Parallel copies only exist transiently (during from-SSA) and
 * are never serialized.
 */
static void
write_instr(write_ctx *ctx, const nir_instr *instr)
{
   blob_write_uint32(ctx->blob, instr->type);
   switch (instr->type) {
   case nir_instr_type_alu:
      write_alu(ctx, nir_instr_as_alu(instr));
      break;
   case nir_instr_type_deref:
      write_deref(ctx, nir_instr_as_deref(instr));
      break;
   case nir_instr_type_intrinsic:
      write_intrinsic(ctx, nir_instr_as_intrinsic(instr));
      break;
   case nir_instr_type_load_const:
      write_load_const(ctx, nir_instr_as_load_const(instr));
      break;
   case nir_instr_type_ssa_undef:
      write_ssa_undef(ctx, nir_instr_as_ssa_undef(instr));
      break;
   case nir_instr_type_tex:
      write_tex(ctx, nir_instr_as_tex(instr));
      break;
   case nir_instr_type_phi:
      write_phi(ctx, nir_instr_as_phi(instr));
      break;
   case nir_instr_type_jump:
      write_jump(ctx, nir_instr_as_jump(instr));
      break;
   case nir_instr_type_call:
      write_call(ctx, nir_instr_as_call(instr));
      break;
   case nir_instr_type_parallel_copy:
      unreachable("Cannot write parallel copies");
   default:
      unreachable("bad instr type");
   }
}
881
/* Deserialize one instruction and append it to \p block. Phis insert
 * themselves (see the comment below) and return early.
 */
static void
read_instr(read_ctx *ctx, nir_block *block)
{
   nir_instr_type type = blob_read_uint32(ctx->blob);
   nir_instr *instr;
   switch (type) {
   case nir_instr_type_alu:
      instr = &read_alu(ctx)->instr;
      break;
   case nir_instr_type_deref:
      instr = &read_deref(ctx)->instr;
      break;
   case nir_instr_type_intrinsic:
      instr = &read_intrinsic(ctx)->instr;
      break;
   case nir_instr_type_load_const:
      instr = &read_load_const(ctx)->instr;
      break;
   case nir_instr_type_ssa_undef:
      instr = &read_ssa_undef(ctx)->instr;
      break;
   case nir_instr_type_tex:
      instr = &read_tex(ctx)->instr;
      break;
   case nir_instr_type_phi:
      /* Phi instructions are a bit of a special case when reading because we
       * don't want inserting the instruction to automatically handle use/defs
       * for us. Instead, we need to wait until all the blocks/instructions
       * are read so that we can set their sources up.
       */
      read_phi(ctx, block);
      return;
   case nir_instr_type_jump:
      instr = &read_jump(ctx)->instr;
      break;
   case nir_instr_type_call:
      instr = &read_call(ctx)->instr;
      break;
   case nir_instr_type_parallel_copy:
      unreachable("Cannot read parallel copies");
   default:
      unreachable("bad instr type");
   }

   nir_instr_insert_after_block(block, instr);
}
928
/* Serialize a basic block: register it (so jumps/phis can reference it by
 * index), then write an instruction count followed by each instruction.
 */
static void
write_block(write_ctx *ctx, const nir_block *block)
{
   write_add_object(ctx, block);
   blob_write_uint32(ctx->blob, exec_list_length(&block->instr_list));
   nir_foreach_instr(instr, block)
      write_instr(ctx, instr);
}
937
/* Deserialize a basic block's instructions into the block already at the
 * tail of \p cf_list.
 */
static void
read_block(read_ctx *ctx, struct exec_list *cf_list)
{
   /* Don't actually create a new block. Just use the one from the tail of
    * the list. NIR guarantees that the tail of the list is a block and that
    * no two blocks are side-by-side in the IR; It should be empty.
    */
   nir_block *block =
      exec_node_data(nir_block, exec_list_get_tail(cf_list), cf_node.node);

   read_add_object(ctx, block);
   unsigned num_instrs = blob_read_uint32(ctx->blob);
   for (unsigned i = 0; i < num_instrs; i++) {
      read_instr(ctx, block);
   }
}
954
955 static void
956 write_cf_list(write_ctx *ctx, const struct exec_list *cf_list);
957
958 static void
959 read_cf_list(read_ctx *ctx, struct exec_list *cf_list);
960
/* Serialize an if: condition source, then the then- and else-branch CF
 * lists in that order (mirrored by read_if()).
 */
static void
write_if(write_ctx *ctx, nir_if *nif)
{
   write_src(ctx, &nif->condition);

   write_cf_list(ctx, &nif->then_list);
   write_cf_list(ctx, &nif->else_list);
}
969
/* Deserialize an if written by write_if(). The node is inserted before its
 * branch lists are read so nested blocks land inside it.
 */
static void
read_if(read_ctx *ctx, struct exec_list *cf_list)
{
   nir_if *nif = nir_if_create(ctx->nir);

   read_src(ctx, &nif->condition, nif);

   nir_cf_node_insert_end(cf_list, &nif->cf_node);

   read_cf_list(ctx, &nif->then_list);
   read_cf_list(ctx, &nif->else_list);
}
982
/* Serialize a loop: just its body CF list. */
static void
write_loop(write_ctx *ctx, nir_loop *loop)
{
   write_cf_list(ctx, &loop->body);
}
988
/* Deserialize a loop written by write_loop(). The node is inserted before
 * its body is read so nested blocks land inside it.
 */
static void
read_loop(read_ctx *ctx, struct exec_list *cf_list)
{
   nir_loop *loop = nir_loop_create(ctx->nir);

   nir_cf_node_insert_end(cf_list, &loop->cf_node);

   read_cf_list(ctx, &loop->body);
}
998
/* Serialize one control-flow node: a type tag followed by the
 * type-specific payload.
 */
static void
write_cf_node(write_ctx *ctx, nir_cf_node *cf)
{
   blob_write_uint32(ctx->blob, cf->type);

   switch (cf->type) {
   case nir_cf_node_block:
      write_block(ctx, nir_cf_node_as_block(cf));
      break;
   case nir_cf_node_if:
      write_if(ctx, nir_cf_node_as_if(cf));
      break;
   case nir_cf_node_loop:
      write_loop(ctx, nir_cf_node_as_loop(cf));
      break;
   default:
      unreachable("bad cf type");
   }
}
1018
/* Deserialize one control-flow node written by write_cf_node() into
 * \p list.
 */
static void
read_cf_node(read_ctx *ctx, struct exec_list *list)
{
   nir_cf_node_type type = blob_read_uint32(ctx->blob);

   switch (type) {
   case nir_cf_node_block:
      read_block(ctx, list);
      break;
   case nir_cf_node_if:
      read_if(ctx, list);
      break;
   case nir_cf_node_loop:
      read_loop(ctx, list);
      break;
   default:
      unreachable("bad cf type");
   }
}
1038
/* Serialize a control-flow list as a node count followed by each node. */
static void
write_cf_list(write_ctx *ctx, const struct exec_list *cf_list)
{
   blob_write_uint32(ctx->blob, exec_list_length(cf_list));
   foreach_list_typed(nir_cf_node, cf, node, cf_list) {
      write_cf_node(ctx, cf);
   }
}
1047
1048 static void
1049 read_cf_list(read_ctx *ctx, struct exec_list *cf_list)
1050 {
1051 uint32_t num_cf_nodes = blob_read_uint32(ctx->blob);
1052 for (unsigned i = 0; i < num_cf_nodes; i++)
1053 read_cf_node(ctx, cf_list);
1054 }
1055
/* Serialize a function body: locals, registers, register count, the CF
 * tree, and finally the deferred phi-source patch-up.
 */
static void
write_function_impl(write_ctx *ctx, const nir_function_impl *fi)
{
   write_var_list(ctx, &fi->locals);
   write_reg_list(ctx, &fi->registers);
   blob_write_uint32(ctx->blob, fi->reg_alloc);

   write_cf_list(ctx, &fi->body);
   write_fixup_phis(ctx);
}
1066
/* Deserialize a function body written by write_function_impl() and attach
 * it to \p fxn. Metadata is not serialized, so none is valid afterwards.
 */
static nir_function_impl *
read_function_impl(read_ctx *ctx, nir_function *fxn)
{
   nir_function_impl *fi = nir_function_impl_create_bare(ctx->nir);
   fi->function = fxn;

   read_var_list(ctx, &fi->locals);
   read_reg_list(ctx, &fi->registers);
   fi->reg_alloc = blob_read_uint32(ctx->blob);

   read_cf_list(ctx, &fi->body);
   read_fixup_phis(ctx);

   fi->valid_metadata = 0;

   return fi;
}
1084
1085 static void
1086 write_function(write_ctx *ctx, const nir_function *fxn)
1087 {
1088 uint32_t flags = fxn->is_entrypoint;
1089 if (fxn->name)
1090 flags |= 0x2;
1091 if (fxn->impl)
1092 flags |= 0x4;
1093 blob_write_uint32(ctx->blob, flags);
1094 if (fxn->name)
1095 blob_write_string(ctx->blob, fxn->name);
1096
1097 write_add_object(ctx, fxn);
1098
1099 blob_write_uint32(ctx->blob, fxn->num_params);
1100 for (unsigned i = 0; i < fxn->num_params; i++) {
1101 uint32_t val =
1102 ((uint32_t)fxn->params[i].num_components) |
1103 ((uint32_t)fxn->params[i].bit_size) << 8;
1104 blob_write_uint32(ctx->blob, val);
1105 }
1106
1107 /* At first glance, it looks like we should write the function_impl here.
1108 * However, call instructions need to be able to reference at least the
1109 * function and those will get processed as we write the function_impls.
1110 * We stop here and write function_impls as a second pass.
1111 */
1112 }
1113
1114 static void
1115 read_function(read_ctx *ctx)
1116 {
1117 uint32_t flags = blob_read_uint32(ctx->blob);
1118 bool has_name = flags & 0x2;
1119 char *name = has_name ? blob_read_string(ctx->blob) : NULL;
1120
1121 nir_function *fxn = nir_function_create(ctx->nir, name);
1122
1123 read_add_object(ctx, fxn);
1124
1125 fxn->num_params = blob_read_uint32(ctx->blob);
1126 fxn->params = ralloc_array(fxn, nir_parameter, fxn->num_params);
1127 for (unsigned i = 0; i < fxn->num_params; i++) {
1128 uint32_t val = blob_read_uint32(ctx->blob);
1129 fxn->params[i].num_components = val & 0xff;
1130 fxn->params[i].bit_size = (val >> 8) & 0xff;
1131 }
1132
1133 fxn->is_entrypoint = flags & 0x1;
1134 if (flags & 0x4)
1135 fxn->impl = NIR_SERIALIZE_FUNC_HAS_IMPL;
1136 }
1137
1138 /**
1139 * Serialize NIR into a binary blob.
1140 *
1141 * \param strip Don't serialize information only useful for debugging,
1142 * such as variable names, making cache hits from similar
1143 * shaders more likely.
1144 */
1145 void
1146 nir_serialize(struct blob *blob, const nir_shader *nir, bool strip)
1147 {
1148 write_ctx ctx = {0};
1149 ctx.remap_table = _mesa_pointer_hash_table_create(NULL);
1150 ctx.blob = blob;
1151 ctx.nir = nir;
1152 ctx.strip = strip;
1153 util_dynarray_init(&ctx.phi_fixups, NULL);
1154
1155 size_t idx_size_offset = blob_reserve_uint32(blob);
1156
1157 struct shader_info info = nir->info;
1158 uint32_t strings = 0;
1159 if (!strip && info.name)
1160 strings |= 0x1;
1161 if (!strip && info.label)
1162 strings |= 0x2;
1163 blob_write_uint32(blob, strings);
1164 if (!strip && info.name)
1165 blob_write_string(blob, info.name);
1166 if (!strip && info.label)
1167 blob_write_string(blob, info.label);
1168 info.name = info.label = NULL;
1169 blob_write_bytes(blob, (uint8_t *) &info, sizeof(info));
1170
1171 write_var_list(&ctx, &nir->uniforms);
1172 write_var_list(&ctx, &nir->inputs);
1173 write_var_list(&ctx, &nir->outputs);
1174 write_var_list(&ctx, &nir->shared);
1175 write_var_list(&ctx, &nir->globals);
1176 write_var_list(&ctx, &nir->system_values);
1177
1178 blob_write_uint32(blob, nir->num_inputs);
1179 blob_write_uint32(blob, nir->num_uniforms);
1180 blob_write_uint32(blob, nir->num_outputs);
1181 blob_write_uint32(blob, nir->num_shared);
1182 blob_write_uint32(blob, nir->scratch_size);
1183
1184 blob_write_uint32(blob, exec_list_length(&nir->functions));
1185 nir_foreach_function(fxn, nir) {
1186 write_function(&ctx, fxn);
1187 }
1188
1189 nir_foreach_function(fxn, nir) {
1190 if (fxn->impl)
1191 write_function_impl(&ctx, fxn->impl);
1192 }
1193
1194 blob_write_uint32(blob, nir->constant_data_size);
1195 if (nir->constant_data_size > 0)
1196 blob_write_bytes(blob, nir->constant_data, nir->constant_data_size);
1197
1198 *(uint32_t *)(blob->data + idx_size_offset) = ctx.next_idx;
1199
1200 _mesa_hash_table_destroy(ctx.remap_table, NULL);
1201 util_dynarray_fini(&ctx.phi_fixups);
1202 }
1203
/* Rebuild a nir_shader from a blob produced by nir_serialize().  The read
 * order below must exactly mirror the write order in nir_serialize().
 */
nir_shader *
nir_deserialize(void *mem_ctx,
                const struct nir_shader_compiler_options *options,
                struct blob_reader *blob)
{
   read_ctx ctx = {0};
   ctx.blob = blob;
   list_inithead(&ctx.phi_srcs);
   /* The writer patched the total object count into the first dword;
    * use it to size the index -> pointer table up front.
    */
   ctx.idx_table_len = blob_read_uint32(blob);
   ctx.idx_table = calloc(ctx.idx_table_len, sizeof(uintptr_t));

   /* Optional name/label strings, gated by flag bits 0x1 and 0x2. */
   uint32_t strings = blob_read_uint32(blob);
   char *name = (strings & 0x1) ? blob_read_string(blob) : NULL;
   char *label = (strings & 0x2) ? blob_read_string(blob) : NULL;

   struct shader_info info;
   blob_copy_bytes(blob, (uint8_t *) &info, sizeof(info));

   ctx.nir = nir_shader_create(mem_ctx, info.stage, options, NULL);

   /* The serialized info had its name/label pointers cleared; re-point
    * them at copies owned by the new shader.
    */
   info.name = name ? ralloc_strdup(ctx.nir, name) : NULL;
   info.label = label ? ralloc_strdup(ctx.nir, label) : NULL;

   ctx.nir->info = info;

   read_var_list(&ctx, &ctx.nir->uniforms);
   read_var_list(&ctx, &ctx.nir->inputs);
   read_var_list(&ctx, &ctx.nir->outputs);
   read_var_list(&ctx, &ctx.nir->shared);
   read_var_list(&ctx, &ctx.nir->globals);
   read_var_list(&ctx, &ctx.nir->system_values);

   ctx.nir->num_inputs = blob_read_uint32(blob);
   ctx.nir->num_uniforms = blob_read_uint32(blob);
   ctx.nir->num_outputs = blob_read_uint32(blob);
   ctx.nir->num_shared = blob_read_uint32(blob);
   ctx.nir->scratch_size = blob_read_uint32(blob);

   unsigned num_functions = blob_read_uint32(blob);
   for (unsigned i = 0; i < num_functions; i++)
      read_function(&ctx);

   /* Second pass: read each impl whose function was flagged with the
    * NIR_SERIALIZE_FUNC_HAS_IMPL sentinel by read_function().
    */
   nir_foreach_function(fxn, ctx.nir) {
      if (fxn->impl == NIR_SERIALIZE_FUNC_HAS_IMPL)
         fxn->impl = read_function_impl(&ctx, fxn);
   }

   ctx.nir->constant_data_size = blob_read_uint32(blob);
   if (ctx.nir->constant_data_size > 0) {
      ctx.nir->constant_data =
         ralloc_size(ctx.nir, ctx.nir->constant_data_size);
      blob_copy_bytes(blob, ctx.nir->constant_data,
                      ctx.nir->constant_data_size);
   }

   free(ctx.idx_table);

   return ctx.nir;
}
1263
1264 void
1265 nir_shader_serialize_deserialize(nir_shader *shader)
1266 {
1267 const struct nir_shader_compiler_options *options = shader->options;
1268
1269 struct blob writer;
1270 blob_init(&writer);
1271 nir_serialize(&writer, shader, false);
1272
1273 /* Delete all of dest's ralloc children but leave dest alone */
1274 void *dead_ctx = ralloc_context(NULL);
1275 ralloc_adopt(dead_ctx, shader);
1276 ralloc_free(dead_ctx);
1277
1278 dead_ctx = ralloc_context(NULL);
1279
1280 struct blob_reader reader;
1281 blob_reader_init(&reader, writer.data, writer.size);
1282 nir_shader *copy = nir_deserialize(dead_ctx, options, &reader);
1283
1284 blob_finish(&writer);
1285
1286 nir_shader_replace(shader, copy);
1287 ralloc_free(dead_ctx);
1288 }