nir: Tag entrypoint for easy recognition by nir_shader_get_entrypoint()
[mesa.git] / src / compiler / nir / nir_serialize.c
1 /*
2 * Copyright © 2017 Connor Abbott
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "nir_serialize.h"
25 #include "nir_control_flow.h"
26 #include "util/u_dynarray.h"
27
28 typedef struct {
29 size_t blob_offset;
30 nir_ssa_def *src;
31 nir_block *block;
32 } write_phi_fixup;
33
34 typedef struct {
35 const nir_shader *nir;
36
37 struct blob *blob;
38
39 /* maps pointer to index */
40 struct hash_table *remap_table;
41
42 /* the next index to assign to a NIR in-memory object */
43 uintptr_t next_idx;
44
45 /* Array of write_phi_fixup structs representing phi sources that need to
46 * be resolved in the second pass.
47 */
48 struct util_dynarray phi_fixups;
49 } write_ctx;
50
51 typedef struct {
52 nir_shader *nir;
53
54 struct blob_reader *blob;
55
56 /* the next index to assign to a NIR in-memory object */
57 uintptr_t next_idx;
58
59 /* The length of the index -> object table */
60 uintptr_t idx_table_len;
61
62 /* map from index to deserialized pointer */
63 void **idx_table;
64
65 /* List of phi sources. */
66 struct list_head phi_srcs;
67
68 } read_ctx;
69
70 static void
71 write_add_object(write_ctx *ctx, const void *obj)
72 {
73 uintptr_t index = ctx->next_idx++;
74 _mesa_hash_table_insert(ctx->remap_table, obj, (void *) index);
75 }
76
77 static uintptr_t
78 write_lookup_object(write_ctx *ctx, const void *obj)
79 {
80 struct hash_entry *entry = _mesa_hash_table_search(ctx->remap_table, obj);
81 assert(entry);
82 return (uintptr_t) entry->data;
83 }
84
85 static void
86 write_object(write_ctx *ctx, const void *obj)
87 {
88 blob_write_intptr(ctx->blob, write_lookup_object(ctx, obj));
89 }
90
91 static void
92 read_add_object(read_ctx *ctx, void *obj)
93 {
94 assert(ctx->next_idx < ctx->idx_table_len);
95 ctx->idx_table[ctx->next_idx++] = obj;
96 }
97
98 static void *
99 read_lookup_object(read_ctx *ctx, uintptr_t idx)
100 {
101 assert(idx < ctx->idx_table_len);
102 return ctx->idx_table[idx];
103 }
104
105 static void *
106 read_object(read_ctx *ctx)
107 {
108 return read_lookup_object(ctx, blob_read_intptr(ctx->blob));
109 }
110
111 static void
112 write_constant(write_ctx *ctx, const nir_constant *c)
113 {
114 blob_write_bytes(ctx->blob, c->values, sizeof(c->values));
115 blob_write_uint32(ctx->blob, c->num_elements);
116 for (unsigned i = 0; i < c->num_elements; i++)
117 write_constant(ctx, c->elements[i]);
118 }
119
120 static nir_constant *
121 read_constant(read_ctx *ctx, nir_variable *nvar)
122 {
123 nir_constant *c = ralloc(nvar, nir_constant);
124
125 blob_copy_bytes(ctx->blob, (uint8_t *)c->values, sizeof(c->values));
126 c->num_elements = blob_read_uint32(ctx->blob);
127 c->elements = ralloc_array(nvar, nir_constant *, c->num_elements);
128 for (unsigned i = 0; i < c->num_elements; i++)
129 c->elements[i] = read_constant(ctx, nvar);
130
131 return c;
132 }
133
134 static void
135 write_variable(write_ctx *ctx, const nir_variable *var)
136 {
137 write_add_object(ctx, var);
138 encode_type_to_blob(ctx->blob, var->type);
139 blob_write_uint32(ctx->blob, !!(var->name));
140 if (var->name)
141 blob_write_string(ctx->blob, var->name);
142 blob_write_bytes(ctx->blob, (uint8_t *) &var->data, sizeof(var->data));
143 blob_write_uint32(ctx->blob, var->num_state_slots);
144 blob_write_bytes(ctx->blob, (uint8_t *) var->state_slots,
145 var->num_state_slots * sizeof(nir_state_slot));
146 blob_write_uint32(ctx->blob, !!(var->constant_initializer));
147 if (var->constant_initializer)
148 write_constant(ctx, var->constant_initializer);
149 blob_write_uint32(ctx->blob, !!(var->interface_type));
150 if (var->interface_type)
151 encode_type_to_blob(ctx->blob, var->interface_type);
152 blob_write_uint32(ctx->blob, var->num_members);
153 if (var->num_members > 0) {
154 blob_write_bytes(ctx->blob, (uint8_t *) var->members,
155 var->num_members * sizeof(*var->members));
156 }
157 }
158
159 static nir_variable *
160 read_variable(read_ctx *ctx)
161 {
162 nir_variable *var = rzalloc(ctx->nir, nir_variable);
163 read_add_object(ctx, var);
164
165 var->type = decode_type_from_blob(ctx->blob);
166 bool has_name = blob_read_uint32(ctx->blob);
167 if (has_name) {
168 const char *name = blob_read_string(ctx->blob);
169 var->name = ralloc_strdup(var, name);
170 } else {
171 var->name = NULL;
172 }
173 blob_copy_bytes(ctx->blob, (uint8_t *) &var->data, sizeof(var->data));
174 var->num_state_slots = blob_read_uint32(ctx->blob);
175 var->state_slots = ralloc_array(var, nir_state_slot, var->num_state_slots);
176 blob_copy_bytes(ctx->blob, (uint8_t *) var->state_slots,
177 var->num_state_slots * sizeof(nir_state_slot));
178 bool has_const_initializer = blob_read_uint32(ctx->blob);
179 if (has_const_initializer)
180 var->constant_initializer = read_constant(ctx, var);
181 else
182 var->constant_initializer = NULL;
183 bool has_interface_type = blob_read_uint32(ctx->blob);
184 if (has_interface_type)
185 var->interface_type = decode_type_from_blob(ctx->blob);
186 else
187 var->interface_type = NULL;
188 var->num_members = blob_read_uint32(ctx->blob);
189 if (var->num_members > 0) {
190 var->members = ralloc_array(var, struct nir_variable_data,
191 var->num_members);
192 blob_copy_bytes(ctx->blob, (uint8_t *) var->members,
193 var->num_members * sizeof(*var->members));
194 }
195
196 return var;
197 }
198
199 static void
200 write_var_list(write_ctx *ctx, const struct exec_list *src)
201 {
202 blob_write_uint32(ctx->blob, exec_list_length(src));
203 foreach_list_typed(nir_variable, var, node, src) {
204 write_variable(ctx, var);
205 }
206 }
207
208 static void
209 read_var_list(read_ctx *ctx, struct exec_list *dst)
210 {
211 exec_list_make_empty(dst);
212 unsigned num_vars = blob_read_uint32(ctx->blob);
213 for (unsigned i = 0; i < num_vars; i++) {
214 nir_variable *var = read_variable(ctx);
215 exec_list_push_tail(dst, &var->node);
216 }
217 }
218
219 static void
220 write_register(write_ctx *ctx, const nir_register *reg)
221 {
222 write_add_object(ctx, reg);
223 blob_write_uint32(ctx->blob, reg->num_components);
224 blob_write_uint32(ctx->blob, reg->bit_size);
225 blob_write_uint32(ctx->blob, reg->num_array_elems);
226 blob_write_uint32(ctx->blob, reg->index);
227 blob_write_uint32(ctx->blob, !!(reg->name));
228 if (reg->name)
229 blob_write_string(ctx->blob, reg->name);
230 blob_write_uint32(ctx->blob, reg->is_global << 1 | reg->is_packed);
231 }
232
233 static nir_register *
234 read_register(read_ctx *ctx)
235 {
236 nir_register *reg = ralloc(ctx->nir, nir_register);
237 read_add_object(ctx, reg);
238 reg->num_components = blob_read_uint32(ctx->blob);
239 reg->bit_size = blob_read_uint32(ctx->blob);
240 reg->num_array_elems = blob_read_uint32(ctx->blob);
241 reg->index = blob_read_uint32(ctx->blob);
242 bool has_name = blob_read_uint32(ctx->blob);
243 if (has_name) {
244 const char *name = blob_read_string(ctx->blob);
245 reg->name = ralloc_strdup(reg, name);
246 } else {
247 reg->name = NULL;
248 }
249 unsigned flags = blob_read_uint32(ctx->blob);
250 reg->is_global = flags & 0x2;
251 reg->is_packed = flags & 0x1;
252
253 list_inithead(&reg->uses);
254 list_inithead(&reg->defs);
255 list_inithead(&reg->if_uses);
256
257 return reg;
258 }
259
260 static void
261 write_reg_list(write_ctx *ctx, const struct exec_list *src)
262 {
263 blob_write_uint32(ctx->blob, exec_list_length(src));
264 foreach_list_typed(nir_register, reg, node, src)
265 write_register(ctx, reg);
266 }
267
268 static void
269 read_reg_list(read_ctx *ctx, struct exec_list *dst)
270 {
271 exec_list_make_empty(dst);
272 unsigned num_regs = blob_read_uint32(ctx->blob);
273 for (unsigned i = 0; i < num_regs; i++) {
274 nir_register *reg = read_register(ctx);
275 exec_list_push_tail(dst, &reg->node);
276 }
277 }
278
279 static void
280 write_src(write_ctx *ctx, const nir_src *src)
281 {
282 /* Since sources are very frequent, we try to save some space when storing
283 * them. In particular, we store whether the source is a register and
284 * whether the register has an indirect index in the low two bits. We can
285 * assume that the high two bits of the index are zero, since otherwise our
286 * address space would've been exhausted allocating the remap table!
287 */
288 if (src->is_ssa) {
289 uintptr_t idx = write_lookup_object(ctx, src->ssa) << 2;
290 idx |= 1;
291 blob_write_intptr(ctx->blob, idx);
292 } else {
293 uintptr_t idx = write_lookup_object(ctx, src->reg.reg) << 2;
294 if (src->reg.indirect)
295 idx |= 2;
296 blob_write_intptr(ctx->blob, idx);
297 blob_write_uint32(ctx->blob, src->reg.base_offset);
298 if (src->reg.indirect) {
299 write_src(ctx, src->reg.indirect);
300 }
301 }
302 }
303
304 static void
305 read_src(read_ctx *ctx, nir_src *src, void *mem_ctx)
306 {
307 uintptr_t val = blob_read_intptr(ctx->blob);
308 uintptr_t idx = val >> 2;
309 src->is_ssa = val & 0x1;
310 if (src->is_ssa) {
311 src->ssa = read_lookup_object(ctx, idx);
312 } else {
313 bool is_indirect = val & 0x2;
314 src->reg.reg = read_lookup_object(ctx, idx);
315 src->reg.base_offset = blob_read_uint32(ctx->blob);
316 if (is_indirect) {
317 src->reg.indirect = ralloc(mem_ctx, nir_src);
318 read_src(ctx, src->reg.indirect, mem_ctx);
319 } else {
320 src->reg.indirect = NULL;
321 }
322 }
323 }
324
325 static void
326 write_dest(write_ctx *ctx, const nir_dest *dst)
327 {
328 uint32_t val = dst->is_ssa;
329 if (dst->is_ssa) {
330 val |= !!(dst->ssa.name) << 1;
331 val |= dst->ssa.num_components << 2;
332 val |= dst->ssa.bit_size << 5;
333 } else {
334 val |= !!(dst->reg.indirect) << 1;
335 }
336 blob_write_uint32(ctx->blob, val);
337 if (dst->is_ssa) {
338 write_add_object(ctx, &dst->ssa);
339 if (dst->ssa.name)
340 blob_write_string(ctx->blob, dst->ssa.name);
341 } else {
342 blob_write_intptr(ctx->blob, write_lookup_object(ctx, dst->reg.reg));
343 blob_write_uint32(ctx->blob, dst->reg.base_offset);
344 if (dst->reg.indirect)
345 write_src(ctx, dst->reg.indirect);
346 }
347 }
348
349 static void
350 read_dest(read_ctx *ctx, nir_dest *dst, nir_instr *instr)
351 {
352 uint32_t val = blob_read_uint32(ctx->blob);
353 bool is_ssa = val & 0x1;
354 if (is_ssa) {
355 bool has_name = val & 0x2;
356 unsigned num_components = (val >> 2) & 0x7;
357 unsigned bit_size = val >> 5;
358 char *name = has_name ? blob_read_string(ctx->blob) : NULL;
359 nir_ssa_dest_init(instr, dst, num_components, bit_size, name);
360 read_add_object(ctx, &dst->ssa);
361 } else {
362 bool is_indirect = val & 0x2;
363 dst->reg.reg = read_object(ctx);
364 dst->reg.base_offset = blob_read_uint32(ctx->blob);
365 if (is_indirect) {
366 dst->reg.indirect = ralloc(instr, nir_src);
367 read_src(ctx, dst->reg.indirect, instr);
368 }
369 }
370 }
371
372 static void
373 write_alu(write_ctx *ctx, const nir_alu_instr *alu)
374 {
375 blob_write_uint32(ctx->blob, alu->op);
376 uint32_t flags = alu->exact;
377 flags |= alu->dest.saturate << 1;
378 flags |= alu->dest.write_mask << 2;
379 blob_write_uint32(ctx->blob, flags);
380
381 write_dest(ctx, &alu->dest.dest);
382
383 for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
384 write_src(ctx, &alu->src[i].src);
385 flags = alu->src[i].negate;
386 flags |= alu->src[i].abs << 1;
387 for (unsigned j = 0; j < 4; j++)
388 flags |= alu->src[i].swizzle[j] << (2 + 2 * j);
389 blob_write_uint32(ctx->blob, flags);
390 }
391 }
392
393 static nir_alu_instr *
394 read_alu(read_ctx *ctx)
395 {
396 nir_op op = blob_read_uint32(ctx->blob);
397 nir_alu_instr *alu = nir_alu_instr_create(ctx->nir, op);
398
399 uint32_t flags = blob_read_uint32(ctx->blob);
400 alu->exact = flags & 1;
401 alu->dest.saturate = flags & 2;
402 alu->dest.write_mask = flags >> 2;
403
404 read_dest(ctx, &alu->dest.dest, &alu->instr);
405
406 for (unsigned i = 0; i < nir_op_infos[op].num_inputs; i++) {
407 read_src(ctx, &alu->src[i].src, &alu->instr);
408 flags = blob_read_uint32(ctx->blob);
409 alu->src[i].negate = flags & 1;
410 alu->src[i].abs = flags & 2;
411 for (unsigned j = 0; j < 4; j++)
412 alu->src[i].swizzle[j] = (flags >> (2 * j + 2)) & 3;
413 }
414
415 return alu;
416 }
417
418 static void
419 write_deref(write_ctx *ctx, const nir_deref_instr *deref)
420 {
421 blob_write_uint32(ctx->blob, deref->deref_type);
422
423 blob_write_uint32(ctx->blob, deref->mode);
424 encode_type_to_blob(ctx->blob, deref->type);
425
426 write_dest(ctx, &deref->dest);
427
428 if (deref->deref_type == nir_deref_type_var) {
429 write_object(ctx, deref->var);
430 return;
431 }
432
433 write_src(ctx, &deref->parent);
434
435 switch (deref->deref_type) {
436 case nir_deref_type_struct:
437 blob_write_uint32(ctx->blob, deref->strct.index);
438 break;
439
440 case nir_deref_type_array:
441 case nir_deref_type_ptr_as_array:
442 write_src(ctx, &deref->arr.index);
443 break;
444
445 case nir_deref_type_cast:
446 blob_write_uint32(ctx->blob, deref->cast.ptr_stride);
447 break;
448
449 case nir_deref_type_array_wildcard:
450 /* Nothing to do */
451 break;
452
453 default:
454 unreachable("Invalid deref type");
455 }
456 }
457
458 static nir_deref_instr *
459 read_deref(read_ctx *ctx)
460 {
461 nir_deref_type deref_type = blob_read_uint32(ctx->blob);
462 nir_deref_instr *deref = nir_deref_instr_create(ctx->nir, deref_type);
463
464 deref->mode = blob_read_uint32(ctx->blob);
465 deref->type = decode_type_from_blob(ctx->blob);
466
467 read_dest(ctx, &deref->dest, &deref->instr);
468
469 if (deref_type == nir_deref_type_var) {
470 deref->var = read_object(ctx);
471 return deref;
472 }
473
474 read_src(ctx, &deref->parent, &deref->instr);
475
476 switch (deref->deref_type) {
477 case nir_deref_type_struct:
478 deref->strct.index = blob_read_uint32(ctx->blob);
479 break;
480
481 case nir_deref_type_array:
482 case nir_deref_type_ptr_as_array:
483 read_src(ctx, &deref->arr.index, &deref->instr);
484 break;
485
486 case nir_deref_type_cast:
487 deref->cast.ptr_stride = blob_read_uint32(ctx->blob);
488 break;
489
490 case nir_deref_type_array_wildcard:
491 /* Nothing to do */
492 break;
493
494 default:
495 unreachable("Invalid deref type");
496 }
497
498 return deref;
499 }
500
501 static void
502 write_intrinsic(write_ctx *ctx, const nir_intrinsic_instr *intrin)
503 {
504 blob_write_uint32(ctx->blob, intrin->intrinsic);
505
506 unsigned num_srcs = nir_intrinsic_infos[intrin->intrinsic].num_srcs;
507 unsigned num_indices = nir_intrinsic_infos[intrin->intrinsic].num_indices;
508
509 blob_write_uint32(ctx->blob, intrin->num_components);
510
511 if (nir_intrinsic_infos[intrin->intrinsic].has_dest)
512 write_dest(ctx, &intrin->dest);
513
514 for (unsigned i = 0; i < num_srcs; i++)
515 write_src(ctx, &intrin->src[i]);
516
517 for (unsigned i = 0; i < num_indices; i++)
518 blob_write_uint32(ctx->blob, intrin->const_index[i]);
519 }
520
521 static nir_intrinsic_instr *
522 read_intrinsic(read_ctx *ctx)
523 {
524 nir_intrinsic_op op = blob_read_uint32(ctx->blob);
525
526 nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(ctx->nir, op);
527
528 unsigned num_srcs = nir_intrinsic_infos[op].num_srcs;
529 unsigned num_indices = nir_intrinsic_infos[op].num_indices;
530
531 intrin->num_components = blob_read_uint32(ctx->blob);
532
533 if (nir_intrinsic_infos[op].has_dest)
534 read_dest(ctx, &intrin->dest, &intrin->instr);
535
536 for (unsigned i = 0; i < num_srcs; i++)
537 read_src(ctx, &intrin->src[i], &intrin->instr);
538
539 for (unsigned i = 0; i < num_indices; i++)
540 intrin->const_index[i] = blob_read_uint32(ctx->blob);
541
542 return intrin;
543 }
544
545 static void
546 write_load_const(write_ctx *ctx, const nir_load_const_instr *lc)
547 {
548 uint32_t val = lc->def.num_components;
549 val |= lc->def.bit_size << 3;
550 blob_write_uint32(ctx->blob, val);
551 blob_write_bytes(ctx->blob, (uint8_t *) &lc->value, sizeof(lc->value));
552 write_add_object(ctx, &lc->def);
553 }
554
555 static nir_load_const_instr *
556 read_load_const(read_ctx *ctx)
557 {
558 uint32_t val = blob_read_uint32(ctx->blob);
559
560 nir_load_const_instr *lc =
561 nir_load_const_instr_create(ctx->nir, val & 0x7, val >> 3);
562
563 blob_copy_bytes(ctx->blob, (uint8_t *) &lc->value, sizeof(lc->value));
564 read_add_object(ctx, &lc->def);
565 return lc;
566 }
567
568 static void
569 write_ssa_undef(write_ctx *ctx, const nir_ssa_undef_instr *undef)
570 {
571 uint32_t val = undef->def.num_components;
572 val |= undef->def.bit_size << 3;
573 blob_write_uint32(ctx->blob, val);
574 write_add_object(ctx, &undef->def);
575 }
576
577 static nir_ssa_undef_instr *
578 read_ssa_undef(read_ctx *ctx)
579 {
580 uint32_t val = blob_read_uint32(ctx->blob);
581
582 nir_ssa_undef_instr *undef =
583 nir_ssa_undef_instr_create(ctx->nir, val & 0x7, val >> 3);
584
585 read_add_object(ctx, &undef->def);
586 return undef;
587 }
588
589 union packed_tex_data {
590 uint32_t u32;
591 struct {
592 enum glsl_sampler_dim sampler_dim:4;
593 nir_alu_type dest_type:8;
594 unsigned coord_components:3;
595 unsigned is_array:1;
596 unsigned is_shadow:1;
597 unsigned is_new_style_shadow:1;
598 unsigned component:2;
599 unsigned unused:10; /* Mark unused for valgrind. */
600 } u;
601 };
602
603 static void
604 write_tex(write_ctx *ctx, const nir_tex_instr *tex)
605 {
606 blob_write_uint32(ctx->blob, tex->num_srcs);
607 blob_write_uint32(ctx->blob, tex->op);
608 blob_write_uint32(ctx->blob, tex->texture_index);
609 blob_write_uint32(ctx->blob, tex->texture_array_size);
610 blob_write_uint32(ctx->blob, tex->sampler_index);
611
612 STATIC_ASSERT(sizeof(union packed_tex_data) == sizeof(uint32_t));
613 union packed_tex_data packed = {
614 .u.sampler_dim = tex->sampler_dim,
615 .u.dest_type = tex->dest_type,
616 .u.coord_components = tex->coord_components,
617 .u.is_array = tex->is_array,
618 .u.is_shadow = tex->is_shadow,
619 .u.is_new_style_shadow = tex->is_new_style_shadow,
620 .u.component = tex->component,
621 };
622 blob_write_uint32(ctx->blob, packed.u32);
623
624 write_dest(ctx, &tex->dest);
625 for (unsigned i = 0; i < tex->num_srcs; i++) {
626 blob_write_uint32(ctx->blob, tex->src[i].src_type);
627 write_src(ctx, &tex->src[i].src);
628 }
629 }
630
631 static nir_tex_instr *
632 read_tex(read_ctx *ctx)
633 {
634 unsigned num_srcs = blob_read_uint32(ctx->blob);
635 nir_tex_instr *tex = nir_tex_instr_create(ctx->nir, num_srcs);
636
637 tex->op = blob_read_uint32(ctx->blob);
638 tex->texture_index = blob_read_uint32(ctx->blob);
639 tex->texture_array_size = blob_read_uint32(ctx->blob);
640 tex->sampler_index = blob_read_uint32(ctx->blob);
641
642 union packed_tex_data packed;
643 packed.u32 = blob_read_uint32(ctx->blob);
644 tex->sampler_dim = packed.u.sampler_dim;
645 tex->dest_type = packed.u.dest_type;
646 tex->coord_components = packed.u.coord_components;
647 tex->is_array = packed.u.is_array;
648 tex->is_shadow = packed.u.is_shadow;
649 tex->is_new_style_shadow = packed.u.is_new_style_shadow;
650 tex->component = packed.u.component;
651
652 read_dest(ctx, &tex->dest, &tex->instr);
653 for (unsigned i = 0; i < tex->num_srcs; i++) {
654 tex->src[i].src_type = blob_read_uint32(ctx->blob);
655 read_src(ctx, &tex->src[i].src, &tex->instr);
656 }
657
658 return tex;
659 }
660
661 static void
662 write_phi(write_ctx *ctx, const nir_phi_instr *phi)
663 {
664 /* Phi nodes are special, since they may reference SSA definitions and
665 * basic blocks that don't exist yet. We leave two empty uintptr_t's here,
666 * and then store enough information so that a later fixup pass can fill
667 * them in correctly.
668 */
669 write_dest(ctx, &phi->dest);
670
671 blob_write_uint32(ctx->blob, exec_list_length(&phi->srcs));
672
673 nir_foreach_phi_src(src, phi) {
674 assert(src->src.is_ssa);
675 size_t blob_offset = blob_reserve_intptr(ctx->blob);
676 MAYBE_UNUSED size_t blob_offset2 = blob_reserve_intptr(ctx->blob);
677 assert(blob_offset + sizeof(uintptr_t) == blob_offset2);
678 write_phi_fixup fixup = {
679 .blob_offset = blob_offset,
680 .src = src->src.ssa,
681 .block = src->pred,
682 };
683 util_dynarray_append(&ctx->phi_fixups, write_phi_fixup, fixup);
684 }
685 }
686
687 static void
688 write_fixup_phis(write_ctx *ctx)
689 {
690 util_dynarray_foreach(&ctx->phi_fixups, write_phi_fixup, fixup) {
691 uintptr_t *blob_ptr = (uintptr_t *)(ctx->blob->data + fixup->blob_offset);
692 blob_ptr[0] = write_lookup_object(ctx, fixup->src);
693 blob_ptr[1] = write_lookup_object(ctx, fixup->block);
694 }
695
696 util_dynarray_clear(&ctx->phi_fixups);
697 }
698
699 static nir_phi_instr *
700 read_phi(read_ctx *ctx, nir_block *blk)
701 {
702 nir_phi_instr *phi = nir_phi_instr_create(ctx->nir);
703
704 read_dest(ctx, &phi->dest, &phi->instr);
705
706 unsigned num_srcs = blob_read_uint32(ctx->blob);
707
708 /* For similar reasons as before, we just store the index directly into the
709 * pointer, and let a later pass resolve the phi sources.
710 *
711 * In order to ensure that the copied sources (which are just the indices
712 * from the blob for now) don't get inserted into the old shader's use-def
713 * lists, we have to add the phi instruction *before* we set up its
714 * sources.
715 */
716 nir_instr_insert_after_block(blk, &phi->instr);
717
718 for (unsigned i = 0; i < num_srcs; i++) {
719 nir_phi_src *src = ralloc(phi, nir_phi_src);
720
721 src->src.is_ssa = true;
722 src->src.ssa = (nir_ssa_def *) blob_read_intptr(ctx->blob);
723 src->pred = (nir_block *) blob_read_intptr(ctx->blob);
724
725 /* Since we're not letting nir_insert_instr handle use/def stuff for us,
726 * we have to set the parent_instr manually. It doesn't really matter
727 * when we do it, so we might as well do it here.
728 */
729 src->src.parent_instr = &phi->instr;
730
731 /* Stash it in the list of phi sources. We'll walk this list and fix up
732 * sources at the very end of read_function_impl.
733 */
734 list_add(&src->src.use_link, &ctx->phi_srcs);
735
736 exec_list_push_tail(&phi->srcs, &src->node);
737 }
738
739 return phi;
740 }
741
742 static void
743 read_fixup_phis(read_ctx *ctx)
744 {
745 list_for_each_entry_safe(nir_phi_src, src, &ctx->phi_srcs, src.use_link) {
746 src->pred = read_lookup_object(ctx, (uintptr_t)src->pred);
747 src->src.ssa = read_lookup_object(ctx, (uintptr_t)src->src.ssa);
748
749 /* Remove from this list */
750 list_del(&src->src.use_link);
751
752 list_addtail(&src->src.use_link, &src->src.ssa->uses);
753 }
754 assert(list_empty(&ctx->phi_srcs));
755 }
756
757 static void
758 write_jump(write_ctx *ctx, const nir_jump_instr *jmp)
759 {
760 blob_write_uint32(ctx->blob, jmp->type);
761 }
762
763 static nir_jump_instr *
764 read_jump(read_ctx *ctx)
765 {
766 nir_jump_type type = blob_read_uint32(ctx->blob);
767 nir_jump_instr *jmp = nir_jump_instr_create(ctx->nir, type);
768 return jmp;
769 }
770
771 static void
772 write_call(write_ctx *ctx, const nir_call_instr *call)
773 {
774 blob_write_intptr(ctx->blob, write_lookup_object(ctx, call->callee));
775
776 for (unsigned i = 0; i < call->num_params; i++)
777 write_src(ctx, &call->params[i]);
778 }
779
780 static nir_call_instr *
781 read_call(read_ctx *ctx)
782 {
783 nir_function *callee = read_object(ctx);
784 nir_call_instr *call = nir_call_instr_create(ctx->nir, callee);
785
786 for (unsigned i = 0; i < call->num_params; i++)
787 read_src(ctx, &call->params[i], call);
788
789 return call;
790 }
791
792 static void
793 write_instr(write_ctx *ctx, const nir_instr *instr)
794 {
795 blob_write_uint32(ctx->blob, instr->type);
796 switch (instr->type) {
797 case nir_instr_type_alu:
798 write_alu(ctx, nir_instr_as_alu(instr));
799 break;
800 case nir_instr_type_deref:
801 write_deref(ctx, nir_instr_as_deref(instr));
802 break;
803 case nir_instr_type_intrinsic:
804 write_intrinsic(ctx, nir_instr_as_intrinsic(instr));
805 break;
806 case nir_instr_type_load_const:
807 write_load_const(ctx, nir_instr_as_load_const(instr));
808 break;
809 case nir_instr_type_ssa_undef:
810 write_ssa_undef(ctx, nir_instr_as_ssa_undef(instr));
811 break;
812 case nir_instr_type_tex:
813 write_tex(ctx, nir_instr_as_tex(instr));
814 break;
815 case nir_instr_type_phi:
816 write_phi(ctx, nir_instr_as_phi(instr));
817 break;
818 case nir_instr_type_jump:
819 write_jump(ctx, nir_instr_as_jump(instr));
820 break;
821 case nir_instr_type_call:
822 write_call(ctx, nir_instr_as_call(instr));
823 break;
824 case nir_instr_type_parallel_copy:
825 unreachable("Cannot write parallel copies");
826 default:
827 unreachable("bad instr type");
828 }
829 }
830
831 static void
832 read_instr(read_ctx *ctx, nir_block *block)
833 {
834 nir_instr_type type = blob_read_uint32(ctx->blob);
835 nir_instr *instr;
836 switch (type) {
837 case nir_instr_type_alu:
838 instr = &read_alu(ctx)->instr;
839 break;
840 case nir_instr_type_deref:
841 instr = &read_deref(ctx)->instr;
842 break;
843 case nir_instr_type_intrinsic:
844 instr = &read_intrinsic(ctx)->instr;
845 break;
846 case nir_instr_type_load_const:
847 instr = &read_load_const(ctx)->instr;
848 break;
849 case nir_instr_type_ssa_undef:
850 instr = &read_ssa_undef(ctx)->instr;
851 break;
852 case nir_instr_type_tex:
853 instr = &read_tex(ctx)->instr;
854 break;
855 case nir_instr_type_phi:
856 /* Phi instructions are a bit of a special case when reading because we
857 * don't want inserting the instruction to automatically handle use/defs
858 * for us. Instead, we need to wait until all the blocks/instructions
859 * are read so that we can set their sources up.
860 */
861 read_phi(ctx, block);
862 return;
863 case nir_instr_type_jump:
864 instr = &read_jump(ctx)->instr;
865 break;
866 case nir_instr_type_call:
867 instr = &read_call(ctx)->instr;
868 break;
869 case nir_instr_type_parallel_copy:
870 unreachable("Cannot read parallel copies");
871 default:
872 unreachable("bad instr type");
873 }
874
875 nir_instr_insert_after_block(block, instr);
876 }
877
878 static void
879 write_block(write_ctx *ctx, const nir_block *block)
880 {
881 write_add_object(ctx, block);
882 blob_write_uint32(ctx->blob, exec_list_length(&block->instr_list));
883 nir_foreach_instr(instr, block)
884 write_instr(ctx, instr);
885 }
886
887 static void
888 read_block(read_ctx *ctx, struct exec_list *cf_list)
889 {
890 /* Don't actually create a new block. Just use the one from the tail of
891 * the list. NIR guarantees that the tail of the list is a block and that
892 * no two blocks are side-by-side in the IR; It should be empty.
893 */
894 nir_block *block =
895 exec_node_data(nir_block, exec_list_get_tail(cf_list), cf_node.node);
896
897 read_add_object(ctx, block);
898 unsigned num_instrs = blob_read_uint32(ctx->blob);
899 for (unsigned i = 0; i < num_instrs; i++) {
900 read_instr(ctx, block);
901 }
902 }
903
904 static void
905 write_cf_list(write_ctx *ctx, const struct exec_list *cf_list);
906
907 static void
908 read_cf_list(read_ctx *ctx, struct exec_list *cf_list);
909
910 static void
911 write_if(write_ctx *ctx, nir_if *nif)
912 {
913 write_src(ctx, &nif->condition);
914
915 write_cf_list(ctx, &nif->then_list);
916 write_cf_list(ctx, &nif->else_list);
917 }
918
919 static void
920 read_if(read_ctx *ctx, struct exec_list *cf_list)
921 {
922 nir_if *nif = nir_if_create(ctx->nir);
923
924 read_src(ctx, &nif->condition, nif);
925
926 nir_cf_node_insert_end(cf_list, &nif->cf_node);
927
928 read_cf_list(ctx, &nif->then_list);
929 read_cf_list(ctx, &nif->else_list);
930 }
931
932 static void
933 write_loop(write_ctx *ctx, nir_loop *loop)
934 {
935 write_cf_list(ctx, &loop->body);
936 }
937
938 static void
939 read_loop(read_ctx *ctx, struct exec_list *cf_list)
940 {
941 nir_loop *loop = nir_loop_create(ctx->nir);
942
943 nir_cf_node_insert_end(cf_list, &loop->cf_node);
944
945 read_cf_list(ctx, &loop->body);
946 }
947
948 static void
949 write_cf_node(write_ctx *ctx, nir_cf_node *cf)
950 {
951 blob_write_uint32(ctx->blob, cf->type);
952
953 switch (cf->type) {
954 case nir_cf_node_block:
955 write_block(ctx, nir_cf_node_as_block(cf));
956 break;
957 case nir_cf_node_if:
958 write_if(ctx, nir_cf_node_as_if(cf));
959 break;
960 case nir_cf_node_loop:
961 write_loop(ctx, nir_cf_node_as_loop(cf));
962 break;
963 default:
964 unreachable("bad cf type");
965 }
966 }
967
968 static void
969 read_cf_node(read_ctx *ctx, struct exec_list *list)
970 {
971 nir_cf_node_type type = blob_read_uint32(ctx->blob);
972
973 switch (type) {
974 case nir_cf_node_block:
975 read_block(ctx, list);
976 break;
977 case nir_cf_node_if:
978 read_if(ctx, list);
979 break;
980 case nir_cf_node_loop:
981 read_loop(ctx, list);
982 break;
983 default:
984 unreachable("bad cf type");
985 }
986 }
987
988 static void
989 write_cf_list(write_ctx *ctx, const struct exec_list *cf_list)
990 {
991 blob_write_uint32(ctx->blob, exec_list_length(cf_list));
992 foreach_list_typed(nir_cf_node, cf, node, cf_list) {
993 write_cf_node(ctx, cf);
994 }
995 }
996
997 static void
998 read_cf_list(read_ctx *ctx, struct exec_list *cf_list)
999 {
1000 uint32_t num_cf_nodes = blob_read_uint32(ctx->blob);
1001 for (unsigned i = 0; i < num_cf_nodes; i++)
1002 read_cf_node(ctx, cf_list);
1003 }
1004
1005 static void
1006 write_function_impl(write_ctx *ctx, const nir_function_impl *fi)
1007 {
1008 write_var_list(ctx, &fi->locals);
1009 write_reg_list(ctx, &fi->registers);
1010 blob_write_uint32(ctx->blob, fi->reg_alloc);
1011
1012 write_cf_list(ctx, &fi->body);
1013 write_fixup_phis(ctx);
1014 }
1015
1016 static nir_function_impl *
1017 read_function_impl(read_ctx *ctx, nir_function *fxn)
1018 {
1019 nir_function_impl *fi = nir_function_impl_create_bare(ctx->nir);
1020 fi->function = fxn;
1021
1022 read_var_list(ctx, &fi->locals);
1023 read_reg_list(ctx, &fi->registers);
1024 fi->reg_alloc = blob_read_uint32(ctx->blob);
1025
1026 read_cf_list(ctx, &fi->body);
1027 read_fixup_phis(ctx);
1028
1029 fi->valid_metadata = 0;
1030
1031 return fi;
1032 }
1033
1034 static void
1035 write_function(write_ctx *ctx, const nir_function *fxn)
1036 {
1037 blob_write_uint32(ctx->blob, !!(fxn->name));
1038 if (fxn->name)
1039 blob_write_string(ctx->blob, fxn->name);
1040
1041 write_add_object(ctx, fxn);
1042
1043 blob_write_uint32(ctx->blob, fxn->num_params);
1044 for (unsigned i = 0; i < fxn->num_params; i++) {
1045 uint32_t val =
1046 ((uint32_t)fxn->params[i].num_components) |
1047 ((uint32_t)fxn->params[i].bit_size) << 8;
1048 blob_write_uint32(ctx->blob, val);
1049 }
1050
1051 blob_write_uint32(ctx->blob, fxn->is_entrypoint);
1052
1053 /* At first glance, it looks like we should write the function_impl here.
1054 * However, call instructions need to be able to reference at least the
1055 * function and those will get processed as we write the function_impls.
1056 * We stop here and write function_impls as a second pass.
1057 */
1058 }
1059
1060 static void
1061 read_function(read_ctx *ctx)
1062 {
1063 bool has_name = blob_read_uint32(ctx->blob);
1064 char *name = has_name ? blob_read_string(ctx->blob) : NULL;
1065
1066 nir_function *fxn = nir_function_create(ctx->nir, name);
1067
1068 read_add_object(ctx, fxn);
1069
1070 fxn->num_params = blob_read_uint32(ctx->blob);
1071 fxn->params = ralloc_array(fxn, nir_parameter, fxn->num_params);
1072 for (unsigned i = 0; i < fxn->num_params; i++) {
1073 uint32_t val = blob_read_uint32(ctx->blob);
1074 fxn->params[i].num_components = val & 0xff;
1075 fxn->params[i].bit_size = (val >> 8) & 0xff;
1076 }
1077
1078 fxn->is_entrypoint = blob_read_uint32(ctx->blob);
1079 }
1080
1081 void
1082 nir_serialize(struct blob *blob, const nir_shader *nir)
1083 {
1084 write_ctx ctx;
1085 ctx.remap_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
1086 _mesa_key_pointer_equal);
1087 ctx.next_idx = 0;
1088 ctx.blob = blob;
1089 ctx.nir = nir;
1090 util_dynarray_init(&ctx.phi_fixups, NULL);
1091
1092 size_t idx_size_offset = blob_reserve_intptr(blob);
1093
1094 struct shader_info info = nir->info;
1095 uint32_t strings = 0;
1096 if (info.name)
1097 strings |= 0x1;
1098 if (info.label)
1099 strings |= 0x2;
1100 blob_write_uint32(blob, strings);
1101 if (info.name)
1102 blob_write_string(blob, info.name);
1103 if (info.label)
1104 blob_write_string(blob, info.label);
1105 info.name = info.label = NULL;
1106 blob_write_bytes(blob, (uint8_t *) &info, sizeof(info));
1107
1108 write_var_list(&ctx, &nir->uniforms);
1109 write_var_list(&ctx, &nir->inputs);
1110 write_var_list(&ctx, &nir->outputs);
1111 write_var_list(&ctx, &nir->shared);
1112 write_var_list(&ctx, &nir->globals);
1113 write_var_list(&ctx, &nir->system_values);
1114
1115 write_reg_list(&ctx, &nir->registers);
1116 blob_write_uint32(blob, nir->reg_alloc);
1117 blob_write_uint32(blob, nir->num_inputs);
1118 blob_write_uint32(blob, nir->num_uniforms);
1119 blob_write_uint32(blob, nir->num_outputs);
1120 blob_write_uint32(blob, nir->num_shared);
1121
1122 blob_write_uint32(blob, exec_list_length(&nir->functions));
1123 nir_foreach_function(fxn, nir) {
1124 write_function(&ctx, fxn);
1125 }
1126
1127 nir_foreach_function(fxn, nir) {
1128 write_function_impl(&ctx, fxn->impl);
1129 }
1130
1131 blob_write_uint32(blob, nir->constant_data_size);
1132 if (nir->constant_data_size > 0)
1133 blob_write_bytes(blob, nir->constant_data, nir->constant_data_size);
1134
1135 *(uintptr_t *)(blob->data + idx_size_offset) = ctx.next_idx;
1136
1137 _mesa_hash_table_destroy(ctx.remap_table, NULL);
1138 util_dynarray_fini(&ctx.phi_fixups);
1139 }
1140
1141 nir_shader *
1142 nir_deserialize(void *mem_ctx,
1143 const struct nir_shader_compiler_options *options,
1144 struct blob_reader *blob)
1145 {
1146 read_ctx ctx;
1147 ctx.blob = blob;
1148 list_inithead(&ctx.phi_srcs);
1149 ctx.idx_table_len = blob_read_intptr(blob);
1150 ctx.idx_table = calloc(ctx.idx_table_len, sizeof(uintptr_t));
1151 ctx.next_idx = 0;
1152
1153 uint32_t strings = blob_read_uint32(blob);
1154 char *name = (strings & 0x1) ? blob_read_string(blob) : NULL;
1155 char *label = (strings & 0x2) ? blob_read_string(blob) : NULL;
1156
1157 struct shader_info info;
1158 blob_copy_bytes(blob, (uint8_t *) &info, sizeof(info));
1159
1160 ctx.nir = nir_shader_create(mem_ctx, info.stage, options, NULL);
1161
1162 info.name = name ? ralloc_strdup(ctx.nir, name) : NULL;
1163 info.label = label ? ralloc_strdup(ctx.nir, label) : NULL;
1164
1165 ctx.nir->info = info;
1166
1167 read_var_list(&ctx, &ctx.nir->uniforms);
1168 read_var_list(&ctx, &ctx.nir->inputs);
1169 read_var_list(&ctx, &ctx.nir->outputs);
1170 read_var_list(&ctx, &ctx.nir->shared);
1171 read_var_list(&ctx, &ctx.nir->globals);
1172 read_var_list(&ctx, &ctx.nir->system_values);
1173
1174 read_reg_list(&ctx, &ctx.nir->registers);
1175 ctx.nir->reg_alloc = blob_read_uint32(blob);
1176 ctx.nir->num_inputs = blob_read_uint32(blob);
1177 ctx.nir->num_uniforms = blob_read_uint32(blob);
1178 ctx.nir->num_outputs = blob_read_uint32(blob);
1179 ctx.nir->num_shared = blob_read_uint32(blob);
1180
1181 unsigned num_functions = blob_read_uint32(blob);
1182 for (unsigned i = 0; i < num_functions; i++)
1183 read_function(&ctx);
1184
1185 nir_foreach_function(fxn, ctx.nir)
1186 fxn->impl = read_function_impl(&ctx, fxn);
1187
1188 ctx.nir->constant_data_size = blob_read_uint32(blob);
1189 if (ctx.nir->constant_data_size > 0) {
1190 ctx.nir->constant_data =
1191 ralloc_size(ctx.nir, ctx.nir->constant_data_size);
1192 blob_copy_bytes(blob, ctx.nir->constant_data,
1193 ctx.nir->constant_data_size);
1194 }
1195
1196 free(ctx.idx_table);
1197
1198 return ctx.nir;
1199 }
1200
1201 nir_shader *
1202 nir_shader_serialize_deserialize(void *mem_ctx, nir_shader *s)
1203 {
1204 const struct nir_shader_compiler_options *options = s->options;
1205
1206 struct blob writer;
1207 blob_init(&writer);
1208 nir_serialize(&writer, s);
1209 ralloc_free(s);
1210
1211 struct blob_reader reader;
1212 blob_reader_init(&reader, writer.data, writer.size);
1213 nir_shader *ns = nir_deserialize(mem_ctx, options, &reader);
1214
1215 blob_finish(&writer);
1216
1217 return ns;
1218 }