nir/serialize: store 32-bit object IDs instead of 64-bit
[mesa.git] / src / compiler / nir / nir_serialize.c
1 /*
2 * Copyright © 2017 Connor Abbott
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "nir_serialize.h"
25 #include "nir_control_flow.h"
26 #include "util/u_dynarray.h"
27
28 #define MAX_OBJECT_IDS (1 << 30)
29
/* Records one phi source whose (SSA def, predecessor block) index pair was
 * reserved in the blob but cannot be written yet; write_fixup_phis() patches
 * it once all objects have IDs.
 */
typedef struct {
   size_t blob_offset;  /* offset of the reserved uint32 pair in the blob */
   nir_ssa_def *src;    /* SSA def whose index goes at blob_offset */
   nir_block *block;    /* predecessor block whose index follows it */
} write_phi_fixup;
35
/* State carried through one serialization pass. */
typedef struct {
   /* shader being written; never modified */
   const nir_shader *nir;

   struct blob *blob;

   /* maps pointer to index */
   struct hash_table *remap_table;

   /* the next index to assign to a NIR in-memory object */
   uint32_t next_idx;

   /* Array of write_phi_fixup structs representing phi sources that need to
    * be resolved in the second pass.
    */
   struct util_dynarray phi_fixups;
} write_ctx;
52
/* State carried through one deserialization pass. */
typedef struct {
   /* shader being built up as we read */
   nir_shader *nir;

   struct blob_reader *blob;

   /* the next index to assign to a NIR in-memory object */
   uint32_t next_idx;

   /* The length of the index -> object table */
   uint32_t idx_table_len;

   /* map from index to deserialized pointer */
   void **idx_table;

   /* List of phi sources awaiting pointer fixup (see read_fixup_phis). */
   struct list_head phi_srcs;

} read_ctx;
71
72 static void
73 write_add_object(write_ctx *ctx, const void *obj)
74 {
75 uint32_t index = ctx->next_idx++;
76 assert(index != MAX_OBJECT_IDS);
77 _mesa_hash_table_insert(ctx->remap_table, obj, (void *)(uintptr_t) index);
78 }
79
80 static uint32_t
81 write_lookup_object(write_ctx *ctx, const void *obj)
82 {
83 struct hash_entry *entry = _mesa_hash_table_search(ctx->remap_table, obj);
84 assert(entry);
85 return (uint32_t)(uintptr_t) entry->data;
86 }
87
88 static void
89 write_object(write_ctx *ctx, const void *obj)
90 {
91 blob_write_uint32(ctx->blob, write_lookup_object(ctx, obj));
92 }
93
94 static void
95 read_add_object(read_ctx *ctx, void *obj)
96 {
97 assert(ctx->next_idx < ctx->idx_table_len);
98 ctx->idx_table[ctx->next_idx++] = obj;
99 }
100
101 static void *
102 read_lookup_object(read_ctx *ctx, uint32_t idx)
103 {
104 assert(idx < ctx->idx_table_len);
105 return ctx->idx_table[idx];
106 }
107
108 static void *
109 read_object(read_ctx *ctx)
110 {
111 return read_lookup_object(ctx, blob_read_uint32(ctx->blob));
112 }
113
114 static void
115 write_constant(write_ctx *ctx, const nir_constant *c)
116 {
117 blob_write_bytes(ctx->blob, c->values, sizeof(c->values));
118 blob_write_uint32(ctx->blob, c->num_elements);
119 for (unsigned i = 0; i < c->num_elements; i++)
120 write_constant(ctx, c->elements[i]);
121 }
122
123 static nir_constant *
124 read_constant(read_ctx *ctx, nir_variable *nvar)
125 {
126 nir_constant *c = ralloc(nvar, nir_constant);
127
128 blob_copy_bytes(ctx->blob, (uint8_t *)c->values, sizeof(c->values));
129 c->num_elements = blob_read_uint32(ctx->blob);
130 c->elements = ralloc_array(nvar, nir_constant *, c->num_elements);
131 for (unsigned i = 0; i < c->num_elements; i++)
132 c->elements[i] = read_constant(ctx, nvar);
133
134 return c;
135 }
136
/* Serialize one nir_variable and register it in the remap table so later
 * derefs can reference it by index.  Field order here must match
 * read_variable exactly.  Optional fields are preceded by a uint32 presence
 * flag.
 */
static void
write_variable(write_ctx *ctx, const nir_variable *var)
{
   write_add_object(ctx, var);
   encode_type_to_blob(ctx->blob, var->type);
   blob_write_uint32(ctx->blob, !!(var->name));
   if (var->name)
      blob_write_string(ctx->blob, var->name);
   /* var->data is a plain struct of flags/locations; stored as raw bytes */
   blob_write_bytes(ctx->blob, (uint8_t *) &var->data, sizeof(var->data));
   blob_write_uint32(ctx->blob, var->num_state_slots);
   for (unsigned i = 0; i < var->num_state_slots; i++) {
      blob_write_bytes(ctx->blob, &var->state_slots[i],
                       sizeof(var->state_slots[i]));
   }
   blob_write_uint32(ctx->blob, !!(var->constant_initializer));
   if (var->constant_initializer)
      write_constant(ctx, var->constant_initializer);
   blob_write_uint32(ctx->blob, !!(var->interface_type));
   if (var->interface_type)
      encode_type_to_blob(ctx->blob, var->interface_type);
   blob_write_uint32(ctx->blob, var->num_members);
   if (var->num_members > 0) {
      blob_write_bytes(ctx->blob, (uint8_t *) var->members,
                       var->num_members * sizeof(*var->members));
   }
}
163
/* Mirror of write_variable: reads fields in the exact same order and
 * registers the new variable in the index table.  All allocations are
 * parented to the variable (or the shader for the variable itself).
 */
static nir_variable *
read_variable(read_ctx *ctx)
{
   nir_variable *var = rzalloc(ctx->nir, nir_variable);
   read_add_object(ctx, var);

   var->type = decode_type_from_blob(ctx->blob);
   bool has_name = blob_read_uint32(ctx->blob);
   if (has_name) {
      /* blob_read_string returns a pointer into the blob; duplicate it */
      const char *name = blob_read_string(ctx->blob);
      var->name = ralloc_strdup(var, name);
   } else {
      var->name = NULL;
   }
   blob_copy_bytes(ctx->blob, (uint8_t *) &var->data, sizeof(var->data));
   var->num_state_slots = blob_read_uint32(ctx->blob);
   if (var->num_state_slots != 0) {
      var->state_slots = ralloc_array(var, nir_state_slot,
                                      var->num_state_slots);
      for (unsigned i = 0; i < var->num_state_slots; i++) {
         blob_copy_bytes(ctx->blob, &var->state_slots[i],
                         sizeof(var->state_slots[i]));
      }
   }
   bool has_const_initializer = blob_read_uint32(ctx->blob);
   if (has_const_initializer)
      var->constant_initializer = read_constant(ctx, var);
   else
      var->constant_initializer = NULL;
   bool has_interface_type = blob_read_uint32(ctx->blob);
   if (has_interface_type)
      var->interface_type = decode_type_from_blob(ctx->blob);
   else
      var->interface_type = NULL;
   var->num_members = blob_read_uint32(ctx->blob);
   if (var->num_members > 0) {
      var->members = ralloc_array(var, struct nir_variable_data,
                                  var->num_members);
      blob_copy_bytes(ctx->blob, (uint8_t *) var->members,
                      var->num_members * sizeof(*var->members));
   }

   return var;
}
208
209 static void
210 write_var_list(write_ctx *ctx, const struct exec_list *src)
211 {
212 blob_write_uint32(ctx->blob, exec_list_length(src));
213 foreach_list_typed(nir_variable, var, node, src) {
214 write_variable(ctx, var);
215 }
216 }
217
218 static void
219 read_var_list(read_ctx *ctx, struct exec_list *dst)
220 {
221 exec_list_make_empty(dst);
222 unsigned num_vars = blob_read_uint32(ctx->blob);
223 for (unsigned i = 0; i < num_vars; i++) {
224 nir_variable *var = read_variable(ctx);
225 exec_list_push_tail(dst, &var->node);
226 }
227 }
228
229 static void
230 write_register(write_ctx *ctx, const nir_register *reg)
231 {
232 write_add_object(ctx, reg);
233 blob_write_uint32(ctx->blob, reg->num_components);
234 blob_write_uint32(ctx->blob, reg->bit_size);
235 blob_write_uint32(ctx->blob, reg->num_array_elems);
236 blob_write_uint32(ctx->blob, reg->index);
237 blob_write_uint32(ctx->blob, !!(reg->name));
238 if (reg->name)
239 blob_write_string(ctx->blob, reg->name);
240 }
241
242 static nir_register *
243 read_register(read_ctx *ctx)
244 {
245 nir_register *reg = ralloc(ctx->nir, nir_register);
246 read_add_object(ctx, reg);
247 reg->num_components = blob_read_uint32(ctx->blob);
248 reg->bit_size = blob_read_uint32(ctx->blob);
249 reg->num_array_elems = blob_read_uint32(ctx->blob);
250 reg->index = blob_read_uint32(ctx->blob);
251 bool has_name = blob_read_uint32(ctx->blob);
252 if (has_name) {
253 const char *name = blob_read_string(ctx->blob);
254 reg->name = ralloc_strdup(reg, name);
255 } else {
256 reg->name = NULL;
257 }
258
259 list_inithead(&reg->uses);
260 list_inithead(&reg->defs);
261 list_inithead(&reg->if_uses);
262
263 return reg;
264 }
265
266 static void
267 write_reg_list(write_ctx *ctx, const struct exec_list *src)
268 {
269 blob_write_uint32(ctx->blob, exec_list_length(src));
270 foreach_list_typed(nir_register, reg, node, src)
271 write_register(ctx, reg);
272 }
273
274 static void
275 read_reg_list(read_ctx *ctx, struct exec_list *dst)
276 {
277 exec_list_make_empty(dst);
278 unsigned num_regs = blob_read_uint32(ctx->blob);
279 for (unsigned i = 0; i < num_regs; i++) {
280 nir_register *reg = read_register(ctx);
281 exec_list_push_tail(dst, &reg->node);
282 }
283 }
284
/* Serialize a nir_src.  Layout of the leading uint32:
 *   bit 0      = is_ssa
 *   bit 1      = register has an indirect (register case only)
 *   bits 2..31 = object index of the SSA def or register
 * Register sources are followed by base_offset and, if present, the
 * indirect source written recursively.
 */
static void
write_src(write_ctx *ctx, const nir_src *src)
{
   /* Since sources are very frequent, we try to save some space when storing
    * them. In particular, we store whether the source is a register and
    * whether the register has an indirect index in the low two bits. We can
    * assume that the high two bits of the index are zero, since otherwise our
    * address space would've been exhausted allocating the remap table!
    */
   if (src->is_ssa) {
      uint32_t idx = write_lookup_object(ctx, src->ssa) << 2;
      idx |= 1;
      blob_write_uint32(ctx->blob, idx);
   } else {
      uint32_t idx = write_lookup_object(ctx, src->reg.reg) << 2;
      if (src->reg.indirect)
         idx |= 2;
      blob_write_uint32(ctx->blob, idx);
      blob_write_uint32(ctx->blob, src->reg.base_offset);
      if (src->reg.indirect) {
         write_src(ctx, src->reg.indirect);
      }
   }
}
309
/* Mirror of write_src: decode the packed is-ssa/has-indirect bits and the
 * object index from the leading uint32.  @mem_ctx is the ralloc parent used
 * for any indirect nir_src that must be allocated.
 */
static void
read_src(read_ctx *ctx, nir_src *src, void *mem_ctx)
{
   uint32_t val = blob_read_uint32(ctx->blob);
   uint32_t idx = val >> 2;
   src->is_ssa = val & 0x1;
   if (src->is_ssa) {
      src->ssa = read_lookup_object(ctx, idx);
   } else {
      bool is_indirect = val & 0x2;
      src->reg.reg = read_lookup_object(ctx, idx);
      src->reg.base_offset = blob_read_uint32(ctx->blob);
      if (is_indirect) {
         /* recurse for the indirect index source */
         src->reg.indirect = ralloc(mem_ctx, nir_src);
         read_src(ctx, src->reg.indirect, mem_ctx);
      } else {
         src->reg.indirect = NULL;
      }
   }
}
330
/* Serialize a nir_dest.  Layout of the leading uint32:
 *   bit 0 = is_ssa
 *   SSA case:      bit 1 = has name, bits 2..4 = num_components,
 *                  bits 5+ = bit_size
 *   register case: bit 1 = has indirect
 * An SSA dest is registered in the remap table here, which is what assigns
 * the def its object ID for later sources to reference.
 */
static void
write_dest(write_ctx *ctx, const nir_dest *dst)
{
   uint32_t val = dst->is_ssa;
   if (dst->is_ssa) {
      val |= !!(dst->ssa.name) << 1;
      val |= dst->ssa.num_components << 2;
      val |= dst->ssa.bit_size << 5;
   } else {
      val |= !!(dst->reg.indirect) << 1;
   }
   blob_write_uint32(ctx->blob, val);
   if (dst->is_ssa) {
      write_add_object(ctx, &dst->ssa);
      if (dst->ssa.name)
         blob_write_string(ctx->blob, dst->ssa.name);
   } else {
      blob_write_uint32(ctx->blob, write_lookup_object(ctx, dst->reg.reg));
      blob_write_uint32(ctx->blob, dst->reg.base_offset);
      if (dst->reg.indirect)
         write_src(ctx, dst->reg.indirect);
   }
}
354
/* Mirror of write_dest: unpack the leading uint32 and either create an SSA
 * def on @instr (registering it in the index table) or fill in the register
 * reference.  @instr is also the ralloc parent for any indirect source.
 */
static void
read_dest(read_ctx *ctx, nir_dest *dst, nir_instr *instr)
{
   uint32_t val = blob_read_uint32(ctx->blob);
   bool is_ssa = val & 0x1;
   if (is_ssa) {
      bool has_name = val & 0x2;
      unsigned num_components = (val >> 2) & 0x7;
      unsigned bit_size = val >> 5;
      char *name = has_name ? blob_read_string(ctx->blob) : NULL;
      nir_ssa_dest_init(instr, dst, num_components, bit_size, name);
      read_add_object(ctx, &dst->ssa);
   } else {
      bool is_indirect = val & 0x2;
      dst->reg.reg = read_object(ctx);
      dst->reg.base_offset = blob_read_uint32(ctx->blob);
      if (is_indirect) {
         dst->reg.indirect = ralloc(instr, nir_src);
         read_src(ctx, dst->reg.indirect, instr);
      }
   }
}
377
/* Serialize an ALU instruction: the op, a flag word packing
 * exact/no_signed_wrap/no_unsigned_wrap/saturate in bits 0-3 and the dest
 * write mask in bits 4+, then the dest, then each source followed by its
 * own flag word (negate bit 0, abs bit 1, four 2-bit swizzle fields from
 * bit 2).
 */
static void
write_alu(write_ctx *ctx, const nir_alu_instr *alu)
{
   blob_write_uint32(ctx->blob, alu->op);
   uint32_t flags = alu->exact;
   flags |= alu->no_signed_wrap << 1;
   flags |= alu->no_unsigned_wrap << 2;
   flags |= alu->dest.saturate << 3;
   flags |= alu->dest.write_mask << 4;
   blob_write_uint32(ctx->blob, flags);

   write_dest(ctx, &alu->dest.dest);

   for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
      write_src(ctx, &alu->src[i].src);
      flags = alu->src[i].negate;
      flags |= alu->src[i].abs << 1;
      for (unsigned j = 0; j < 4; j++)
         flags |= alu->src[i].swizzle[j] << (2 + 2 * j);
      blob_write_uint32(ctx->blob, flags);
   }
}
400
/* Mirror of write_alu: unpack the instruction flag word, the dest, and each
 * source's negate/abs/swizzle flag word in the same order they were
 * written.
 */
static nir_alu_instr *
read_alu(read_ctx *ctx)
{
   nir_op op = blob_read_uint32(ctx->blob);
   nir_alu_instr *alu = nir_alu_instr_create(ctx->nir, op);

   uint32_t flags = blob_read_uint32(ctx->blob);
   alu->exact = flags & 1;
   alu->no_signed_wrap = flags & 2;
   alu->no_unsigned_wrap = flags & 4;
   alu->dest.saturate = flags & 8;
   alu->dest.write_mask = flags >> 4;

   read_dest(ctx, &alu->dest.dest, &alu->instr);

   for (unsigned i = 0; i < nir_op_infos[op].num_inputs; i++) {
      read_src(ctx, &alu->src[i].src, &alu->instr);
      flags = blob_read_uint32(ctx->blob);
      alu->src[i].negate = flags & 1;
      alu->src[i].abs = flags & 2;
      /* each swizzle component occupies 2 bits starting at bit 2 */
      for (unsigned j = 0; j < 4; j++)
         alu->src[i].swizzle[j] = (flags >> (2 * j + 2)) & 3;
   }

   return alu;
}
427
/* Serialize a deref instruction: deref type, mode, result type and dest are
 * always present.  Var derefs then store just the variable's object index;
 * all other deref types store the parent source followed by a per-type
 * payload (struct member index, array index source, or cast pointer
 * stride).
 */
static void
write_deref(write_ctx *ctx, const nir_deref_instr *deref)
{
   blob_write_uint32(ctx->blob, deref->deref_type);

   blob_write_uint32(ctx->blob, deref->mode);
   encode_type_to_blob(ctx->blob, deref->type);

   write_dest(ctx, &deref->dest);

   if (deref->deref_type == nir_deref_type_var) {
      write_object(ctx, deref->var);
      return;
   }

   write_src(ctx, &deref->parent);

   switch (deref->deref_type) {
   case nir_deref_type_struct:
      blob_write_uint32(ctx->blob, deref->strct.index);
      break;

   case nir_deref_type_array:
   case nir_deref_type_ptr_as_array:
      write_src(ctx, &deref->arr.index);
      break;

   case nir_deref_type_cast:
      blob_write_uint32(ctx->blob, deref->cast.ptr_stride);
      break;

   case nir_deref_type_array_wildcard:
      /* Nothing to do */
      break;

   default:
      unreachable("Invalid deref type");
   }
}
467
/* Mirror of write_deref: reads the common fields, then either the variable
 * index (var derefs) or the parent source plus the per-type payload.
 */
static nir_deref_instr *
read_deref(read_ctx *ctx)
{
   nir_deref_type deref_type = blob_read_uint32(ctx->blob);
   nir_deref_instr *deref = nir_deref_instr_create(ctx->nir, deref_type);

   deref->mode = blob_read_uint32(ctx->blob);
   deref->type = decode_type_from_blob(ctx->blob);

   read_dest(ctx, &deref->dest, &deref->instr);

   if (deref_type == nir_deref_type_var) {
      deref->var = read_object(ctx);
      return deref;
   }

   read_src(ctx, &deref->parent, &deref->instr);

   switch (deref->deref_type) {
   case nir_deref_type_struct:
      deref->strct.index = blob_read_uint32(ctx->blob);
      break;

   case nir_deref_type_array:
   case nir_deref_type_ptr_as_array:
      read_src(ctx, &deref->arr.index, &deref->instr);
      break;

   case nir_deref_type_cast:
      deref->cast.ptr_stride = blob_read_uint32(ctx->blob);
      break;

   case nir_deref_type_array_wildcard:
      /* Nothing to do */
      break;

   default:
      unreachable("Invalid deref type");
   }

   return deref;
}
510
511 static void
512 write_intrinsic(write_ctx *ctx, const nir_intrinsic_instr *intrin)
513 {
514 blob_write_uint32(ctx->blob, intrin->intrinsic);
515
516 unsigned num_srcs = nir_intrinsic_infos[intrin->intrinsic].num_srcs;
517 unsigned num_indices = nir_intrinsic_infos[intrin->intrinsic].num_indices;
518
519 blob_write_uint32(ctx->blob, intrin->num_components);
520
521 if (nir_intrinsic_infos[intrin->intrinsic].has_dest)
522 write_dest(ctx, &intrin->dest);
523
524 for (unsigned i = 0; i < num_srcs; i++)
525 write_src(ctx, &intrin->src[i]);
526
527 for (unsigned i = 0; i < num_indices; i++)
528 blob_write_uint32(ctx->blob, intrin->const_index[i]);
529 }
530
531 static nir_intrinsic_instr *
532 read_intrinsic(read_ctx *ctx)
533 {
534 nir_intrinsic_op op = blob_read_uint32(ctx->blob);
535
536 nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(ctx->nir, op);
537
538 unsigned num_srcs = nir_intrinsic_infos[op].num_srcs;
539 unsigned num_indices = nir_intrinsic_infos[op].num_indices;
540
541 intrin->num_components = blob_read_uint32(ctx->blob);
542
543 if (nir_intrinsic_infos[op].has_dest)
544 read_dest(ctx, &intrin->dest, &intrin->instr);
545
546 for (unsigned i = 0; i < num_srcs; i++)
547 read_src(ctx, &intrin->src[i], &intrin->instr);
548
549 for (unsigned i = 0; i < num_indices; i++)
550 intrin->const_index[i] = blob_read_uint32(ctx->blob);
551
552 return intrin;
553 }
554
555 static void
556 write_load_const(write_ctx *ctx, const nir_load_const_instr *lc)
557 {
558 uint32_t val = lc->def.num_components;
559 val |= lc->def.bit_size << 3;
560 blob_write_uint32(ctx->blob, val);
561 blob_write_bytes(ctx->blob, lc->value, sizeof(*lc->value) * lc->def.num_components);
562 write_add_object(ctx, &lc->def);
563 }
564
565 static nir_load_const_instr *
566 read_load_const(read_ctx *ctx)
567 {
568 uint32_t val = blob_read_uint32(ctx->blob);
569
570 nir_load_const_instr *lc =
571 nir_load_const_instr_create(ctx->nir, val & 0x7, val >> 3);
572
573 blob_copy_bytes(ctx->blob, lc->value, sizeof(*lc->value) * lc->def.num_components);
574 read_add_object(ctx, &lc->def);
575 return lc;
576 }
577
578 static void
579 write_ssa_undef(write_ctx *ctx, const nir_ssa_undef_instr *undef)
580 {
581 uint32_t val = undef->def.num_components;
582 val |= undef->def.bit_size << 3;
583 blob_write_uint32(ctx->blob, val);
584 write_add_object(ctx, &undef->def);
585 }
586
587 static nir_ssa_undef_instr *
588 read_ssa_undef(read_ctx *ctx)
589 {
590 uint32_t val = blob_read_uint32(ctx->blob);
591
592 nir_ssa_undef_instr *undef =
593 nir_ssa_undef_instr_create(ctx->nir, val & 0x7, val >> 3);
594
595 read_add_object(ctx, &undef->def);
596 return undef;
597 }
598
/* Bit-packs the small scalar fields of a nir_tex_instr into a single
 * uint32; write_tex static-asserts the union is exactly 4 bytes.
 */
union packed_tex_data {
   uint32_t u32;
   struct {
      enum glsl_sampler_dim sampler_dim:4;
      nir_alu_type dest_type:8;
      unsigned coord_components:3;
      unsigned is_array:1;
      unsigned is_shadow:1;
      unsigned is_new_style_shadow:1;
      unsigned component:2;
      unsigned unused:10; /* Mark unused for valgrind. */
   } u;
};
612
/* Serialize a texture instruction: src count and scalar fields first, then
 * the bit-packed packed_tex_data word, the dest, and each source prefixed
 * with its src_type tag.
 */
static void
write_tex(write_ctx *ctx, const nir_tex_instr *tex)
{
   blob_write_uint32(ctx->blob, tex->num_srcs);
   blob_write_uint32(ctx->blob, tex->op);
   blob_write_uint32(ctx->blob, tex->texture_index);
   blob_write_uint32(ctx->blob, tex->texture_array_size);
   blob_write_uint32(ctx->blob, tex->sampler_index);
   blob_write_bytes(ctx->blob, tex->tg4_offsets, sizeof(tex->tg4_offsets));

   STATIC_ASSERT(sizeof(union packed_tex_data) == sizeof(uint32_t));
   union packed_tex_data packed = {
      .u.sampler_dim = tex->sampler_dim,
      .u.dest_type = tex->dest_type,
      .u.coord_components = tex->coord_components,
      .u.is_array = tex->is_array,
      .u.is_shadow = tex->is_shadow,
      .u.is_new_style_shadow = tex->is_new_style_shadow,
      .u.component = tex->component,
   };
   blob_write_uint32(ctx->blob, packed.u32);

   write_dest(ctx, &tex->dest);
   for (unsigned i = 0; i < tex->num_srcs; i++) {
      blob_write_uint32(ctx->blob, tex->src[i].src_type);
      write_src(ctx, &tex->src[i].src);
   }
}
641
/* Mirror of write_tex: the src count read first sizes the instruction, then
 * scalar fields, the packed word, the dest and the tagged sources follow in
 * write order.
 */
static nir_tex_instr *
read_tex(read_ctx *ctx)
{
   unsigned num_srcs = blob_read_uint32(ctx->blob);
   nir_tex_instr *tex = nir_tex_instr_create(ctx->nir, num_srcs);

   tex->op = blob_read_uint32(ctx->blob);
   tex->texture_index = blob_read_uint32(ctx->blob);
   tex->texture_array_size = blob_read_uint32(ctx->blob);
   tex->sampler_index = blob_read_uint32(ctx->blob);
   blob_copy_bytes(ctx->blob, tex->tg4_offsets, sizeof(tex->tg4_offsets));

   union packed_tex_data packed;
   packed.u32 = blob_read_uint32(ctx->blob);
   tex->sampler_dim = packed.u.sampler_dim;
   tex->dest_type = packed.u.dest_type;
   tex->coord_components = packed.u.coord_components;
   tex->is_array = packed.u.is_array;
   tex->is_shadow = packed.u.is_shadow;
   tex->is_new_style_shadow = packed.u.is_new_style_shadow;
   tex->component = packed.u.component;

   read_dest(ctx, &tex->dest, &tex->instr);
   for (unsigned i = 0; i < tex->num_srcs; i++) {
      tex->src[i].src_type = blob_read_uint32(ctx->blob);
      read_src(ctx, &tex->src[i].src, &tex->instr);
   }

   return tex;
}
672
/* Serialize a phi instruction.  Only SSA-sourced phis are supported here
 * (asserted below).
 */
static void
write_phi(write_ctx *ctx, const nir_phi_instr *phi)
{
   /* Phi nodes are special, since they may reference SSA definitions and
    * basic blocks that don't exist yet. We leave two empty uint32_t's here,
    * and then store enough information so that a later fixup pass can fill
    * them in correctly.
    */
   write_dest(ctx, &phi->dest);

   blob_write_uint32(ctx->blob, exec_list_length(&phi->srcs));

   nir_foreach_phi_src(src, phi) {
      assert(src->src.is_ssa);
      /* Reserve two consecutive uint32 slots: [ssa def idx][pred block idx] */
      size_t blob_offset = blob_reserve_uint32(ctx->blob);
      ASSERTED size_t blob_offset2 = blob_reserve_uint32(ctx->blob);
      assert(blob_offset + sizeof(uint32_t) == blob_offset2);
      write_phi_fixup fixup = {
         .blob_offset = blob_offset,
         .src = src->src.ssa,
         .block = src->pred,
      };
      util_dynarray_append(&ctx->phi_fixups, write_phi_fixup, fixup);
   }
}
698
699 static void
700 write_fixup_phis(write_ctx *ctx)
701 {
702 util_dynarray_foreach(&ctx->phi_fixups, write_phi_fixup, fixup) {
703 uint32_t *blob_ptr = (uint32_t *)(ctx->blob->data + fixup->blob_offset);
704 blob_ptr[0] = write_lookup_object(ctx, fixup->src);
705 blob_ptr[1] = write_lookup_object(ctx, fixup->block);
706 }
707
708 util_dynarray_clear(&ctx->phi_fixups);
709 }
710
/* Read a phi and insert it into @blk immediately.  Its sources temporarily
 * hold raw blob indices; read_fixup_phis() resolves them to real pointers
 * once every block and SSA def has been read.
 */
static nir_phi_instr *
read_phi(read_ctx *ctx, nir_block *blk)
{
   nir_phi_instr *phi = nir_phi_instr_create(ctx->nir);

   read_dest(ctx, &phi->dest, &phi->instr);

   unsigned num_srcs = blob_read_uint32(ctx->blob);

   /* For similar reasons as before, we just store the index directly into the
    * pointer, and let a later pass resolve the phi sources.
    *
    * In order to ensure that the copied sources (which are just the indices
    * from the blob for now) don't get inserted into the old shader's use-def
    * lists, we have to add the phi instruction *before* we set up its
    * sources.
    */
   nir_instr_insert_after_block(blk, &phi->instr);

   for (unsigned i = 0; i < num_srcs; i++) {
      nir_phi_src *src = ralloc(phi, nir_phi_src);

      src->src.is_ssa = true;
      /* smuggle the blob indices through the pointer fields for now */
      src->src.ssa = (nir_ssa_def *)(uintptr_t) blob_read_uint32(ctx->blob);
      src->pred = (nir_block *)(uintptr_t) blob_read_uint32(ctx->blob);

      /* Since we're not letting nir_insert_instr handle use/def stuff for us,
       * we have to set the parent_instr manually. It doesn't really matter
       * when we do it, so we might as well do it here.
       */
      src->src.parent_instr = &phi->instr;

      /* Stash it in the list of phi sources. We'll walk this list and fix up
       * sources at the very end of read_function_impl.
       */
      list_add(&src->src.use_link, &ctx->phi_srcs);

      exec_list_push_tail(&phi->srcs, &src->node);
   }

   return phi;
}
753
/* Second read pass over phi sources: src.ssa and pred currently hold blob
 * indices stashed by read_phi; replace them with the real pointers and link
 * each source into its SSA def's use list.
 */
static void
read_fixup_phis(read_ctx *ctx)
{
   list_for_each_entry_safe(nir_phi_src, src, &ctx->phi_srcs, src.use_link) {
      src->pred = read_lookup_object(ctx, (uintptr_t)src->pred);
      src->src.ssa = read_lookup_object(ctx, (uintptr_t)src->src.ssa);

      /* Remove from this list */
      list_del(&src->src.use_link);

      list_addtail(&src->src.use_link, &src->src.ssa->uses);
   }
   assert(list_is_empty(&ctx->phi_srcs));
}
768
/* Serialize a jump instruction: just its type (break/continue/return). */
static void
write_jump(write_ctx *ctx, const nir_jump_instr *jmp)
{
   blob_write_uint32(ctx->blob, jmp->type);
}
774
775 static nir_jump_instr *
776 read_jump(read_ctx *ctx)
777 {
778 nir_jump_type type = blob_read_uint32(ctx->blob);
779 nir_jump_instr *jmp = nir_jump_instr_create(ctx->nir, type);
780 return jmp;
781 }
782
783 static void
784 write_call(write_ctx *ctx, const nir_call_instr *call)
785 {
786 blob_write_uint32(ctx->blob, write_lookup_object(ctx, call->callee));
787
788 for (unsigned i = 0; i < call->num_params; i++)
789 write_src(ctx, &call->params[i]);
790 }
791
792 static nir_call_instr *
793 read_call(read_ctx *ctx)
794 {
795 nir_function *callee = read_object(ctx);
796 nir_call_instr *call = nir_call_instr_create(ctx->nir, callee);
797
798 for (unsigned i = 0; i < call->num_params; i++)
799 read_src(ctx, &call->params[i], call);
800
801 return call;
802 }
803
/* Serialize one instruction: the type tag first (so read_instr can dispatch)
 * followed by the type-specific payload.
 */
static void
write_instr(write_ctx *ctx, const nir_instr *instr)
{
   blob_write_uint32(ctx->blob, instr->type);
   switch (instr->type) {
   case nir_instr_type_alu:
      write_alu(ctx, nir_instr_as_alu(instr));
      break;
   case nir_instr_type_deref:
      write_deref(ctx, nir_instr_as_deref(instr));
      break;
   case nir_instr_type_intrinsic:
      write_intrinsic(ctx, nir_instr_as_intrinsic(instr));
      break;
   case nir_instr_type_load_const:
      write_load_const(ctx, nir_instr_as_load_const(instr));
      break;
   case nir_instr_type_ssa_undef:
      write_ssa_undef(ctx, nir_instr_as_ssa_undef(instr));
      break;
   case nir_instr_type_tex:
      write_tex(ctx, nir_instr_as_tex(instr));
      break;
   case nir_instr_type_phi:
      write_phi(ctx, nir_instr_as_phi(instr));
      break;
   case nir_instr_type_jump:
      write_jump(ctx, nir_instr_as_jump(instr));
      break;
   case nir_instr_type_call:
      write_call(ctx, nir_instr_as_call(instr));
      break;
   case nir_instr_type_parallel_copy:
      /* parallel copies only exist transiently during out-of-SSA */
      unreachable("Cannot write parallel copies");
   default:
      unreachable("bad instr type");
   }
}
842
/* Read one instruction (dispatching on the type tag written by write_instr)
 * and append it to @block.  Phis insert themselves and return early.
 */
static void
read_instr(read_ctx *ctx, nir_block *block)
{
   nir_instr_type type = blob_read_uint32(ctx->blob);
   nir_instr *instr;
   switch (type) {
   case nir_instr_type_alu:
      instr = &read_alu(ctx)->instr;
      break;
   case nir_instr_type_deref:
      instr = &read_deref(ctx)->instr;
      break;
   case nir_instr_type_intrinsic:
      instr = &read_intrinsic(ctx)->instr;
      break;
   case nir_instr_type_load_const:
      instr = &read_load_const(ctx)->instr;
      break;
   case nir_instr_type_ssa_undef:
      instr = &read_ssa_undef(ctx)->instr;
      break;
   case nir_instr_type_tex:
      instr = &read_tex(ctx)->instr;
      break;
   case nir_instr_type_phi:
      /* Phi instructions are a bit of a special case when reading because we
       * don't want inserting the instruction to automatically handle use/defs
       * for us. Instead, we need to wait until all the blocks/instructions
       * are read so that we can set their sources up.
       */
      read_phi(ctx, block);
      return;
   case nir_instr_type_jump:
      instr = &read_jump(ctx)->instr;
      break;
   case nir_instr_type_call:
      instr = &read_call(ctx)->instr;
      break;
   case nir_instr_type_parallel_copy:
      unreachable("Cannot read parallel copies");
   default:
      unreachable("bad instr type");
   }

   nir_instr_insert_after_block(block, instr);
}
889
890 static void
891 write_block(write_ctx *ctx, const nir_block *block)
892 {
893 write_add_object(ctx, block);
894 blob_write_uint32(ctx->blob, exec_list_length(&block->instr_list));
895 nir_foreach_instr(instr, block)
896 write_instr(ctx, instr);
897 }
898
/* Read a basic block's instructions into the empty block NIR already placed
 * at the tail of @cf_list, registering the block in the index table.
 */
static void
read_block(read_ctx *ctx, struct exec_list *cf_list)
{
   /* Don't actually create a new block. Just use the one from the tail of
    * the list. NIR guarantees that the tail of the list is a block and that
    * no two blocks are side-by-side in the IR; It should be empty.
    */
   nir_block *block =
      exec_node_data(nir_block, exec_list_get_tail(cf_list), cf_node.node);

   read_add_object(ctx, block);
   unsigned num_instrs = blob_read_uint32(ctx->blob);
   for (unsigned i = 0; i < num_instrs; i++) {
      read_instr(ctx, block);
   }
}
915
916 static void
917 write_cf_list(write_ctx *ctx, const struct exec_list *cf_list);
918
919 static void
920 read_cf_list(read_ctx *ctx, struct exec_list *cf_list);
921
/* Serialize an if: condition source, then the then and else CF lists. */
static void
write_if(write_ctx *ctx, nir_if *nif)
{
   write_src(ctx, &nif->condition);

   write_cf_list(ctx, &nif->then_list);
   write_cf_list(ctx, &nif->else_list);
}
930
/* Mirror of write_if.  The if node is inserted into @cf_list before its
 * branches are read so the branch blocks link up correctly.
 */
static void
read_if(read_ctx *ctx, struct exec_list *cf_list)
{
   nir_if *nif = nir_if_create(ctx->nir);

   read_src(ctx, &nif->condition, nif);

   nir_cf_node_insert_end(cf_list, &nif->cf_node);

   read_cf_list(ctx, &nif->then_list);
   read_cf_list(ctx, &nif->else_list);
}
943
/* Serialize a loop: just its body CF list. */
static void
write_loop(write_ctx *ctx, nir_loop *loop)
{
   write_cf_list(ctx, &loop->body);
}
949
/* Mirror of write_loop.  The loop node is inserted into @cf_list before its
 * body is read so the body blocks link up correctly.
 */
static void
read_loop(read_ctx *ctx, struct exec_list *cf_list)
{
   nir_loop *loop = nir_loop_create(ctx->nir);

   nir_cf_node_insert_end(cf_list, &loop->cf_node);

   read_cf_list(ctx, &loop->body);
}
959
960 static void
961 write_cf_node(write_ctx *ctx, nir_cf_node *cf)
962 {
963 blob_write_uint32(ctx->blob, cf->type);
964
965 switch (cf->type) {
966 case nir_cf_node_block:
967 write_block(ctx, nir_cf_node_as_block(cf));
968 break;
969 case nir_cf_node_if:
970 write_if(ctx, nir_cf_node_as_if(cf));
971 break;
972 case nir_cf_node_loop:
973 write_loop(ctx, nir_cf_node_as_loop(cf));
974 break;
975 default:
976 unreachable("bad cf type");
977 }
978 }
979
980 static void
981 read_cf_node(read_ctx *ctx, struct exec_list *list)
982 {
983 nir_cf_node_type type = blob_read_uint32(ctx->blob);
984
985 switch (type) {
986 case nir_cf_node_block:
987 read_block(ctx, list);
988 break;
989 case nir_cf_node_if:
990 read_if(ctx, list);
991 break;
992 case nir_cf_node_loop:
993 read_loop(ctx, list);
994 break;
995 default:
996 unreachable("bad cf type");
997 }
998 }
999
1000 static void
1001 write_cf_list(write_ctx *ctx, const struct exec_list *cf_list)
1002 {
1003 blob_write_uint32(ctx->blob, exec_list_length(cf_list));
1004 foreach_list_typed(nir_cf_node, cf, node, cf_list) {
1005 write_cf_node(ctx, cf);
1006 }
1007 }
1008
1009 static void
1010 read_cf_list(read_ctx *ctx, struct exec_list *cf_list)
1011 {
1012 uint32_t num_cf_nodes = blob_read_uint32(ctx->blob);
1013 for (unsigned i = 0; i < num_cf_nodes; i++)
1014 read_cf_node(ctx, cf_list);
1015 }
1016
/* Serialize a function implementation: locals and registers first (so body
 * instructions can reference them by index), then the body CF tree, then
 * the phi-source fixup pass over the bytes just written.
 */
static void
write_function_impl(write_ctx *ctx, const nir_function_impl *fi)
{
   write_var_list(ctx, &fi->locals);
   write_reg_list(ctx, &fi->registers);
   blob_write_uint32(ctx->blob, fi->reg_alloc);

   write_cf_list(ctx, &fi->body);
   write_fixup_phis(ctx);
}
1027
/* Mirror of write_function_impl: reads fields in write order and resolves
 * phi sources once the whole body is in place.  Metadata is invalidated
 * since none is serialized.
 */
static nir_function_impl *
read_function_impl(read_ctx *ctx, nir_function *fxn)
{
   nir_function_impl *fi = nir_function_impl_create_bare(ctx->nir);
   fi->function = fxn;

   read_var_list(ctx, &fi->locals);
   read_reg_list(ctx, &fi->registers);
   fi->reg_alloc = blob_read_uint32(ctx->blob);

   read_cf_list(ctx, &fi->body);
   read_fixup_phis(ctx);

   fi->valid_metadata = 0;

   return fi;
}
1045
/* Serialize a nir_function's declaration (name, params, entrypoint flag)
 * and assign it an object ID.  Each parameter packs num_components in bits
 * 0-7 and bit_size in bits 8+.
 */
static void
write_function(write_ctx *ctx, const nir_function *fxn)
{
   blob_write_uint32(ctx->blob, !!(fxn->name));
   if (fxn->name)
      blob_write_string(ctx->blob, fxn->name);

   write_add_object(ctx, fxn);

   blob_write_uint32(ctx->blob, fxn->num_params);
   for (unsigned i = 0; i < fxn->num_params; i++) {
      uint32_t val =
         ((uint32_t)fxn->params[i].num_components) |
         ((uint32_t)fxn->params[i].bit_size) << 8;
      blob_write_uint32(ctx->blob, val);
   }

   blob_write_uint32(ctx->blob, fxn->is_entrypoint);

   /* At first glance, it looks like we should write the function_impl here.
    * However, call instructions need to be able to reference at least the
    * function and those will get processed as we write the function_impls.
    * We stop here and write function_impls as a second pass.
    */
}
1071
/* Counterpart of write_function(): reads one function header and
 * registers it in the object table.  The impl is read later by
 * read_function_impl().
 */
static void
read_function(read_ctx *ctx)
{
   bool has_name = blob_read_uint32(ctx->blob);
   char *name = has_name ? blob_read_string(ctx->blob) : NULL;

   nir_function *fxn = nir_function_create(ctx->nir, name);

   /* Must mirror write_add_object() so call instructions resolve to the
    * same index.
    */
   read_add_object(ctx, fxn);

   fxn->num_params = blob_read_uint32(ctx->blob);
   fxn->params = ralloc_array(fxn, nir_parameter, fxn->num_params);
   for (unsigned i = 0; i < fxn->num_params; i++) {
      /* Unpack the dword written by write_function():
       * bits 0-7 num_components, bits 8-15 bit_size.
       */
      uint32_t val = blob_read_uint32(ctx->blob);
      fxn->params[i].num_components = val & 0xff;
      fxn->params[i].bit_size = (val >> 8) & 0xff;
   }

   fxn->is_entrypoint = blob_read_uint32(ctx->blob);
}
1092
1093 void
1094 nir_serialize(struct blob *blob, const nir_shader *nir, bool strip)
1095 {
1096 nir_shader *stripped = NULL;
1097
1098 if (strip) {
1099 /* Drop unnecessary information (like variable names), so the serialized
1100 * NIR is smaller, and also to let us detect more isomorphic shaders
1101 * when hashing, increasing cache hits.
1102 */
1103 stripped = nir_shader_clone(NULL, nir);
1104 nir_strip(stripped);
1105 nir = stripped;
1106 }
1107
1108 write_ctx ctx;
1109 ctx.remap_table = _mesa_pointer_hash_table_create(NULL);
1110 ctx.next_idx = 0;
1111 ctx.blob = blob;
1112 ctx.nir = nir;
1113 util_dynarray_init(&ctx.phi_fixups, NULL);
1114
1115 size_t idx_size_offset = blob_reserve_uint32(blob);
1116
1117 struct shader_info info = nir->info;
1118 uint32_t strings = 0;
1119 if (info.name)
1120 strings |= 0x1;
1121 if (info.label)
1122 strings |= 0x2;
1123 blob_write_uint32(blob, strings);
1124 if (info.name)
1125 blob_write_string(blob, info.name);
1126 if (info.label)
1127 blob_write_string(blob, info.label);
1128 info.name = info.label = NULL;
1129 blob_write_bytes(blob, (uint8_t *) &info, sizeof(info));
1130
1131 write_var_list(&ctx, &nir->uniforms);
1132 write_var_list(&ctx, &nir->inputs);
1133 write_var_list(&ctx, &nir->outputs);
1134 write_var_list(&ctx, &nir->shared);
1135 write_var_list(&ctx, &nir->globals);
1136 write_var_list(&ctx, &nir->system_values);
1137
1138 blob_write_uint32(blob, nir->num_inputs);
1139 blob_write_uint32(blob, nir->num_uniforms);
1140 blob_write_uint32(blob, nir->num_outputs);
1141 blob_write_uint32(blob, nir->num_shared);
1142 blob_write_uint32(blob, nir->scratch_size);
1143
1144 blob_write_uint32(blob, exec_list_length(&nir->functions));
1145 nir_foreach_function(fxn, nir) {
1146 write_function(&ctx, fxn);
1147 }
1148
1149 nir_foreach_function(fxn, nir) {
1150 write_function_impl(&ctx, fxn->impl);
1151 }
1152
1153 blob_write_uint32(blob, nir->constant_data_size);
1154 if (nir->constant_data_size > 0)
1155 blob_write_bytes(blob, nir->constant_data, nir->constant_data_size);
1156
1157 *(uint32_t *)(blob->data + idx_size_offset) = ctx.next_idx;
1158
1159 _mesa_hash_table_destroy(ctx.remap_table, NULL);
1160 util_dynarray_fini(&ctx.phi_fixups);
1161
1162 if (strip)
1163 ralloc_free(stripped);
1164 }
1165
/* Deserialize a shader previously written with nir_serialize() into a new
 * nir_shader allocated under \p mem_ctx.  Every read here must mirror the
 * corresponding write in nir_serialize(), in the same order.
 */
nir_shader *
nir_deserialize(void *mem_ctx,
                const struct nir_shader_compiler_options *options,
                struct blob_reader *blob)
{
   read_ctx ctx;
   ctx.blob = blob;
   list_inithead(&ctx.phi_srcs);
   /* Total object-ID count, written last by the serializer into the
    * reserved dword at the front of the blob; sizes the index->pointer
    * table used to resolve object references.
    * NOTE(review): the calloc result is not checked — a corrupt blob with
    * a huge idx_table_len would make this NULL; confirm callers only feed
    * trusted blobs.
    */
   ctx.idx_table_len = blob_read_uint32(blob);
   ctx.idx_table = calloc(ctx.idx_table_len, sizeof(uintptr_t));
   ctx.next_idx = 0;

   /* Optional name/label strings precede the shader_info struct. */
   uint32_t strings = blob_read_uint32(blob);
   char *name = (strings & 0x1) ? blob_read_string(blob) : NULL;
   char *label = (strings & 0x2) ? blob_read_string(blob) : NULL;

   struct shader_info info;
   blob_copy_bytes(blob, (uint8_t *) &info, sizeof(info));

   ctx.nir = nir_shader_create(mem_ctx, info.stage, options, NULL);

   /* The strings read above point into the blob; re-own them under the
    * new shader.
    */
   info.name = name ? ralloc_strdup(ctx.nir, name) : NULL;
   info.label = label ? ralloc_strdup(ctx.nir, label) : NULL;

   ctx.nir->info = info;

   read_var_list(&ctx, &ctx.nir->uniforms);
   read_var_list(&ctx, &ctx.nir->inputs);
   read_var_list(&ctx, &ctx.nir->outputs);
   read_var_list(&ctx, &ctx.nir->shared);
   read_var_list(&ctx, &ctx.nir->globals);
   read_var_list(&ctx, &ctx.nir->system_values);

   ctx.nir->num_inputs = blob_read_uint32(blob);
   ctx.nir->num_uniforms = blob_read_uint32(blob);
   ctx.nir->num_outputs = blob_read_uint32(blob);
   ctx.nir->num_shared = blob_read_uint32(blob);
   ctx.nir->scratch_size = blob_read_uint32(blob);

   /* Two passes, mirroring the serializer: function headers first, then
    * the impls.
    */
   unsigned num_functions = blob_read_uint32(blob);
   for (unsigned i = 0; i < num_functions; i++)
      read_function(&ctx);

   nir_foreach_function(fxn, ctx.nir)
      fxn->impl = read_function_impl(&ctx, fxn);

   ctx.nir->constant_data_size = blob_read_uint32(blob);
   if (ctx.nir->constant_data_size > 0) {
      ctx.nir->constant_data =
         ralloc_size(ctx.nir, ctx.nir->constant_data_size);
      blob_copy_bytes(blob, ctx.nir->constant_data,
                      ctx.nir->constant_data_size);
   }

   free(ctx.idx_table);

   return ctx.nir;
}
1224
/* Replace \p shader's contents in place with the result of a full
 * serialize/deserialize round trip.  The nir_shader pointer itself stays
 * valid; everything it previously owned is destroyed and rebuilt from
 * the blob.
 */
void
nir_shader_serialize_deserialize(nir_shader *shader)
{
   const struct nir_shader_compiler_options *options = shader->options;

   struct blob writer;
   blob_init(&writer);
   nir_serialize(&writer, shader, false);

   /* Delete all of dest's ralloc children but leave dest alone */
   void *dead_ctx = ralloc_context(NULL);
   ralloc_adopt(dead_ctx, shader);
   ralloc_free(dead_ctx);

   dead_ctx = ralloc_context(NULL);

   struct blob_reader reader;
   blob_reader_init(&reader, writer.data, writer.size);
   nir_shader *copy = nir_deserialize(dead_ctx, options, &reader);

   blob_finish(&writer);

   /* Move the copy's contents into the original shader, then free the
    * temporary context that owned the copy.
    */
   nir_shader_replace(shader, copy);
   ralloc_free(dead_ctx);
}