nir/serialize: don't expand 16-bit variable state slots to 32 bits
[mesa.git] src/compiler/nir/nir_serialize.c
/*
 * Copyright © 2017 Connor Abbott
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir_serialize.h"
#include "nir_control_flow.h"
#include "util/u_dynarray.h"

typedef struct {
   size_t blob_offset;
   nir_ssa_def *src;
   nir_block *block;
} write_phi_fixup;

typedef struct {
   const nir_shader *nir;

   struct blob *blob;

   /* maps pointer to index */
   struct hash_table *remap_table;

   /* the next index to assign to a NIR in-memory object */
   uintptr_t next_idx;

   /* Array of write_phi_fixup structs representing phi sources that need to
    * be resolved in the second pass.
    */
   struct util_dynarray phi_fixups;
} write_ctx;

typedef struct {
   nir_shader *nir;

   struct blob_reader *blob;

   /* the next index to assign to a NIR in-memory object */
   uintptr_t next_idx;

   /* The length of the index -> object table */
   uintptr_t idx_table_len;

   /* map from index to deserialized pointer */
   void **idx_table;

   /* List of phi sources. */
   struct list_head phi_srcs;

} read_ctx;

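/* Every serialized object (variable, register, SSA def, block, function) is
 * assigned a dense index in the order it is written.  The reader re-creates
 * objects in exactly the same order, so an index read back from the blob
 * maps to the matching pointer via idx_table without any explicit keys.
 */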
static void
write_add_object(write_ctx *ctx, const void *obj)
{
   uintptr_t index = ctx->next_idx++;
   _mesa_hash_table_insert(ctx->remap_table, obj, (void *) index);
}

static uintptr_t
write_lookup_object(write_ctx *ctx, const void *obj)
{
   struct hash_entry *entry = _mesa_hash_table_search(ctx->remap_table, obj);
   assert(entry);
   return (uintptr_t) entry->data;
}

static void
write_object(write_ctx *ctx, const void *obj)
{
   blob_write_intptr(ctx->blob, write_lookup_object(ctx, obj));
}

static void
read_add_object(read_ctx *ctx, void *obj)
{
   assert(ctx->next_idx < ctx->idx_table_len);
   ctx->idx_table[ctx->next_idx++] = obj;
}

static void *
read_lookup_object(read_ctx *ctx, uintptr_t idx)
{
   assert(idx < ctx->idx_table_len);
   return ctx->idx_table[idx];
}

static void *
read_object(read_ctx *ctx)
{
   return read_lookup_object(ctx, blob_read_intptr(ctx->blob));
}

static void
write_constant(write_ctx *ctx, const nir_constant *c)
{
   blob_write_bytes(ctx->blob, c->values, sizeof(c->values));
   blob_write_uint32(ctx->blob, c->num_elements);
   for (unsigned i = 0; i < c->num_elements; i++)
      write_constant(ctx, c->elements[i]);
}

static nir_constant *
read_constant(read_ctx *ctx, nir_variable *nvar)
{
   nir_constant *c = ralloc(nvar, nir_constant);

   blob_copy_bytes(ctx->blob, (uint8_t *)c->values, sizeof(c->values));
   c->num_elements = blob_read_uint32(ctx->blob);
   c->elements = ralloc_array(nvar, nir_constant *, c->num_elements);
   for (unsigned i = 0; i < c->num_elements; i++)
      c->elements[i] = read_constant(ctx, nvar);

   return c;
}

static void
write_variable(write_ctx *ctx, const nir_variable *var)
{
   write_add_object(ctx, var);
   encode_type_to_blob(ctx->blob, var->type);
   blob_write_uint32(ctx->blob, !!(var->name));
   if (var->name)
      blob_write_string(ctx->blob, var->name);
   blob_write_bytes(ctx->blob, (uint8_t *) &var->data, sizeof(var->data));
   blob_write_uint32(ctx->blob, var->num_state_slots);
   for (unsigned i = 0; i < var->num_state_slots; i++) {
      blob_write_bytes(ctx->blob, &var->state_slots[i],
                       sizeof(var->state_slots[i]));
   }
   blob_write_uint32(ctx->blob, !!(var->constant_initializer));
   if (var->constant_initializer)
      write_constant(ctx, var->constant_initializer);
   blob_write_uint32(ctx->blob, !!(var->interface_type));
   if (var->interface_type)
      encode_type_to_blob(ctx->blob, var->interface_type);
   blob_write_uint32(ctx->blob, var->num_members);
   if (var->num_members > 0) {
      blob_write_bytes(ctx->blob, (uint8_t *) var->members,
                       var->num_members * sizeof(*var->members));
   }
}

static nir_variable *
read_variable(read_ctx *ctx)
{
   nir_variable *var = rzalloc(ctx->nir, nir_variable);
   read_add_object(ctx, var);

   var->type = decode_type_from_blob(ctx->blob);
   bool has_name = blob_read_uint32(ctx->blob);
   if (has_name) {
      const char *name = blob_read_string(ctx->blob);
      var->name = ralloc_strdup(var, name);
   } else {
      var->name = NULL;
   }
   blob_copy_bytes(ctx->blob, (uint8_t *) &var->data, sizeof(var->data));
   var->num_state_slots = blob_read_uint32(ctx->blob);
   if (var->num_state_slots != 0) {
      var->state_slots = ralloc_array(var, nir_state_slot,
                                      var->num_state_slots);
      for (unsigned i = 0; i < var->num_state_slots; i++) {
         blob_copy_bytes(ctx->blob, &var->state_slots[i],
                         sizeof(var->state_slots[i]));
      }
   }
   bool has_const_initializer = blob_read_uint32(ctx->blob);
   if (has_const_initializer)
      var->constant_initializer = read_constant(ctx, var);
   else
      var->constant_initializer = NULL;
   bool has_interface_type = blob_read_uint32(ctx->blob);
   if (has_interface_type)
      var->interface_type = decode_type_from_blob(ctx->blob);
   else
      var->interface_type = NULL;
   var->num_members = blob_read_uint32(ctx->blob);
   if (var->num_members > 0) {
      var->members = ralloc_array(var, struct nir_variable_data,
                                  var->num_members);
      blob_copy_bytes(ctx->blob, (uint8_t *) var->members,
                      var->num_members * sizeof(*var->members));
   }

   return var;
}

static void
write_var_list(write_ctx *ctx, const struct exec_list *src)
{
   blob_write_uint32(ctx->blob, exec_list_length(src));
   foreach_list_typed(nir_variable, var, node, src) {
      write_variable(ctx, var);
   }
}

static void
read_var_list(read_ctx *ctx, struct exec_list *dst)
{
   exec_list_make_empty(dst);
   unsigned num_vars = blob_read_uint32(ctx->blob);
   for (unsigned i = 0; i < num_vars; i++) {
      nir_variable *var = read_variable(ctx);
      exec_list_push_tail(dst, &var->node);
   }
}

static void
write_register(write_ctx *ctx, const nir_register *reg)
{
   write_add_object(ctx, reg);
   blob_write_uint32(ctx->blob, reg->num_components);
   blob_write_uint32(ctx->blob, reg->bit_size);
   blob_write_uint32(ctx->blob, reg->num_array_elems);
   blob_write_uint32(ctx->blob, reg->index);
   blob_write_uint32(ctx->blob, !!(reg->name));
   if (reg->name)
      blob_write_string(ctx->blob, reg->name);
}

static nir_register *
read_register(read_ctx *ctx)
{
   nir_register *reg = ralloc(ctx->nir, nir_register);
   read_add_object(ctx, reg);
   reg->num_components = blob_read_uint32(ctx->blob);
   reg->bit_size = blob_read_uint32(ctx->blob);
   reg->num_array_elems = blob_read_uint32(ctx->blob);
   reg->index = blob_read_uint32(ctx->blob);
   bool has_name = blob_read_uint32(ctx->blob);
   if (has_name) {
      const char *name = blob_read_string(ctx->blob);
      reg->name = ralloc_strdup(reg, name);
   } else {
      reg->name = NULL;
   }

   list_inithead(&reg->uses);
   list_inithead(&reg->defs);
   list_inithead(&reg->if_uses);

   return reg;
}

static void
write_reg_list(write_ctx *ctx, const struct exec_list *src)
{
   blob_write_uint32(ctx->blob, exec_list_length(src));
   foreach_list_typed(nir_register, reg, node, src)
      write_register(ctx, reg);
}

static void
read_reg_list(read_ctx *ctx, struct exec_list *dst)
{
   exec_list_make_empty(dst);
   unsigned num_regs = blob_read_uint32(ctx->blob);
   for (unsigned i = 0; i < num_regs; i++) {
      nir_register *reg = read_register(ctx);
      exec_list_push_tail(dst, &reg->node);
   }
}

static void
write_src(write_ctx *ctx, const nir_src *src)
{
   /* Since sources are very frequent, we try to save some space when storing
    * them. In particular, we store whether the source is a register and
    * whether the register has an indirect index in the low two bits. We can
    * assume that the high two bits of the index are zero, since otherwise our
    * address space would've been exhausted allocating the remap table!
    */
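   /* For example (illustrative values): an SSA source whose def has remap
    * index 5 is written as (5 << 2) | 1 = 0x15, while a register source
    * with an indirect is written as (5 << 2) | 2 = 0x16, followed by
    * base_offset and then the indirect source, serialized recursively.
    */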
   if (src->is_ssa) {
      uintptr_t idx = write_lookup_object(ctx, src->ssa) << 2;
      idx |= 1;
      blob_write_intptr(ctx->blob, idx);
   } else {
      uintptr_t idx = write_lookup_object(ctx, src->reg.reg) << 2;
      if (src->reg.indirect)
         idx |= 2;
      blob_write_intptr(ctx->blob, idx);
      blob_write_uint32(ctx->blob, src->reg.base_offset);
      if (src->reg.indirect) {
         write_src(ctx, src->reg.indirect);
      }
   }
}

static void
read_src(read_ctx *ctx, nir_src *src, void *mem_ctx)
{
   uintptr_t val = blob_read_intptr(ctx->blob);
   uintptr_t idx = val >> 2;
   src->is_ssa = val & 0x1;
   if (src->is_ssa) {
      src->ssa = read_lookup_object(ctx, idx);
   } else {
      bool is_indirect = val & 0x2;
      src->reg.reg = read_lookup_object(ctx, idx);
      src->reg.base_offset = blob_read_uint32(ctx->blob);
      if (is_indirect) {
         src->reg.indirect = ralloc(mem_ctx, nir_src);
         read_src(ctx, src->reg.indirect, mem_ctx);
      } else {
         src->reg.indirect = NULL;
      }
   }
}

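/* Destinations are packed into a single 32-bit word:
 *   bit 0:     is_ssa
 *   bit 1:     has-name (SSA) or has-indirect (register)
 *   bits 2-4:  num_components (SSA only)
 *   bits 5+:   bit_size (SSA only)
 */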
static void
write_dest(write_ctx *ctx, const nir_dest *dst)
{
   uint32_t val = dst->is_ssa;
   if (dst->is_ssa) {
      val |= !!(dst->ssa.name) << 1;
      val |= dst->ssa.num_components << 2;
      val |= dst->ssa.bit_size << 5;
   } else {
      val |= !!(dst->reg.indirect) << 1;
   }
   blob_write_uint32(ctx->blob, val);
   if (dst->is_ssa) {
      write_add_object(ctx, &dst->ssa);
      if (dst->ssa.name)
         blob_write_string(ctx->blob, dst->ssa.name);
   } else {
      blob_write_intptr(ctx->blob, write_lookup_object(ctx, dst->reg.reg));
      blob_write_uint32(ctx->blob, dst->reg.base_offset);
      if (dst->reg.indirect)
         write_src(ctx, dst->reg.indirect);
   }
}

static void
read_dest(read_ctx *ctx, nir_dest *dst, nir_instr *instr)
{
   uint32_t val = blob_read_uint32(ctx->blob);
   bool is_ssa = val & 0x1;
   if (is_ssa) {
      bool has_name = val & 0x2;
      unsigned num_components = (val >> 2) & 0x7;
      unsigned bit_size = val >> 5;
      char *name = has_name ? blob_read_string(ctx->blob) : NULL;
      nir_ssa_dest_init(instr, dst, num_components, bit_size, name);
      read_add_object(ctx, &dst->ssa);
   } else {
      bool is_indirect = val & 0x2;
      dst->reg.reg = read_object(ctx);
      dst->reg.base_offset = blob_read_uint32(ctx->blob);
      if (is_indirect) {
         dst->reg.indirect = ralloc(instr, nir_src);
         read_src(ctx, dst->reg.indirect, instr);
      }
   }
}

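/* ALU flags share one word: bit 0 exact, bit 1 no_signed_wrap, bit 2
 * no_unsigned_wrap, bit 3 saturate, bits 4+ write_mask.  Each source then
 * gets its own word with negate in bit 0, abs in bit 1, and four 2-bit
 * swizzle components starting at bit 2.
 */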
static void
write_alu(write_ctx *ctx, const nir_alu_instr *alu)
{
   blob_write_uint32(ctx->blob, alu->op);
   uint32_t flags = alu->exact;
   flags |= alu->no_signed_wrap << 1;
   flags |= alu->no_unsigned_wrap << 2;
   flags |= alu->dest.saturate << 3;
   flags |= alu->dest.write_mask << 4;
   blob_write_uint32(ctx->blob, flags);

   write_dest(ctx, &alu->dest.dest);

   for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
      write_src(ctx, &alu->src[i].src);
      flags = alu->src[i].negate;
      flags |= alu->src[i].abs << 1;
      for (unsigned j = 0; j < 4; j++)
         flags |= alu->src[i].swizzle[j] << (2 + 2 * j);
      blob_write_uint32(ctx->blob, flags);
   }
}

static nir_alu_instr *
read_alu(read_ctx *ctx)
{
   nir_op op = blob_read_uint32(ctx->blob);
   nir_alu_instr *alu = nir_alu_instr_create(ctx->nir, op);

   uint32_t flags = blob_read_uint32(ctx->blob);
   alu->exact = flags & 1;
   alu->no_signed_wrap = flags & 2;
   alu->no_unsigned_wrap = flags & 4;
   alu->dest.saturate = flags & 8;
   alu->dest.write_mask = flags >> 4;

   read_dest(ctx, &alu->dest.dest, &alu->instr);

   for (unsigned i = 0; i < nir_op_infos[op].num_inputs; i++) {
      read_src(ctx, &alu->src[i].src, &alu->instr);
      flags = blob_read_uint32(ctx->blob);
      alu->src[i].negate = flags & 1;
      alu->src[i].abs = flags & 2;
      for (unsigned j = 0; j < 4; j++)
         alu->src[i].swizzle[j] = (flags >> (2 * j + 2)) & 3;
   }

   return alu;
}

static void
write_deref(write_ctx *ctx, const nir_deref_instr *deref)
{
   blob_write_uint32(ctx->blob, deref->deref_type);

   blob_write_uint32(ctx->blob, deref->mode);
   encode_type_to_blob(ctx->blob, deref->type);

   write_dest(ctx, &deref->dest);

   if (deref->deref_type == nir_deref_type_var) {
      write_object(ctx, deref->var);
      return;
   }

   write_src(ctx, &deref->parent);

   switch (deref->deref_type) {
   case nir_deref_type_struct:
      blob_write_uint32(ctx->blob, deref->strct.index);
      break;

   case nir_deref_type_array:
   case nir_deref_type_ptr_as_array:
      write_src(ctx, &deref->arr.index);
      break;

   case nir_deref_type_cast:
      blob_write_uint32(ctx->blob, deref->cast.ptr_stride);
      break;

   case nir_deref_type_array_wildcard:
      /* Nothing to do */
      break;

   default:
      unreachable("Invalid deref type");
   }
}

static nir_deref_instr *
read_deref(read_ctx *ctx)
{
   nir_deref_type deref_type = blob_read_uint32(ctx->blob);
   nir_deref_instr *deref = nir_deref_instr_create(ctx->nir, deref_type);

   deref->mode = blob_read_uint32(ctx->blob);
   deref->type = decode_type_from_blob(ctx->blob);

   read_dest(ctx, &deref->dest, &deref->instr);

   if (deref_type == nir_deref_type_var) {
      deref->var = read_object(ctx);
      return deref;
   }

   read_src(ctx, &deref->parent, &deref->instr);

   switch (deref->deref_type) {
   case nir_deref_type_struct:
      deref->strct.index = blob_read_uint32(ctx->blob);
      break;

   case nir_deref_type_array:
   case nir_deref_type_ptr_as_array:
      read_src(ctx, &deref->arr.index, &deref->instr);
      break;

   case nir_deref_type_cast:
      deref->cast.ptr_stride = blob_read_uint32(ctx->blob);
      break;

   case nir_deref_type_array_wildcard:
      /* Nothing to do */
      break;

   default:
      unreachable("Invalid deref type");
   }

   return deref;
}

static void
write_intrinsic(write_ctx *ctx, const nir_intrinsic_instr *intrin)
{
   blob_write_uint32(ctx->blob, intrin->intrinsic);

   unsigned num_srcs = nir_intrinsic_infos[intrin->intrinsic].num_srcs;
   unsigned num_indices = nir_intrinsic_infos[intrin->intrinsic].num_indices;

   blob_write_uint32(ctx->blob, intrin->num_components);

   if (nir_intrinsic_infos[intrin->intrinsic].has_dest)
      write_dest(ctx, &intrin->dest);

   for (unsigned i = 0; i < num_srcs; i++)
      write_src(ctx, &intrin->src[i]);

   for (unsigned i = 0; i < num_indices; i++)
      blob_write_uint32(ctx->blob, intrin->const_index[i]);
}

static nir_intrinsic_instr *
read_intrinsic(read_ctx *ctx)
{
   nir_intrinsic_op op = blob_read_uint32(ctx->blob);

   nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(ctx->nir, op);

   unsigned num_srcs = nir_intrinsic_infos[op].num_srcs;
   unsigned num_indices = nir_intrinsic_infos[op].num_indices;

   intrin->num_components = blob_read_uint32(ctx->blob);

   if (nir_intrinsic_infos[op].has_dest)
      read_dest(ctx, &intrin->dest, &intrin->instr);

   for (unsigned i = 0; i < num_srcs; i++)
      read_src(ctx, &intrin->src[i], &intrin->instr);

   for (unsigned i = 0; i < num_indices; i++)
      intrin->const_index[i] = blob_read_uint32(ctx->blob);

   return intrin;
}

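/* load_const and ssa_undef defs reuse the SSA-dest style packing:
 * num_components lives in bits 0-2 and bit_size in bits 3 and up of a
 * single word.
 */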
static void
write_load_const(write_ctx *ctx, const nir_load_const_instr *lc)
{
   uint32_t val = lc->def.num_components;
   val |= lc->def.bit_size << 3;
   blob_write_uint32(ctx->blob, val);
   blob_write_bytes(ctx->blob, lc->value, sizeof(*lc->value) * lc->def.num_components);
   write_add_object(ctx, &lc->def);
}

static nir_load_const_instr *
read_load_const(read_ctx *ctx)
{
   uint32_t val = blob_read_uint32(ctx->blob);

   nir_load_const_instr *lc =
      nir_load_const_instr_create(ctx->nir, val & 0x7, val >> 3);

   blob_copy_bytes(ctx->blob, lc->value, sizeof(*lc->value) * lc->def.num_components);
   read_add_object(ctx, &lc->def);
   return lc;
}

static void
write_ssa_undef(write_ctx *ctx, const nir_ssa_undef_instr *undef)
{
   uint32_t val = undef->def.num_components;
   val |= undef->def.bit_size << 3;
   blob_write_uint32(ctx->blob, val);
   write_add_object(ctx, &undef->def);
}

static nir_ssa_undef_instr *
read_ssa_undef(read_ctx *ctx)
{
   uint32_t val = blob_read_uint32(ctx->blob);

   nir_ssa_undef_instr *undef =
      nir_ssa_undef_instr_create(ctx->nir, val & 0x7, val >> 3);

   read_add_object(ctx, &undef->def);
   return undef;
}

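/* Texture instruction booleans and small enums are packed into a single
 * 32-bit word; the STATIC_ASSERT in write_tex verifies the layout, and
 * naming the unused bits keeps the whole word initialized.
 */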
union packed_tex_data {
   uint32_t u32;
   struct {
      enum glsl_sampler_dim sampler_dim:4;
      nir_alu_type dest_type:8;
      unsigned coord_components:3;
      unsigned is_array:1;
      unsigned is_shadow:1;
      unsigned is_new_style_shadow:1;
      unsigned component:2;
      unsigned unused:10; /* Mark unused for valgrind. */
   } u;
};

static void
write_tex(write_ctx *ctx, const nir_tex_instr *tex)
{
   blob_write_uint32(ctx->blob, tex->num_srcs);
   blob_write_uint32(ctx->blob, tex->op);
   blob_write_uint32(ctx->blob, tex->texture_index);
   blob_write_uint32(ctx->blob, tex->texture_array_size);
   blob_write_uint32(ctx->blob, tex->sampler_index);
   blob_write_bytes(ctx->blob, tex->tg4_offsets, sizeof(tex->tg4_offsets));

   STATIC_ASSERT(sizeof(union packed_tex_data) == sizeof(uint32_t));
   union packed_tex_data packed = {
      .u.sampler_dim = tex->sampler_dim,
      .u.dest_type = tex->dest_type,
      .u.coord_components = tex->coord_components,
      .u.is_array = tex->is_array,
      .u.is_shadow = tex->is_shadow,
      .u.is_new_style_shadow = tex->is_new_style_shadow,
      .u.component = tex->component,
   };
   blob_write_uint32(ctx->blob, packed.u32);

   write_dest(ctx, &tex->dest);
   for (unsigned i = 0; i < tex->num_srcs; i++) {
      blob_write_uint32(ctx->blob, tex->src[i].src_type);
      write_src(ctx, &tex->src[i].src);
   }
}

static nir_tex_instr *
read_tex(read_ctx *ctx)
{
   unsigned num_srcs = blob_read_uint32(ctx->blob);
   nir_tex_instr *tex = nir_tex_instr_create(ctx->nir, num_srcs);

   tex->op = blob_read_uint32(ctx->blob);
   tex->texture_index = blob_read_uint32(ctx->blob);
   tex->texture_array_size = blob_read_uint32(ctx->blob);
   tex->sampler_index = blob_read_uint32(ctx->blob);
   blob_copy_bytes(ctx->blob, tex->tg4_offsets, sizeof(tex->tg4_offsets));

   union packed_tex_data packed;
   packed.u32 = blob_read_uint32(ctx->blob);
   tex->sampler_dim = packed.u.sampler_dim;
   tex->dest_type = packed.u.dest_type;
   tex->coord_components = packed.u.coord_components;
   tex->is_array = packed.u.is_array;
   tex->is_shadow = packed.u.is_shadow;
   tex->is_new_style_shadow = packed.u.is_new_style_shadow;
   tex->component = packed.u.component;

   read_dest(ctx, &tex->dest, &tex->instr);
   for (unsigned i = 0; i < tex->num_srcs; i++) {
      tex->src[i].src_type = blob_read_uint32(ctx->blob);
      read_src(ctx, &tex->src[i].src, &tex->instr);
   }

   return tex;
}

static void
write_phi(write_ctx *ctx, const nir_phi_instr *phi)
{
   /* Phi nodes are special, since they may reference SSA definitions and
    * basic blocks that don't exist yet. We leave two empty uintptr_t's here,
    * and then store enough information so that a later fixup pass can fill
    * them in correctly.
    */
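   /* Concretely, each phi source occupies two reserved uintptr_t slots in
    * the blob; write_fixup_phis overwrites them with the remap indices of
    * the source's SSA def and predecessor block once every object has been
    * assigned an index.
    */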
   write_dest(ctx, &phi->dest);

   blob_write_uint32(ctx->blob, exec_list_length(&phi->srcs));

   nir_foreach_phi_src(src, phi) {
      assert(src->src.is_ssa);
      size_t blob_offset = blob_reserve_intptr(ctx->blob);
      ASSERTED size_t blob_offset2 = blob_reserve_intptr(ctx->blob);
      assert(blob_offset + sizeof(uintptr_t) == blob_offset2);
      write_phi_fixup fixup = {
         .blob_offset = blob_offset,
         .src = src->src.ssa,
         .block = src->pred,
      };
      util_dynarray_append(&ctx->phi_fixups, write_phi_fixup, fixup);
   }
}

static void
write_fixup_phis(write_ctx *ctx)
{
   util_dynarray_foreach(&ctx->phi_fixups, write_phi_fixup, fixup) {
      uintptr_t *blob_ptr = (uintptr_t *)(ctx->blob->data + fixup->blob_offset);
      blob_ptr[0] = write_lookup_object(ctx, fixup->src);
      blob_ptr[1] = write_lookup_object(ctx, fixup->block);
   }

   util_dynarray_clear(&ctx->phi_fixups);
}

static nir_phi_instr *
read_phi(read_ctx *ctx, nir_block *blk)
{
   nir_phi_instr *phi = nir_phi_instr_create(ctx->nir);

   read_dest(ctx, &phi->dest, &phi->instr);

   unsigned num_srcs = blob_read_uint32(ctx->blob);

   /* For similar reasons as before, we just store the index directly into the
    * pointer, and let a later pass resolve the phi sources.
    *
    * In order to ensure that the copied sources (which are just the indices
    * from the blob for now) don't get inserted into the old shader's use-def
    * lists, we have to add the phi instruction *before* we set up its
    * sources.
    */
   nir_instr_insert_after_block(blk, &phi->instr);

   for (unsigned i = 0; i < num_srcs; i++) {
      nir_phi_src *src = ralloc(phi, nir_phi_src);

      src->src.is_ssa = true;
      src->src.ssa = (nir_ssa_def *) blob_read_intptr(ctx->blob);
      src->pred = (nir_block *) blob_read_intptr(ctx->blob);

      /* Since we're not letting nir_insert_instr handle use/def stuff for us,
       * we have to set the parent_instr manually. It doesn't really matter
       * when we do it, so we might as well do it here.
       */
      src->src.parent_instr = &phi->instr;

      /* Stash it in the list of phi sources. We'll walk this list and fix up
       * sources at the very end of read_function_impl.
       */
      list_add(&src->src.use_link, &ctx->phi_srcs);

      exec_list_push_tail(&phi->srcs, &src->node);
   }

   return phi;
}

static void
read_fixup_phis(read_ctx *ctx)
{
   list_for_each_entry_safe(nir_phi_src, src, &ctx->phi_srcs, src.use_link) {
      src->pred = read_lookup_object(ctx, (uintptr_t)src->pred);
      src->src.ssa = read_lookup_object(ctx, (uintptr_t)src->src.ssa);

      /* Remove from this list */
      list_del(&src->src.use_link);

      list_addtail(&src->src.use_link, &src->src.ssa->uses);
   }
   assert(list_is_empty(&ctx->phi_srcs));
}

static void
write_jump(write_ctx *ctx, const nir_jump_instr *jmp)
{
   blob_write_uint32(ctx->blob, jmp->type);
}

static nir_jump_instr *
read_jump(read_ctx *ctx)
{
   nir_jump_type type = blob_read_uint32(ctx->blob);
   nir_jump_instr *jmp = nir_jump_instr_create(ctx->nir, type);
   return jmp;
}

static void
write_call(write_ctx *ctx, const nir_call_instr *call)
{
   blob_write_intptr(ctx->blob, write_lookup_object(ctx, call->callee));

   for (unsigned i = 0; i < call->num_params; i++)
      write_src(ctx, &call->params[i]);
}

static nir_call_instr *
read_call(read_ctx *ctx)
{
   nir_function *callee = read_object(ctx);
   nir_call_instr *call = nir_call_instr_create(ctx->nir, callee);

   for (unsigned i = 0; i < call->num_params; i++)
      read_src(ctx, &call->params[i], call);

   return call;
}

static void
write_instr(write_ctx *ctx, const nir_instr *instr)
{
   blob_write_uint32(ctx->blob, instr->type);
   switch (instr->type) {
   case nir_instr_type_alu:
      write_alu(ctx, nir_instr_as_alu(instr));
      break;
   case nir_instr_type_deref:
      write_deref(ctx, nir_instr_as_deref(instr));
      break;
   case nir_instr_type_intrinsic:
      write_intrinsic(ctx, nir_instr_as_intrinsic(instr));
      break;
   case nir_instr_type_load_const:
      write_load_const(ctx, nir_instr_as_load_const(instr));
      break;
   case nir_instr_type_ssa_undef:
      write_ssa_undef(ctx, nir_instr_as_ssa_undef(instr));
      break;
   case nir_instr_type_tex:
      write_tex(ctx, nir_instr_as_tex(instr));
      break;
   case nir_instr_type_phi:
      write_phi(ctx, nir_instr_as_phi(instr));
      break;
   case nir_instr_type_jump:
      write_jump(ctx, nir_instr_as_jump(instr));
      break;
   case nir_instr_type_call:
      write_call(ctx, nir_instr_as_call(instr));
      break;
   case nir_instr_type_parallel_copy:
      unreachable("Cannot write parallel copies");
   default:
      unreachable("bad instr type");
   }
}

static void
read_instr(read_ctx *ctx, nir_block *block)
{
   nir_instr_type type = blob_read_uint32(ctx->blob);
   nir_instr *instr;
   switch (type) {
   case nir_instr_type_alu:
      instr = &read_alu(ctx)->instr;
      break;
   case nir_instr_type_deref:
      instr = &read_deref(ctx)->instr;
      break;
   case nir_instr_type_intrinsic:
      instr = &read_intrinsic(ctx)->instr;
      break;
   case nir_instr_type_load_const:
      instr = &read_load_const(ctx)->instr;
      break;
   case nir_instr_type_ssa_undef:
      instr = &read_ssa_undef(ctx)->instr;
      break;
   case nir_instr_type_tex:
      instr = &read_tex(ctx)->instr;
      break;
   case nir_instr_type_phi:
      /* Phi instructions are a special case when reading: we don't want
       * inserting the instruction to automatically set up its use/defs.
       * Instead, we wait until all blocks and instructions have been read
       * so the sources can be wired up afterwards.
       */
      read_phi(ctx, block);
      return;
   case nir_instr_type_jump:
      instr = &read_jump(ctx)->instr;
      break;
   case nir_instr_type_call:
      instr = &read_call(ctx)->instr;
      break;
   case nir_instr_type_parallel_copy:
      unreachable("Cannot read parallel copies");
   default:
      unreachable("bad instr type");
   }

   nir_instr_insert_after_block(block, instr);
}

static void
write_block(write_ctx *ctx, const nir_block *block)
{
   write_add_object(ctx, block);
   blob_write_uint32(ctx->blob, exec_list_length(&block->instr_list));
   nir_foreach_instr(instr, block)
      write_instr(ctx, instr);
}

static void
read_block(read_ctx *ctx, struct exec_list *cf_list)
{
   /* Don't actually create a new block. Just use the one at the tail of
    * the list: NIR guarantees that the tail of a CF list is a block and
    * that no two blocks sit side-by-side in the IR, so it should still be
    * empty.
    */
   nir_block *block =
      exec_node_data(nir_block, exec_list_get_tail(cf_list), cf_node.node);

   read_add_object(ctx, block);
   unsigned num_instrs = blob_read_uint32(ctx->blob);
   for (unsigned i = 0; i < num_instrs; i++) {
      read_instr(ctx, block);
   }
}

static void
write_cf_list(write_ctx *ctx, const struct exec_list *cf_list);

static void
read_cf_list(read_ctx *ctx, struct exec_list *cf_list);

static void
write_if(write_ctx *ctx, nir_if *nif)
{
   write_src(ctx, &nif->condition);

   write_cf_list(ctx, &nif->then_list);
   write_cf_list(ctx, &nif->else_list);
}

static void
read_if(read_ctx *ctx, struct exec_list *cf_list)
{
   nir_if *nif = nir_if_create(ctx->nir);

   read_src(ctx, &nif->condition, nif);

   nir_cf_node_insert_end(cf_list, &nif->cf_node);

   read_cf_list(ctx, &nif->then_list);
   read_cf_list(ctx, &nif->else_list);
}

static void
write_loop(write_ctx *ctx, nir_loop *loop)
{
   write_cf_list(ctx, &loop->body);
}

static void
read_loop(read_ctx *ctx, struct exec_list *cf_list)
{
   nir_loop *loop = nir_loop_create(ctx->nir);

   nir_cf_node_insert_end(cf_list, &loop->cf_node);

   read_cf_list(ctx, &loop->body);
}

static void
write_cf_node(write_ctx *ctx, nir_cf_node *cf)
{
   blob_write_uint32(ctx->blob, cf->type);

   switch (cf->type) {
   case nir_cf_node_block:
      write_block(ctx, nir_cf_node_as_block(cf));
      break;
   case nir_cf_node_if:
      write_if(ctx, nir_cf_node_as_if(cf));
      break;
   case nir_cf_node_loop:
      write_loop(ctx, nir_cf_node_as_loop(cf));
      break;
   default:
      unreachable("bad cf type");
   }
}

static void
read_cf_node(read_ctx *ctx, struct exec_list *list)
{
   nir_cf_node_type type = blob_read_uint32(ctx->blob);

   switch (type) {
   case nir_cf_node_block:
      read_block(ctx, list);
      break;
   case nir_cf_node_if:
      read_if(ctx, list);
      break;
   case nir_cf_node_loop:
      read_loop(ctx, list);
      break;
   default:
      unreachable("bad cf type");
   }
}

static void
write_cf_list(write_ctx *ctx, const struct exec_list *cf_list)
{
   blob_write_uint32(ctx->blob, exec_list_length(cf_list));
   foreach_list_typed(nir_cf_node, cf, node, cf_list) {
      write_cf_node(ctx, cf);
   }
}

static void
read_cf_list(read_ctx *ctx, struct exec_list *cf_list)
{
   uint32_t num_cf_nodes = blob_read_uint32(ctx->blob);
   for (unsigned i = 0; i < num_cf_nodes; i++)
      read_cf_node(ctx, cf_list);
}

static void
write_function_impl(write_ctx *ctx, const nir_function_impl *fi)
{
   write_var_list(ctx, &fi->locals);
   write_reg_list(ctx, &fi->registers);
   blob_write_uint32(ctx->blob, fi->reg_alloc);

   write_cf_list(ctx, &fi->body);
   write_fixup_phis(ctx);
}

static nir_function_impl *
read_function_impl(read_ctx *ctx, nir_function *fxn)
{
   nir_function_impl *fi = nir_function_impl_create_bare(ctx->nir);
   fi->function = fxn;

   read_var_list(ctx, &fi->locals);
   read_reg_list(ctx, &fi->registers);
   fi->reg_alloc = blob_read_uint32(ctx->blob);

   read_cf_list(ctx, &fi->body);
   read_fixup_phis(ctx);

   fi->valid_metadata = 0;

   return fi;
}

static void
write_function(write_ctx *ctx, const nir_function *fxn)
{
   blob_write_uint32(ctx->blob, !!(fxn->name));
   if (fxn->name)
      blob_write_string(ctx->blob, fxn->name);

   write_add_object(ctx, fxn);

   blob_write_uint32(ctx->blob, fxn->num_params);
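   /* Each parameter is packed into one word: num_components in bits 0-7
    * and bit_size in bits 8-15.
    */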
   for (unsigned i = 0; i < fxn->num_params; i++) {
      uint32_t val =
         ((uint32_t)fxn->params[i].num_components) |
         ((uint32_t)fxn->params[i].bit_size) << 8;
      blob_write_uint32(ctx->blob, val);
   }

   blob_write_uint32(ctx->blob, fxn->is_entrypoint);

   /* At first glance, it looks like we should write the function_impl here.
    * However, call instructions need to be able to reference at least the
    * function and those will get processed as we write the function_impls.
    * We stop here and write function_impls as a second pass.
    */
}

static void
read_function(read_ctx *ctx)
{
   bool has_name = blob_read_uint32(ctx->blob);
   char *name = has_name ? blob_read_string(ctx->blob) : NULL;

   nir_function *fxn = nir_function_create(ctx->nir, name);

   read_add_object(ctx, fxn);

   fxn->num_params = blob_read_uint32(ctx->blob);
   fxn->params = ralloc_array(fxn, nir_parameter, fxn->num_params);
   for (unsigned i = 0; i < fxn->num_params; i++) {
      uint32_t val = blob_read_uint32(ctx->blob);
      fxn->params[i].num_components = val & 0xff;
      fxn->params[i].bit_size = (val >> 8) & 0xff;
   }

   fxn->is_entrypoint = blob_read_uint32(ctx->blob);
}

void
nir_serialize(struct blob *blob, const nir_shader *nir, bool strip)
{
   nir_shader *stripped = NULL;

   if (strip) {
      /* Drop unnecessary information (like variable names) so the
       * serialized NIR is smaller and more isomorphic shaders are detected
       * when hashing, increasing cache hits.
       */
      stripped = nir_shader_clone(NULL, nir);
      nir_strip(stripped);
      nir = stripped;
   }

   write_ctx ctx;
   ctx.remap_table = _mesa_pointer_hash_table_create(NULL);
   ctx.next_idx = 0;
   ctx.blob = blob;
   ctx.nir = nir;
   util_dynarray_init(&ctx.phi_fixups, NULL);

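   /* Reserve space for the total object count up front; it is patched in
    * near the end of this function, once every object has been assigned an
    * index.
    */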
   size_t idx_size_offset = blob_reserve_intptr(blob);

   struct shader_info info = nir->info;
   uint32_t strings = 0;
   if (info.name)
      strings |= 0x1;
   if (info.label)
      strings |= 0x2;
   blob_write_uint32(blob, strings);
   if (info.name)
      blob_write_string(blob, info.name);
   if (info.label)
      blob_write_string(blob, info.label);
   info.name = info.label = NULL;
   blob_write_bytes(blob, (uint8_t *) &info, sizeof(info));

   write_var_list(&ctx, &nir->uniforms);
   write_var_list(&ctx, &nir->inputs);
   write_var_list(&ctx, &nir->outputs);
   write_var_list(&ctx, &nir->shared);
   write_var_list(&ctx, &nir->globals);
   write_var_list(&ctx, &nir->system_values);

   blob_write_uint32(blob, nir->num_inputs);
   blob_write_uint32(blob, nir->num_uniforms);
   blob_write_uint32(blob, nir->num_outputs);
   blob_write_uint32(blob, nir->num_shared);
   blob_write_uint32(blob, nir->scratch_size);

   blob_write_uint32(blob, exec_list_length(&nir->functions));
   nir_foreach_function(fxn, nir) {
      write_function(&ctx, fxn);
   }

   nir_foreach_function(fxn, nir) {
      write_function_impl(&ctx, fxn->impl);
   }

   blob_write_uint32(blob, nir->constant_data_size);
   if (nir->constant_data_size > 0)
      blob_write_bytes(blob, nir->constant_data, nir->constant_data_size);

   *(uintptr_t *)(blob->data + idx_size_offset) = ctx.next_idx;

   _mesa_hash_table_destroy(ctx.remap_table, NULL);
   util_dynarray_fini(&ctx.phi_fixups);

   if (strip)
      ralloc_free(stripped);
}

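/* Reads back a shader written by nir_serialize.  The index table mirrors
 * the writer's remap table: both sides add objects in the same order, so
 * indices stored in the blob resolve to the matching deserialized pointers.
 */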
nir_shader *
nir_deserialize(void *mem_ctx,
                const struct nir_shader_compiler_options *options,
                struct blob_reader *blob)
{
   read_ctx ctx;
   ctx.blob = blob;
   list_inithead(&ctx.phi_srcs);
   ctx.idx_table_len = blob_read_intptr(blob);
   ctx.idx_table = calloc(ctx.idx_table_len, sizeof(uintptr_t));
   ctx.next_idx = 0;

   uint32_t strings = blob_read_uint32(blob);
   char *name = (strings & 0x1) ? blob_read_string(blob) : NULL;
   char *label = (strings & 0x2) ? blob_read_string(blob) : NULL;

   struct shader_info info;
   blob_copy_bytes(blob, (uint8_t *) &info, sizeof(info));

   ctx.nir = nir_shader_create(mem_ctx, info.stage, options, NULL);

   info.name = name ? ralloc_strdup(ctx.nir, name) : NULL;
   info.label = label ? ralloc_strdup(ctx.nir, label) : NULL;

   ctx.nir->info = info;

   read_var_list(&ctx, &ctx.nir->uniforms);
   read_var_list(&ctx, &ctx.nir->inputs);
   read_var_list(&ctx, &ctx.nir->outputs);
   read_var_list(&ctx, &ctx.nir->shared);
   read_var_list(&ctx, &ctx.nir->globals);
   read_var_list(&ctx, &ctx.nir->system_values);

   ctx.nir->num_inputs = blob_read_uint32(blob);
   ctx.nir->num_uniforms = blob_read_uint32(blob);
   ctx.nir->num_outputs = blob_read_uint32(blob);
   ctx.nir->num_shared = blob_read_uint32(blob);
   ctx.nir->scratch_size = blob_read_uint32(blob);

   unsigned num_functions = blob_read_uint32(blob);
   for (unsigned i = 0; i < num_functions; i++)
      read_function(&ctx);

   nir_foreach_function(fxn, ctx.nir)
      fxn->impl = read_function_impl(&ctx, fxn);

   ctx.nir->constant_data_size = blob_read_uint32(blob);
   if (ctx.nir->constant_data_size > 0) {
      ctx.nir->constant_data =
         ralloc_size(ctx.nir, ctx.nir->constant_data_size);
      blob_copy_bytes(blob, ctx.nir->constant_data,
                      ctx.nir->constant_data_size);
   }

   free(ctx.idx_table);

   return ctx.nir;
}

void
nir_shader_serialize_deserialize(nir_shader *shader)
{
   const struct nir_shader_compiler_options *options = shader->options;

   struct blob writer;
   blob_init(&writer);
   nir_serialize(&writer, shader, false);

   /* Delete all of the shader's ralloc children but leave the shader
    * itself alone.
    */
   void *dead_ctx = ralloc_context(NULL);
   ralloc_adopt(dead_ctx, shader);
   ralloc_free(dead_ctx);

   dead_ctx = ralloc_context(NULL);

   struct blob_reader reader;
   blob_reader_init(&reader, writer.data, writer.size);
   nir_shader *copy = nir_deserialize(dead_ctx, options, &reader);

   blob_finish(&writer);

   nir_shader_replace(shader, copy);
   ralloc_free(dead_ctx);
}