nir: support lowering clipdist to arrays
[mesa.git] / src / compiler / nir / nir_serialize.c
1 /*
2 * Copyright © 2017 Connor Abbott
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "nir_serialize.h"
25 #include "nir_control_flow.h"
26 #include "util/u_dynarray.h"
27
28 typedef struct {
29 size_t blob_offset;
30 nir_ssa_def *src;
31 nir_block *block;
32 } write_phi_fixup;
33
34 typedef struct {
35 const nir_shader *nir;
36
37 struct blob *blob;
38
39 /* maps pointer to index */
40 struct hash_table *remap_table;
41
42 /* the next index to assign to a NIR in-memory object */
43 uintptr_t next_idx;
44
45 /* Array of write_phi_fixup structs representing phi sources that need to
46 * be resolved in the second pass.
47 */
48 struct util_dynarray phi_fixups;
49 } write_ctx;
50
51 typedef struct {
52 nir_shader *nir;
53
54 struct blob_reader *blob;
55
56 /* the next index to assign to a NIR in-memory object */
57 uintptr_t next_idx;
58
59 /* The length of the index -> object table */
60 uintptr_t idx_table_len;
61
62 /* map from index to deserialized pointer */
63 void **idx_table;
64
65 /* List of phi sources. */
66 struct list_head phi_srcs;
67
68 } read_ctx;
69
70 static void
71 write_add_object(write_ctx *ctx, const void *obj)
72 {
73 uintptr_t index = ctx->next_idx++;
74 _mesa_hash_table_insert(ctx->remap_table, obj, (void *) index);
75 }
76
77 static uintptr_t
78 write_lookup_object(write_ctx *ctx, const void *obj)
79 {
80 struct hash_entry *entry = _mesa_hash_table_search(ctx->remap_table, obj);
81 assert(entry);
82 return (uintptr_t) entry->data;
83 }
84
85 static void
86 write_object(write_ctx *ctx, const void *obj)
87 {
88 blob_write_intptr(ctx->blob, write_lookup_object(ctx, obj));
89 }
90
91 static void
92 read_add_object(read_ctx *ctx, void *obj)
93 {
94 assert(ctx->next_idx < ctx->idx_table_len);
95 ctx->idx_table[ctx->next_idx++] = obj;
96 }
97
98 static void *
99 read_lookup_object(read_ctx *ctx, uintptr_t idx)
100 {
101 assert(idx < ctx->idx_table_len);
102 return ctx->idx_table[idx];
103 }
104
105 static void *
106 read_object(read_ctx *ctx)
107 {
108 return read_lookup_object(ctx, blob_read_intptr(ctx->blob));
109 }
110
111 static void
112 write_constant(write_ctx *ctx, const nir_constant *c)
113 {
114 blob_write_bytes(ctx->blob, c->values, sizeof(c->values));
115 blob_write_uint32(ctx->blob, c->num_elements);
116 for (unsigned i = 0; i < c->num_elements; i++)
117 write_constant(ctx, c->elements[i]);
118 }
119
120 static nir_constant *
121 read_constant(read_ctx *ctx, nir_variable *nvar)
122 {
123 nir_constant *c = ralloc(nvar, nir_constant);
124
125 blob_copy_bytes(ctx->blob, (uint8_t *)c->values, sizeof(c->values));
126 c->num_elements = blob_read_uint32(ctx->blob);
127 c->elements = ralloc_array(nvar, nir_constant *, c->num_elements);
128 for (unsigned i = 0; i < c->num_elements; i++)
129 c->elements[i] = read_constant(ctx, nvar);
130
131 return c;
132 }
133
134 static void
135 write_variable(write_ctx *ctx, const nir_variable *var)
136 {
137 write_add_object(ctx, var);
138 encode_type_to_blob(ctx->blob, var->type);
139 blob_write_uint32(ctx->blob, !!(var->name));
140 if (var->name)
141 blob_write_string(ctx->blob, var->name);
142 blob_write_bytes(ctx->blob, (uint8_t *) &var->data, sizeof(var->data));
143 blob_write_uint32(ctx->blob, var->num_state_slots);
144 for (unsigned i = 0; i < var->num_state_slots; i++) {
145 for (unsigned j = 0; j < STATE_LENGTH; j++)
146 blob_write_uint32(ctx->blob, var->state_slots[i].tokens[j]);
147 blob_write_uint32(ctx->blob, var->state_slots[i].swizzle);
148 }
149 blob_write_uint32(ctx->blob, !!(var->constant_initializer));
150 if (var->constant_initializer)
151 write_constant(ctx, var->constant_initializer);
152 blob_write_uint32(ctx->blob, !!(var->interface_type));
153 if (var->interface_type)
154 encode_type_to_blob(ctx->blob, var->interface_type);
155 blob_write_uint32(ctx->blob, var->num_members);
156 if (var->num_members > 0) {
157 blob_write_bytes(ctx->blob, (uint8_t *) var->members,
158 var->num_members * sizeof(*var->members));
159 }
160 }
161
162 static nir_variable *
163 read_variable(read_ctx *ctx)
164 {
165 nir_variable *var = rzalloc(ctx->nir, nir_variable);
166 read_add_object(ctx, var);
167
168 var->type = decode_type_from_blob(ctx->blob);
169 bool has_name = blob_read_uint32(ctx->blob);
170 if (has_name) {
171 const char *name = blob_read_string(ctx->blob);
172 var->name = ralloc_strdup(var, name);
173 } else {
174 var->name = NULL;
175 }
176 blob_copy_bytes(ctx->blob, (uint8_t *) &var->data, sizeof(var->data));
177 var->num_state_slots = blob_read_uint32(ctx->blob);
178 if (var->num_state_slots != 0) {
179 var->state_slots = ralloc_array(var, nir_state_slot,
180 var->num_state_slots);
181 for (unsigned i = 0; i < var->num_state_slots; i++) {
182 for (unsigned j = 0; j < STATE_LENGTH; j++)
183 var->state_slots[i].tokens[j] = blob_read_uint32(ctx->blob);
184 var->state_slots[i].swizzle = blob_read_uint32(ctx->blob);
185 }
186 }
187 bool has_const_initializer = blob_read_uint32(ctx->blob);
188 if (has_const_initializer)
189 var->constant_initializer = read_constant(ctx, var);
190 else
191 var->constant_initializer = NULL;
192 bool has_interface_type = blob_read_uint32(ctx->blob);
193 if (has_interface_type)
194 var->interface_type = decode_type_from_blob(ctx->blob);
195 else
196 var->interface_type = NULL;
197 var->num_members = blob_read_uint32(ctx->blob);
198 if (var->num_members > 0) {
199 var->members = ralloc_array(var, struct nir_variable_data,
200 var->num_members);
201 blob_copy_bytes(ctx->blob, (uint8_t *) var->members,
202 var->num_members * sizeof(*var->members));
203 }
204
205 return var;
206 }
207
208 static void
209 write_var_list(write_ctx *ctx, const struct exec_list *src)
210 {
211 blob_write_uint32(ctx->blob, exec_list_length(src));
212 foreach_list_typed(nir_variable, var, node, src) {
213 write_variable(ctx, var);
214 }
215 }
216
217 static void
218 read_var_list(read_ctx *ctx, struct exec_list *dst)
219 {
220 exec_list_make_empty(dst);
221 unsigned num_vars = blob_read_uint32(ctx->blob);
222 for (unsigned i = 0; i < num_vars; i++) {
223 nir_variable *var = read_variable(ctx);
224 exec_list_push_tail(dst, &var->node);
225 }
226 }
227
228 static void
229 write_register(write_ctx *ctx, const nir_register *reg)
230 {
231 write_add_object(ctx, reg);
232 blob_write_uint32(ctx->blob, reg->num_components);
233 blob_write_uint32(ctx->blob, reg->bit_size);
234 blob_write_uint32(ctx->blob, reg->num_array_elems);
235 blob_write_uint32(ctx->blob, reg->index);
236 blob_write_uint32(ctx->blob, !!(reg->name));
237 if (reg->name)
238 blob_write_string(ctx->blob, reg->name);
239 }
240
241 static nir_register *
242 read_register(read_ctx *ctx)
243 {
244 nir_register *reg = ralloc(ctx->nir, nir_register);
245 read_add_object(ctx, reg);
246 reg->num_components = blob_read_uint32(ctx->blob);
247 reg->bit_size = blob_read_uint32(ctx->blob);
248 reg->num_array_elems = blob_read_uint32(ctx->blob);
249 reg->index = blob_read_uint32(ctx->blob);
250 bool has_name = blob_read_uint32(ctx->blob);
251 if (has_name) {
252 const char *name = blob_read_string(ctx->blob);
253 reg->name = ralloc_strdup(reg, name);
254 } else {
255 reg->name = NULL;
256 }
257
258 list_inithead(&reg->uses);
259 list_inithead(&reg->defs);
260 list_inithead(&reg->if_uses);
261
262 return reg;
263 }
264
265 static void
266 write_reg_list(write_ctx *ctx, const struct exec_list *src)
267 {
268 blob_write_uint32(ctx->blob, exec_list_length(src));
269 foreach_list_typed(nir_register, reg, node, src)
270 write_register(ctx, reg);
271 }
272
273 static void
274 read_reg_list(read_ctx *ctx, struct exec_list *dst)
275 {
276 exec_list_make_empty(dst);
277 unsigned num_regs = blob_read_uint32(ctx->blob);
278 for (unsigned i = 0; i < num_regs; i++) {
279 nir_register *reg = read_register(ctx);
280 exec_list_push_tail(dst, &reg->node);
281 }
282 }
283
284 static void
285 write_src(write_ctx *ctx, const nir_src *src)
286 {
287 /* Since sources are very frequent, we try to save some space when storing
288 * them. In particular, we store whether the source is a register and
289 * whether the register has an indirect index in the low two bits. We can
290 * assume that the high two bits of the index are zero, since otherwise our
291 * address space would've been exhausted allocating the remap table!
292 */
293 if (src->is_ssa) {
294 uintptr_t idx = write_lookup_object(ctx, src->ssa) << 2;
295 idx |= 1;
296 blob_write_intptr(ctx->blob, idx);
297 } else {
298 uintptr_t idx = write_lookup_object(ctx, src->reg.reg) << 2;
299 if (src->reg.indirect)
300 idx |= 2;
301 blob_write_intptr(ctx->blob, idx);
302 blob_write_uint32(ctx->blob, src->reg.base_offset);
303 if (src->reg.indirect) {
304 write_src(ctx, src->reg.indirect);
305 }
306 }
307 }
308
309 static void
310 read_src(read_ctx *ctx, nir_src *src, void *mem_ctx)
311 {
312 uintptr_t val = blob_read_intptr(ctx->blob);
313 uintptr_t idx = val >> 2;
314 src->is_ssa = val & 0x1;
315 if (src->is_ssa) {
316 src->ssa = read_lookup_object(ctx, idx);
317 } else {
318 bool is_indirect = val & 0x2;
319 src->reg.reg = read_lookup_object(ctx, idx);
320 src->reg.base_offset = blob_read_uint32(ctx->blob);
321 if (is_indirect) {
322 src->reg.indirect = ralloc(mem_ctx, nir_src);
323 read_src(ctx, src->reg.indirect, mem_ctx);
324 } else {
325 src->reg.indirect = NULL;
326 }
327 }
328 }
329
330 static void
331 write_dest(write_ctx *ctx, const nir_dest *dst)
332 {
333 uint32_t val = dst->is_ssa;
334 if (dst->is_ssa) {
335 val |= !!(dst->ssa.name) << 1;
336 val |= dst->ssa.num_components << 2;
337 val |= dst->ssa.bit_size << 5;
338 } else {
339 val |= !!(dst->reg.indirect) << 1;
340 }
341 blob_write_uint32(ctx->blob, val);
342 if (dst->is_ssa) {
343 write_add_object(ctx, &dst->ssa);
344 if (dst->ssa.name)
345 blob_write_string(ctx->blob, dst->ssa.name);
346 } else {
347 blob_write_intptr(ctx->blob, write_lookup_object(ctx, dst->reg.reg));
348 blob_write_uint32(ctx->blob, dst->reg.base_offset);
349 if (dst->reg.indirect)
350 write_src(ctx, dst->reg.indirect);
351 }
352 }
353
354 static void
355 read_dest(read_ctx *ctx, nir_dest *dst, nir_instr *instr)
356 {
357 uint32_t val = blob_read_uint32(ctx->blob);
358 bool is_ssa = val & 0x1;
359 if (is_ssa) {
360 bool has_name = val & 0x2;
361 unsigned num_components = (val >> 2) & 0x7;
362 unsigned bit_size = val >> 5;
363 char *name = has_name ? blob_read_string(ctx->blob) : NULL;
364 nir_ssa_dest_init(instr, dst, num_components, bit_size, name);
365 read_add_object(ctx, &dst->ssa);
366 } else {
367 bool is_indirect = val & 0x2;
368 dst->reg.reg = read_object(ctx);
369 dst->reg.base_offset = blob_read_uint32(ctx->blob);
370 if (is_indirect) {
371 dst->reg.indirect = ralloc(instr, nir_src);
372 read_src(ctx, dst->reg.indirect, instr);
373 }
374 }
375 }
376
377 static void
378 write_alu(write_ctx *ctx, const nir_alu_instr *alu)
379 {
380 blob_write_uint32(ctx->blob, alu->op);
381 uint32_t flags = alu->exact;
382 flags |= alu->no_signed_wrap << 1;
383 flags |= alu->no_unsigned_wrap << 2;
384 flags |= alu->dest.saturate << 3;
385 flags |= alu->dest.write_mask << 4;
386 blob_write_uint32(ctx->blob, flags);
387
388 write_dest(ctx, &alu->dest.dest);
389
390 for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
391 write_src(ctx, &alu->src[i].src);
392 flags = alu->src[i].negate;
393 flags |= alu->src[i].abs << 1;
394 for (unsigned j = 0; j < 4; j++)
395 flags |= alu->src[i].swizzle[j] << (2 + 2 * j);
396 blob_write_uint32(ctx->blob, flags);
397 }
398 }
399
400 static nir_alu_instr *
401 read_alu(read_ctx *ctx)
402 {
403 nir_op op = blob_read_uint32(ctx->blob);
404 nir_alu_instr *alu = nir_alu_instr_create(ctx->nir, op);
405
406 uint32_t flags = blob_read_uint32(ctx->blob);
407 alu->exact = flags & 1;
408 alu->no_signed_wrap = flags & 2;
409 alu->no_unsigned_wrap = flags & 4;
410 alu->dest.saturate = flags & 8;
411 alu->dest.write_mask = flags >> 4;
412
413 read_dest(ctx, &alu->dest.dest, &alu->instr);
414
415 for (unsigned i = 0; i < nir_op_infos[op].num_inputs; i++) {
416 read_src(ctx, &alu->src[i].src, &alu->instr);
417 flags = blob_read_uint32(ctx->blob);
418 alu->src[i].negate = flags & 1;
419 alu->src[i].abs = flags & 2;
420 for (unsigned j = 0; j < 4; j++)
421 alu->src[i].swizzle[j] = (flags >> (2 * j + 2)) & 3;
422 }
423
424 return alu;
425 }
426
427 static void
428 write_deref(write_ctx *ctx, const nir_deref_instr *deref)
429 {
430 blob_write_uint32(ctx->blob, deref->deref_type);
431
432 blob_write_uint32(ctx->blob, deref->mode);
433 encode_type_to_blob(ctx->blob, deref->type);
434
435 write_dest(ctx, &deref->dest);
436
437 if (deref->deref_type == nir_deref_type_var) {
438 write_object(ctx, deref->var);
439 return;
440 }
441
442 write_src(ctx, &deref->parent);
443
444 switch (deref->deref_type) {
445 case nir_deref_type_struct:
446 blob_write_uint32(ctx->blob, deref->strct.index);
447 break;
448
449 case nir_deref_type_array:
450 case nir_deref_type_ptr_as_array:
451 write_src(ctx, &deref->arr.index);
452 break;
453
454 case nir_deref_type_cast:
455 blob_write_uint32(ctx->blob, deref->cast.ptr_stride);
456 break;
457
458 case nir_deref_type_array_wildcard:
459 /* Nothing to do */
460 break;
461
462 default:
463 unreachable("Invalid deref type");
464 }
465 }
466
467 static nir_deref_instr *
468 read_deref(read_ctx *ctx)
469 {
470 nir_deref_type deref_type = blob_read_uint32(ctx->blob);
471 nir_deref_instr *deref = nir_deref_instr_create(ctx->nir, deref_type);
472
473 deref->mode = blob_read_uint32(ctx->blob);
474 deref->type = decode_type_from_blob(ctx->blob);
475
476 read_dest(ctx, &deref->dest, &deref->instr);
477
478 if (deref_type == nir_deref_type_var) {
479 deref->var = read_object(ctx);
480 return deref;
481 }
482
483 read_src(ctx, &deref->parent, &deref->instr);
484
485 switch (deref->deref_type) {
486 case nir_deref_type_struct:
487 deref->strct.index = blob_read_uint32(ctx->blob);
488 break;
489
490 case nir_deref_type_array:
491 case nir_deref_type_ptr_as_array:
492 read_src(ctx, &deref->arr.index, &deref->instr);
493 break;
494
495 case nir_deref_type_cast:
496 deref->cast.ptr_stride = blob_read_uint32(ctx->blob);
497 break;
498
499 case nir_deref_type_array_wildcard:
500 /* Nothing to do */
501 break;
502
503 default:
504 unreachable("Invalid deref type");
505 }
506
507 return deref;
508 }
509
510 static void
511 write_intrinsic(write_ctx *ctx, const nir_intrinsic_instr *intrin)
512 {
513 blob_write_uint32(ctx->blob, intrin->intrinsic);
514
515 unsigned num_srcs = nir_intrinsic_infos[intrin->intrinsic].num_srcs;
516 unsigned num_indices = nir_intrinsic_infos[intrin->intrinsic].num_indices;
517
518 blob_write_uint32(ctx->blob, intrin->num_components);
519
520 if (nir_intrinsic_infos[intrin->intrinsic].has_dest)
521 write_dest(ctx, &intrin->dest);
522
523 for (unsigned i = 0; i < num_srcs; i++)
524 write_src(ctx, &intrin->src[i]);
525
526 for (unsigned i = 0; i < num_indices; i++)
527 blob_write_uint32(ctx->blob, intrin->const_index[i]);
528 }
529
530 static nir_intrinsic_instr *
531 read_intrinsic(read_ctx *ctx)
532 {
533 nir_intrinsic_op op = blob_read_uint32(ctx->blob);
534
535 nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(ctx->nir, op);
536
537 unsigned num_srcs = nir_intrinsic_infos[op].num_srcs;
538 unsigned num_indices = nir_intrinsic_infos[op].num_indices;
539
540 intrin->num_components = blob_read_uint32(ctx->blob);
541
542 if (nir_intrinsic_infos[op].has_dest)
543 read_dest(ctx, &intrin->dest, &intrin->instr);
544
545 for (unsigned i = 0; i < num_srcs; i++)
546 read_src(ctx, &intrin->src[i], &intrin->instr);
547
548 for (unsigned i = 0; i < num_indices; i++)
549 intrin->const_index[i] = blob_read_uint32(ctx->blob);
550
551 return intrin;
552 }
553
554 static void
555 write_load_const(write_ctx *ctx, const nir_load_const_instr *lc)
556 {
557 uint32_t val = lc->def.num_components;
558 val |= lc->def.bit_size << 3;
559 blob_write_uint32(ctx->blob, val);
560 blob_write_bytes(ctx->blob, lc->value, sizeof(*lc->value) * lc->def.num_components);
561 write_add_object(ctx, &lc->def);
562 }
563
564 static nir_load_const_instr *
565 read_load_const(read_ctx *ctx)
566 {
567 uint32_t val = blob_read_uint32(ctx->blob);
568
569 nir_load_const_instr *lc =
570 nir_load_const_instr_create(ctx->nir, val & 0x7, val >> 3);
571
572 blob_copy_bytes(ctx->blob, lc->value, sizeof(*lc->value) * lc->def.num_components);
573 read_add_object(ctx, &lc->def);
574 return lc;
575 }
576
577 static void
578 write_ssa_undef(write_ctx *ctx, const nir_ssa_undef_instr *undef)
579 {
580 uint32_t val = undef->def.num_components;
581 val |= undef->def.bit_size << 3;
582 blob_write_uint32(ctx->blob, val);
583 write_add_object(ctx, &undef->def);
584 }
585
586 static nir_ssa_undef_instr *
587 read_ssa_undef(read_ctx *ctx)
588 {
589 uint32_t val = blob_read_uint32(ctx->blob);
590
591 nir_ssa_undef_instr *undef =
592 nir_ssa_undef_instr_create(ctx->nir, val & 0x7, val >> 3);
593
594 read_add_object(ctx, &undef->def);
595 return undef;
596 }
597
598 union packed_tex_data {
599 uint32_t u32;
600 struct {
601 enum glsl_sampler_dim sampler_dim:4;
602 nir_alu_type dest_type:8;
603 unsigned coord_components:3;
604 unsigned is_array:1;
605 unsigned is_shadow:1;
606 unsigned is_new_style_shadow:1;
607 unsigned component:2;
608 unsigned unused:10; /* Mark unused for valgrind. */
609 } u;
610 };
611
612 static void
613 write_tex(write_ctx *ctx, const nir_tex_instr *tex)
614 {
615 blob_write_uint32(ctx->blob, tex->num_srcs);
616 blob_write_uint32(ctx->blob, tex->op);
617 blob_write_uint32(ctx->blob, tex->texture_index);
618 blob_write_uint32(ctx->blob, tex->texture_array_size);
619 blob_write_uint32(ctx->blob, tex->sampler_index);
620 blob_write_bytes(ctx->blob, tex->tg4_offsets, sizeof(tex->tg4_offsets));
621
622 STATIC_ASSERT(sizeof(union packed_tex_data) == sizeof(uint32_t));
623 union packed_tex_data packed = {
624 .u.sampler_dim = tex->sampler_dim,
625 .u.dest_type = tex->dest_type,
626 .u.coord_components = tex->coord_components,
627 .u.is_array = tex->is_array,
628 .u.is_shadow = tex->is_shadow,
629 .u.is_new_style_shadow = tex->is_new_style_shadow,
630 .u.component = tex->component,
631 };
632 blob_write_uint32(ctx->blob, packed.u32);
633
634 write_dest(ctx, &tex->dest);
635 for (unsigned i = 0; i < tex->num_srcs; i++) {
636 blob_write_uint32(ctx->blob, tex->src[i].src_type);
637 write_src(ctx, &tex->src[i].src);
638 }
639 }
640
641 static nir_tex_instr *
642 read_tex(read_ctx *ctx)
643 {
644 unsigned num_srcs = blob_read_uint32(ctx->blob);
645 nir_tex_instr *tex = nir_tex_instr_create(ctx->nir, num_srcs);
646
647 tex->op = blob_read_uint32(ctx->blob);
648 tex->texture_index = blob_read_uint32(ctx->blob);
649 tex->texture_array_size = blob_read_uint32(ctx->blob);
650 tex->sampler_index = blob_read_uint32(ctx->blob);
651 blob_copy_bytes(ctx->blob, tex->tg4_offsets, sizeof(tex->tg4_offsets));
652
653 union packed_tex_data packed;
654 packed.u32 = blob_read_uint32(ctx->blob);
655 tex->sampler_dim = packed.u.sampler_dim;
656 tex->dest_type = packed.u.dest_type;
657 tex->coord_components = packed.u.coord_components;
658 tex->is_array = packed.u.is_array;
659 tex->is_shadow = packed.u.is_shadow;
660 tex->is_new_style_shadow = packed.u.is_new_style_shadow;
661 tex->component = packed.u.component;
662
663 read_dest(ctx, &tex->dest, &tex->instr);
664 for (unsigned i = 0; i < tex->num_srcs; i++) {
665 tex->src[i].src_type = blob_read_uint32(ctx->blob);
666 read_src(ctx, &tex->src[i].src, &tex->instr);
667 }
668
669 return tex;
670 }
671
672 static void
673 write_phi(write_ctx *ctx, const nir_phi_instr *phi)
674 {
675 /* Phi nodes are special, since they may reference SSA definitions and
676 * basic blocks that don't exist yet. We leave two empty uintptr_t's here,
677 * and then store enough information so that a later fixup pass can fill
678 * them in correctly.
679 */
680 write_dest(ctx, &phi->dest);
681
682 blob_write_uint32(ctx->blob, exec_list_length(&phi->srcs));
683
684 nir_foreach_phi_src(src, phi) {
685 assert(src->src.is_ssa);
686 size_t blob_offset = blob_reserve_intptr(ctx->blob);
687 ASSERTED size_t blob_offset2 = blob_reserve_intptr(ctx->blob);
688 assert(blob_offset + sizeof(uintptr_t) == blob_offset2);
689 write_phi_fixup fixup = {
690 .blob_offset = blob_offset,
691 .src = src->src.ssa,
692 .block = src->pred,
693 };
694 util_dynarray_append(&ctx->phi_fixups, write_phi_fixup, fixup);
695 }
696 }
697
698 static void
699 write_fixup_phis(write_ctx *ctx)
700 {
701 util_dynarray_foreach(&ctx->phi_fixups, write_phi_fixup, fixup) {
702 uintptr_t *blob_ptr = (uintptr_t *)(ctx->blob->data + fixup->blob_offset);
703 blob_ptr[0] = write_lookup_object(ctx, fixup->src);
704 blob_ptr[1] = write_lookup_object(ctx, fixup->block);
705 }
706
707 util_dynarray_clear(&ctx->phi_fixups);
708 }
709
710 static nir_phi_instr *
711 read_phi(read_ctx *ctx, nir_block *blk)
712 {
713 nir_phi_instr *phi = nir_phi_instr_create(ctx->nir);
714
715 read_dest(ctx, &phi->dest, &phi->instr);
716
717 unsigned num_srcs = blob_read_uint32(ctx->blob);
718
719 /* For similar reasons as before, we just store the index directly into the
720 * pointer, and let a later pass resolve the phi sources.
721 *
722 * In order to ensure that the copied sources (which are just the indices
723 * from the blob for now) don't get inserted into the old shader's use-def
724 * lists, we have to add the phi instruction *before* we set up its
725 * sources.
726 */
727 nir_instr_insert_after_block(blk, &phi->instr);
728
729 for (unsigned i = 0; i < num_srcs; i++) {
730 nir_phi_src *src = ralloc(phi, nir_phi_src);
731
732 src->src.is_ssa = true;
733 src->src.ssa = (nir_ssa_def *) blob_read_intptr(ctx->blob);
734 src->pred = (nir_block *) blob_read_intptr(ctx->blob);
735
736 /* Since we're not letting nir_insert_instr handle use/def stuff for us,
737 * we have to set the parent_instr manually. It doesn't really matter
738 * when we do it, so we might as well do it here.
739 */
740 src->src.parent_instr = &phi->instr;
741
742 /* Stash it in the list of phi sources. We'll walk this list and fix up
743 * sources at the very end of read_function_impl.
744 */
745 list_add(&src->src.use_link, &ctx->phi_srcs);
746
747 exec_list_push_tail(&phi->srcs, &src->node);
748 }
749
750 return phi;
751 }
752
753 static void
754 read_fixup_phis(read_ctx *ctx)
755 {
756 list_for_each_entry_safe(nir_phi_src, src, &ctx->phi_srcs, src.use_link) {
757 src->pred = read_lookup_object(ctx, (uintptr_t)src->pred);
758 src->src.ssa = read_lookup_object(ctx, (uintptr_t)src->src.ssa);
759
760 /* Remove from this list */
761 list_del(&src->src.use_link);
762
763 list_addtail(&src->src.use_link, &src->src.ssa->uses);
764 }
765 assert(list_empty(&ctx->phi_srcs));
766 }
767
768 static void
769 write_jump(write_ctx *ctx, const nir_jump_instr *jmp)
770 {
771 blob_write_uint32(ctx->blob, jmp->type);
772 }
773
774 static nir_jump_instr *
775 read_jump(read_ctx *ctx)
776 {
777 nir_jump_type type = blob_read_uint32(ctx->blob);
778 nir_jump_instr *jmp = nir_jump_instr_create(ctx->nir, type);
779 return jmp;
780 }
781
782 static void
783 write_call(write_ctx *ctx, const nir_call_instr *call)
784 {
785 blob_write_intptr(ctx->blob, write_lookup_object(ctx, call->callee));
786
787 for (unsigned i = 0; i < call->num_params; i++)
788 write_src(ctx, &call->params[i]);
789 }
790
791 static nir_call_instr *
792 read_call(read_ctx *ctx)
793 {
794 nir_function *callee = read_object(ctx);
795 nir_call_instr *call = nir_call_instr_create(ctx->nir, callee);
796
797 for (unsigned i = 0; i < call->num_params; i++)
798 read_src(ctx, &call->params[i], call);
799
800 return call;
801 }
802
803 static void
804 write_instr(write_ctx *ctx, const nir_instr *instr)
805 {
806 blob_write_uint32(ctx->blob, instr->type);
807 switch (instr->type) {
808 case nir_instr_type_alu:
809 write_alu(ctx, nir_instr_as_alu(instr));
810 break;
811 case nir_instr_type_deref:
812 write_deref(ctx, nir_instr_as_deref(instr));
813 break;
814 case nir_instr_type_intrinsic:
815 write_intrinsic(ctx, nir_instr_as_intrinsic(instr));
816 break;
817 case nir_instr_type_load_const:
818 write_load_const(ctx, nir_instr_as_load_const(instr));
819 break;
820 case nir_instr_type_ssa_undef:
821 write_ssa_undef(ctx, nir_instr_as_ssa_undef(instr));
822 break;
823 case nir_instr_type_tex:
824 write_tex(ctx, nir_instr_as_tex(instr));
825 break;
826 case nir_instr_type_phi:
827 write_phi(ctx, nir_instr_as_phi(instr));
828 break;
829 case nir_instr_type_jump:
830 write_jump(ctx, nir_instr_as_jump(instr));
831 break;
832 case nir_instr_type_call:
833 write_call(ctx, nir_instr_as_call(instr));
834 break;
835 case nir_instr_type_parallel_copy:
836 unreachable("Cannot write parallel copies");
837 default:
838 unreachable("bad instr type");
839 }
840 }
841
842 static void
843 read_instr(read_ctx *ctx, nir_block *block)
844 {
845 nir_instr_type type = blob_read_uint32(ctx->blob);
846 nir_instr *instr;
847 switch (type) {
848 case nir_instr_type_alu:
849 instr = &read_alu(ctx)->instr;
850 break;
851 case nir_instr_type_deref:
852 instr = &read_deref(ctx)->instr;
853 break;
854 case nir_instr_type_intrinsic:
855 instr = &read_intrinsic(ctx)->instr;
856 break;
857 case nir_instr_type_load_const:
858 instr = &read_load_const(ctx)->instr;
859 break;
860 case nir_instr_type_ssa_undef:
861 instr = &read_ssa_undef(ctx)->instr;
862 break;
863 case nir_instr_type_tex:
864 instr = &read_tex(ctx)->instr;
865 break;
866 case nir_instr_type_phi:
867 /* Phi instructions are a bit of a special case when reading because we
868 * don't want inserting the instruction to automatically handle use/defs
869 * for us. Instead, we need to wait until all the blocks/instructions
870 * are read so that we can set their sources up.
871 */
872 read_phi(ctx, block);
873 return;
874 case nir_instr_type_jump:
875 instr = &read_jump(ctx)->instr;
876 break;
877 case nir_instr_type_call:
878 instr = &read_call(ctx)->instr;
879 break;
880 case nir_instr_type_parallel_copy:
881 unreachable("Cannot read parallel copies");
882 default:
883 unreachable("bad instr type");
884 }
885
886 nir_instr_insert_after_block(block, instr);
887 }
888
889 static void
890 write_block(write_ctx *ctx, const nir_block *block)
891 {
892 write_add_object(ctx, block);
893 blob_write_uint32(ctx->blob, exec_list_length(&block->instr_list));
894 nir_foreach_instr(instr, block)
895 write_instr(ctx, instr);
896 }
897
898 static void
899 read_block(read_ctx *ctx, struct exec_list *cf_list)
900 {
901 /* Don't actually create a new block. Just use the one from the tail of
902 * the list. NIR guarantees that the tail of the list is a block and that
903 * no two blocks are side-by-side in the IR; It should be empty.
904 */
905 nir_block *block =
906 exec_node_data(nir_block, exec_list_get_tail(cf_list), cf_node.node);
907
908 read_add_object(ctx, block);
909 unsigned num_instrs = blob_read_uint32(ctx->blob);
910 for (unsigned i = 0; i < num_instrs; i++) {
911 read_instr(ctx, block);
912 }
913 }
914
915 static void
916 write_cf_list(write_ctx *ctx, const struct exec_list *cf_list);
917
918 static void
919 read_cf_list(read_ctx *ctx, struct exec_list *cf_list);
920
921 static void
922 write_if(write_ctx *ctx, nir_if *nif)
923 {
924 write_src(ctx, &nif->condition);
925
926 write_cf_list(ctx, &nif->then_list);
927 write_cf_list(ctx, &nif->else_list);
928 }
929
930 static void
931 read_if(read_ctx *ctx, struct exec_list *cf_list)
932 {
933 nir_if *nif = nir_if_create(ctx->nir);
934
935 read_src(ctx, &nif->condition, nif);
936
937 nir_cf_node_insert_end(cf_list, &nif->cf_node);
938
939 read_cf_list(ctx, &nif->then_list);
940 read_cf_list(ctx, &nif->else_list);
941 }
942
943 static void
944 write_loop(write_ctx *ctx, nir_loop *loop)
945 {
946 write_cf_list(ctx, &loop->body);
947 }
948
949 static void
950 read_loop(read_ctx *ctx, struct exec_list *cf_list)
951 {
952 nir_loop *loop = nir_loop_create(ctx->nir);
953
954 nir_cf_node_insert_end(cf_list, &loop->cf_node);
955
956 read_cf_list(ctx, &loop->body);
957 }
958
959 static void
960 write_cf_node(write_ctx *ctx, nir_cf_node *cf)
961 {
962 blob_write_uint32(ctx->blob, cf->type);
963
964 switch (cf->type) {
965 case nir_cf_node_block:
966 write_block(ctx, nir_cf_node_as_block(cf));
967 break;
968 case nir_cf_node_if:
969 write_if(ctx, nir_cf_node_as_if(cf));
970 break;
971 case nir_cf_node_loop:
972 write_loop(ctx, nir_cf_node_as_loop(cf));
973 break;
974 default:
975 unreachable("bad cf type");
976 }
977 }
978
979 static void
980 read_cf_node(read_ctx *ctx, struct exec_list *list)
981 {
982 nir_cf_node_type type = blob_read_uint32(ctx->blob);
983
984 switch (type) {
985 case nir_cf_node_block:
986 read_block(ctx, list);
987 break;
988 case nir_cf_node_if:
989 read_if(ctx, list);
990 break;
991 case nir_cf_node_loop:
992 read_loop(ctx, list);
993 break;
994 default:
995 unreachable("bad cf type");
996 }
997 }
998
999 static void
1000 write_cf_list(write_ctx *ctx, const struct exec_list *cf_list)
1001 {
1002 blob_write_uint32(ctx->blob, exec_list_length(cf_list));
1003 foreach_list_typed(nir_cf_node, cf, node, cf_list) {
1004 write_cf_node(ctx, cf);
1005 }
1006 }
1007
1008 static void
1009 read_cf_list(read_ctx *ctx, struct exec_list *cf_list)
1010 {
1011 uint32_t num_cf_nodes = blob_read_uint32(ctx->blob);
1012 for (unsigned i = 0; i < num_cf_nodes; i++)
1013 read_cf_node(ctx, cf_list);
1014 }
1015
1016 static void
1017 write_function_impl(write_ctx *ctx, const nir_function_impl *fi)
1018 {
1019 write_var_list(ctx, &fi->locals);
1020 write_reg_list(ctx, &fi->registers);
1021 blob_write_uint32(ctx->blob, fi->reg_alloc);
1022
1023 write_cf_list(ctx, &fi->body);
1024 write_fixup_phis(ctx);
1025 }
1026
1027 static nir_function_impl *
1028 read_function_impl(read_ctx *ctx, nir_function *fxn)
1029 {
1030 nir_function_impl *fi = nir_function_impl_create_bare(ctx->nir);
1031 fi->function = fxn;
1032
1033 read_var_list(ctx, &fi->locals);
1034 read_reg_list(ctx, &fi->registers);
1035 fi->reg_alloc = blob_read_uint32(ctx->blob);
1036
1037 read_cf_list(ctx, &fi->body);
1038 read_fixup_phis(ctx);
1039
1040 fi->valid_metadata = 0;
1041
1042 return fi;
1043 }
1044
1045 static void
1046 write_function(write_ctx *ctx, const nir_function *fxn)
1047 {
1048 blob_write_uint32(ctx->blob, !!(fxn->name));
1049 if (fxn->name)
1050 blob_write_string(ctx->blob, fxn->name);
1051
1052 write_add_object(ctx, fxn);
1053
1054 blob_write_uint32(ctx->blob, fxn->num_params);
1055 for (unsigned i = 0; i < fxn->num_params; i++) {
1056 uint32_t val =
1057 ((uint32_t)fxn->params[i].num_components) |
1058 ((uint32_t)fxn->params[i].bit_size) << 8;
1059 blob_write_uint32(ctx->blob, val);
1060 }
1061
1062 blob_write_uint32(ctx->blob, fxn->is_entrypoint);
1063
1064 /* At first glance, it looks like we should write the function_impl here.
1065 * However, call instructions need to be able to reference at least the
1066 * function and those will get processed as we write the function_impls.
1067 * We stop here and write function_impls as a second pass.
1068 */
1069 }
1070
1071 static void
1072 read_function(read_ctx *ctx)
1073 {
1074 bool has_name = blob_read_uint32(ctx->blob);
1075 char *name = has_name ? blob_read_string(ctx->blob) : NULL;
1076
1077 nir_function *fxn = nir_function_create(ctx->nir, name);
1078
1079 read_add_object(ctx, fxn);
1080
1081 fxn->num_params = blob_read_uint32(ctx->blob);
1082 fxn->params = ralloc_array(fxn, nir_parameter, fxn->num_params);
1083 for (unsigned i = 0; i < fxn->num_params; i++) {
1084 uint32_t val = blob_read_uint32(ctx->blob);
1085 fxn->params[i].num_components = val & 0xff;
1086 fxn->params[i].bit_size = (val >> 8) & 0xff;
1087 }
1088
1089 fxn->is_entrypoint = blob_read_uint32(ctx->blob);
1090 }
1091
1092 void
1093 nir_serialize(struct blob *blob, const nir_shader *nir, bool strip)
1094 {
1095 nir_shader *stripped = NULL;
1096
1097 if (strip) {
1098 /* Drop unnecessary information (like variable names), so the serialized
1099 * NIR is smaller, and also to let us detect more isomorphic shaders
1100 * when hashing, increasing cache hits.
1101 */
1102 stripped = nir_shader_clone(NULL, nir);
1103 nir_strip(stripped);
1104 nir = stripped;
1105 }
1106
1107 write_ctx ctx;
1108 ctx.remap_table = _mesa_pointer_hash_table_create(NULL);
1109 ctx.next_idx = 0;
1110 ctx.blob = blob;
1111 ctx.nir = nir;
1112 util_dynarray_init(&ctx.phi_fixups, NULL);
1113
1114 size_t idx_size_offset = blob_reserve_intptr(blob);
1115
1116 struct shader_info info = nir->info;
1117 uint32_t strings = 0;
1118 if (info.name)
1119 strings |= 0x1;
1120 if (info.label)
1121 strings |= 0x2;
1122 blob_write_uint32(blob, strings);
1123 if (info.name)
1124 blob_write_string(blob, info.name);
1125 if (info.label)
1126 blob_write_string(blob, info.label);
1127 info.name = info.label = NULL;
1128 blob_write_bytes(blob, (uint8_t *) &info, sizeof(info));
1129
1130 write_var_list(&ctx, &nir->uniforms);
1131 write_var_list(&ctx, &nir->inputs);
1132 write_var_list(&ctx, &nir->outputs);
1133 write_var_list(&ctx, &nir->shared);
1134 write_var_list(&ctx, &nir->globals);
1135 write_var_list(&ctx, &nir->system_values);
1136
1137 blob_write_uint32(blob, nir->num_inputs);
1138 blob_write_uint32(blob, nir->num_uniforms);
1139 blob_write_uint32(blob, nir->num_outputs);
1140 blob_write_uint32(blob, nir->num_shared);
1141 blob_write_uint32(blob, nir->scratch_size);
1142
1143 blob_write_uint32(blob, exec_list_length(&nir->functions));
1144 nir_foreach_function(fxn, nir) {
1145 write_function(&ctx, fxn);
1146 }
1147
1148 nir_foreach_function(fxn, nir) {
1149 write_function_impl(&ctx, fxn->impl);
1150 }
1151
1152 blob_write_uint32(blob, nir->constant_data_size);
1153 if (nir->constant_data_size > 0)
1154 blob_write_bytes(blob, nir->constant_data, nir->constant_data_size);
1155
1156 *(uintptr_t *)(blob->data + idx_size_offset) = ctx.next_idx;
1157
1158 _mesa_hash_table_destroy(ctx.remap_table, NULL);
1159 util_dynarray_fini(&ctx.phi_fixups);
1160
1161 if (strip)
1162 ralloc_free(stripped);
1163 }
1164
1165 nir_shader *
1166 nir_deserialize(void *mem_ctx,
1167 const struct nir_shader_compiler_options *options,
1168 struct blob_reader *blob)
1169 {
1170 read_ctx ctx;
1171 ctx.blob = blob;
1172 list_inithead(&ctx.phi_srcs);
1173 ctx.idx_table_len = blob_read_intptr(blob);
1174 ctx.idx_table = calloc(ctx.idx_table_len, sizeof(uintptr_t));
1175 ctx.next_idx = 0;
1176
1177 uint32_t strings = blob_read_uint32(blob);
1178 char *name = (strings & 0x1) ? blob_read_string(blob) : NULL;
1179 char *label = (strings & 0x2) ? blob_read_string(blob) : NULL;
1180
1181 struct shader_info info;
1182 blob_copy_bytes(blob, (uint8_t *) &info, sizeof(info));
1183
1184 ctx.nir = nir_shader_create(mem_ctx, info.stage, options, NULL);
1185
1186 info.name = name ? ralloc_strdup(ctx.nir, name) : NULL;
1187 info.label = label ? ralloc_strdup(ctx.nir, label) : NULL;
1188
1189 ctx.nir->info = info;
1190
1191 read_var_list(&ctx, &ctx.nir->uniforms);
1192 read_var_list(&ctx, &ctx.nir->inputs);
1193 read_var_list(&ctx, &ctx.nir->outputs);
1194 read_var_list(&ctx, &ctx.nir->shared);
1195 read_var_list(&ctx, &ctx.nir->globals);
1196 read_var_list(&ctx, &ctx.nir->system_values);
1197
1198 ctx.nir->num_inputs = blob_read_uint32(blob);
1199 ctx.nir->num_uniforms = blob_read_uint32(blob);
1200 ctx.nir->num_outputs = blob_read_uint32(blob);
1201 ctx.nir->num_shared = blob_read_uint32(blob);
1202 ctx.nir->scratch_size = blob_read_uint32(blob);
1203
1204 unsigned num_functions = blob_read_uint32(blob);
1205 for (unsigned i = 0; i < num_functions; i++)
1206 read_function(&ctx);
1207
1208 nir_foreach_function(fxn, ctx.nir)
1209 fxn->impl = read_function_impl(&ctx, fxn);
1210
1211 ctx.nir->constant_data_size = blob_read_uint32(blob);
1212 if (ctx.nir->constant_data_size > 0) {
1213 ctx.nir->constant_data =
1214 ralloc_size(ctx.nir, ctx.nir->constant_data_size);
1215 blob_copy_bytes(blob, ctx.nir->constant_data,
1216 ctx.nir->constant_data_size);
1217 }
1218
1219 free(ctx.idx_table);
1220
1221 return ctx.nir;
1222 }
1223
1224 void
1225 nir_shader_serialize_deserialize(nir_shader *shader)
1226 {
1227 const struct nir_shader_compiler_options *options = shader->options;
1228
1229 struct blob writer;
1230 blob_init(&writer);
1231 nir_serialize(&writer, shader, false);
1232
1233 /* Delete all of dest's ralloc children but leave dest alone */
1234 void *dead_ctx = ralloc_context(NULL);
1235 ralloc_adopt(dead_ctx, shader);
1236 ralloc_free(dead_ctx);
1237
1238 dead_ctx = ralloc_context(NULL);
1239
1240 struct blob_reader reader;
1241 blob_reader_init(&reader, writer.data, writer.size);
1242 nir_shader *copy = nir_deserialize(dead_ctx, options, &reader);
1243
1244 blob_finish(&writer);
1245
1246 nir_shader_replace(shader, copy);
1247 ralloc_free(dead_ctx);
1248 }