/*
 * Copyright © 2017 Connor Abbott
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir_serialize.h"
#include "nir_control_flow.h"
#include "util/u_dynarray.h"

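/* This file implements a simple binary serializer for NIR.  In-memory
 * objects (variables, registers, SSA defs, blocks, functions) are assigned
 * sequential indices as they are written, via a pointer -> index remap
 * table, and references between objects are stored as those indices.
 * Reading rebuilds the table in the other direction (index -> pointer).
 * The only forward references are phi sources, which may name SSA defs and
 * blocks that haven't been written yet; those use a reserve-then-fixup
 * scheme on the write side and a fixup list on the read side.
 */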
typedef struct {
   size_t blob_offset;
   nir_ssa_def *src;
   nir_block *block;
} write_phi_fixup;

typedef struct {
   const nir_shader *nir;

   struct blob *blob;

   /* maps pointer to index */
   struct hash_table *remap_table;

   /* the next index to assign to a NIR in-memory object */
   uintptr_t next_idx;

   /* Array of write_phi_fixup structs representing phi sources that need to
    * be resolved in the second pass.
    */
   struct util_dynarray phi_fixups;
} write_ctx;

typedef struct {
   nir_shader *nir;

   struct blob_reader *blob;

   /* the next index to assign to a NIR in-memory object */
   uintptr_t next_idx;

   /* The length of the index -> object table */
   uintptr_t idx_table_len;

   /* map from index to deserialized pointer */
   void **idx_table;

   /* List of phi sources. */
   struct list_head phi_srcs;

} read_ctx;

static void
write_add_object(write_ctx *ctx, const void *obj)
{
   uintptr_t index = ctx->next_idx++;
   _mesa_hash_table_insert(ctx->remap_table, obj, (void *) index);
}

static uintptr_t
write_lookup_object(write_ctx *ctx, const void *obj)
{
   struct hash_entry *entry = _mesa_hash_table_search(ctx->remap_table, obj);
   assert(entry);
   return (uintptr_t) entry->data;
}

static void
write_object(write_ctx *ctx, const void *obj)
{
   blob_write_intptr(ctx->blob, write_lookup_object(ctx, obj));
}

static void
read_add_object(read_ctx *ctx, void *obj)
{
   assert(ctx->next_idx < ctx->idx_table_len);
   ctx->idx_table[ctx->next_idx++] = obj;
}

static void *
read_lookup_object(read_ctx *ctx, uintptr_t idx)
{
   assert(idx < ctx->idx_table_len);
   return ctx->idx_table[idx];
}

static void *
read_object(read_ctx *ctx)
{
   return read_lookup_object(ctx, blob_read_intptr(ctx->blob));
}

static void
write_constant(write_ctx *ctx, const nir_constant *c)
{
   blob_write_bytes(ctx->blob, c->values, sizeof(c->values));
   blob_write_uint32(ctx->blob, c->num_elements);
   for (unsigned i = 0; i < c->num_elements; i++)
      write_constant(ctx, c->elements[i]);
}

static nir_constant *
read_constant(read_ctx *ctx, nir_variable *nvar)
{
   nir_constant *c = ralloc(nvar, nir_constant);

   blob_copy_bytes(ctx->blob, (uint8_t *)c->values, sizeof(c->values));
   c->num_elements = blob_read_uint32(ctx->blob);
   c->elements = ralloc_array(ctx->nir, nir_constant *, c->num_elements);
   for (unsigned i = 0; i < c->num_elements; i++)
      c->elements[i] = read_constant(ctx, nvar);

   return c;
}

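/* Variables register themselves in the remap table before their contents
 * are written, so deref chains and deref instructions can refer back to
 * them by index.  Note that var->data (and the per-member array added for
 * per-member structs) is written as a raw byte copy, so the encoding
 * depends on the in-memory layout of nir_variable_data.
 */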
static void
write_variable(write_ctx *ctx, const nir_variable *var)
{
   write_add_object(ctx, var);
   encode_type_to_blob(ctx->blob, var->type);
   blob_write_uint32(ctx->blob, !!(var->name));
   if (var->name)
      blob_write_string(ctx->blob, var->name);
   blob_write_bytes(ctx->blob, (uint8_t *) &var->data, sizeof(var->data));
   blob_write_uint32(ctx->blob, var->num_state_slots);
   blob_write_bytes(ctx->blob, (uint8_t *) var->state_slots,
                    var->num_state_slots * sizeof(nir_state_slot));
   blob_write_uint32(ctx->blob, !!(var->constant_initializer));
   if (var->constant_initializer)
      write_constant(ctx, var->constant_initializer);
   blob_write_uint32(ctx->blob, !!(var->interface_type));
   if (var->interface_type)
      encode_type_to_blob(ctx->blob, var->interface_type);
   blob_write_uint32(ctx->blob, var->num_members);
   if (var->num_members > 0) {
      blob_write_bytes(ctx->blob, (uint8_t *) var->members,
                       var->num_members * sizeof(*var->members));
   }
}

static nir_variable *
read_variable(read_ctx *ctx)
{
   nir_variable *var = rzalloc(ctx->nir, nir_variable);
   read_add_object(ctx, var);

   var->type = decode_type_from_blob(ctx->blob);
   bool has_name = blob_read_uint32(ctx->blob);
   if (has_name) {
      const char *name = blob_read_string(ctx->blob);
      var->name = ralloc_strdup(var, name);
   } else {
      var->name = NULL;
   }
   blob_copy_bytes(ctx->blob, (uint8_t *) &var->data, sizeof(var->data));
   var->num_state_slots = blob_read_uint32(ctx->blob);
   var->state_slots = ralloc_array(var, nir_state_slot, var->num_state_slots);
   blob_copy_bytes(ctx->blob, (uint8_t *) var->state_slots,
                   var->num_state_slots * sizeof(nir_state_slot));
   bool has_const_initializer = blob_read_uint32(ctx->blob);
   if (has_const_initializer)
      var->constant_initializer = read_constant(ctx, var);
   else
      var->constant_initializer = NULL;
   bool has_interface_type = blob_read_uint32(ctx->blob);
   if (has_interface_type)
      var->interface_type = decode_type_from_blob(ctx->blob);
   else
      var->interface_type = NULL;
   var->num_members = blob_read_uint32(ctx->blob);
   if (var->num_members > 0) {
      var->members = ralloc_array(var, struct nir_variable_data,
                                  var->num_members);
      blob_copy_bytes(ctx->blob, (uint8_t *) var->members,
                      var->num_members * sizeof(*var->members));
   }

   return var;
}

static void
write_var_list(write_ctx *ctx, const struct exec_list *src)
{
   blob_write_uint32(ctx->blob, exec_list_length(src));
   foreach_list_typed(nir_variable, var, node, src) {
      write_variable(ctx, var);
   }
}

static void
read_var_list(read_ctx *ctx, struct exec_list *dst)
{
   exec_list_make_empty(dst);
   unsigned num_vars = blob_read_uint32(ctx->blob);
   for (unsigned i = 0; i < num_vars; i++) {
      nir_variable *var = read_variable(ctx);
      exec_list_push_tail(dst, &var->node);
   }
}

static void
write_register(write_ctx *ctx, const nir_register *reg)
{
   write_add_object(ctx, reg);
   blob_write_uint32(ctx->blob, reg->num_components);
   blob_write_uint32(ctx->blob, reg->bit_size);
   blob_write_uint32(ctx->blob, reg->num_array_elems);
   blob_write_uint32(ctx->blob, reg->index);
   blob_write_uint32(ctx->blob, !!(reg->name));
   if (reg->name)
      blob_write_string(ctx->blob, reg->name);
   blob_write_uint32(ctx->blob, reg->is_global << 1 | reg->is_packed);
}

static nir_register *
read_register(read_ctx *ctx)
{
   nir_register *reg = ralloc(ctx->nir, nir_register);
   read_add_object(ctx, reg);
   reg->num_components = blob_read_uint32(ctx->blob);
   reg->bit_size = blob_read_uint32(ctx->blob);
   reg->num_array_elems = blob_read_uint32(ctx->blob);
   reg->index = blob_read_uint32(ctx->blob);
   bool has_name = blob_read_uint32(ctx->blob);
   if (has_name) {
      const char *name = blob_read_string(ctx->blob);
      reg->name = ralloc_strdup(reg, name);
   } else {
      reg->name = NULL;
   }
   unsigned flags = blob_read_uint32(ctx->blob);
   reg->is_global = flags & 0x2;
   reg->is_packed = flags & 0x1;

   list_inithead(&reg->uses);
   list_inithead(&reg->defs);
   list_inithead(&reg->if_uses);

   return reg;
}

static void
write_reg_list(write_ctx *ctx, const struct exec_list *src)
{
   blob_write_uint32(ctx->blob, exec_list_length(src));
   foreach_list_typed(nir_register, reg, node, src)
      write_register(ctx, reg);
}

static void
read_reg_list(read_ctx *ctx, struct exec_list *dst)
{
   exec_list_make_empty(dst);
   unsigned num_regs = blob_read_uint32(ctx->blob);
   for (unsigned i = 0; i < num_regs; i++) {
      nir_register *reg = read_register(ctx);
      exec_list_push_tail(dst, &reg->node);
   }
}

static void
write_src(write_ctx *ctx, const nir_src *src)
{
   /* Since sources are very frequent, we try to save some space when storing
    * them. In particular, we store whether the source is a register and
    * whether the register has an indirect index in the low two bits. We can
    * assume that the high two bits of the index are zero, since otherwise our
    * address space would've been exhausted allocating the remap table!
    */
   if (src->is_ssa) {
      uintptr_t idx = write_lookup_object(ctx, src->ssa) << 2;
      idx |= 1;
      blob_write_intptr(ctx->blob, idx);
   } else {
      uintptr_t idx = write_lookup_object(ctx, src->reg.reg) << 2;
      if (src->reg.indirect)
         idx |= 2;
      blob_write_intptr(ctx->blob, idx);
      blob_write_uint32(ctx->blob, src->reg.base_offset);
      if (src->reg.indirect) {
         write_src(ctx, src->reg.indirect);
      }
   }
}

static void
read_src(read_ctx *ctx, nir_src *src, void *mem_ctx)
{
   uintptr_t val = blob_read_intptr(ctx->blob);
   uintptr_t idx = val >> 2;
   src->is_ssa = val & 0x1;
   if (src->is_ssa) {
      src->ssa = read_lookup_object(ctx, idx);
   } else {
      bool is_indirect = val & 0x2;
      src->reg.reg = read_lookup_object(ctx, idx);
      src->reg.base_offset = blob_read_uint32(ctx->blob);
      if (is_indirect) {
         src->reg.indirect = ralloc(mem_ctx, nir_src);
         read_src(ctx, src->reg.indirect, mem_ctx);
      } else {
         src->reg.indirect = NULL;
      }
   }
}

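/* Destinations are packed into a single uint32: bit 0 is is_ssa.  For SSA
 * destinations, bit 1 says whether a name string follows, bits 2..4 hold
 * num_components, and the remaining bits hold bit_size.  For register
 * destinations, bit 1 flags an indirect.
 */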
static void
write_dest(write_ctx *ctx, const nir_dest *dst)
{
   uint32_t val = dst->is_ssa;
   if (dst->is_ssa) {
      val |= !!(dst->ssa.name) << 1;
      val |= dst->ssa.num_components << 2;
      val |= dst->ssa.bit_size << 5;
   } else {
      val |= !!(dst->reg.indirect) << 1;
   }
   blob_write_uint32(ctx->blob, val);
   if (dst->is_ssa) {
      write_add_object(ctx, &dst->ssa);
      if (dst->ssa.name)
         blob_write_string(ctx->blob, dst->ssa.name);
   } else {
      blob_write_intptr(ctx->blob, write_lookup_object(ctx, dst->reg.reg));
      blob_write_uint32(ctx->blob, dst->reg.base_offset);
      if (dst->reg.indirect)
         write_src(ctx, dst->reg.indirect);
   }
}

static void
read_dest(read_ctx *ctx, nir_dest *dst, nir_instr *instr)
{
   uint32_t val = blob_read_uint32(ctx->blob);
   bool is_ssa = val & 0x1;
   if (is_ssa) {
      bool has_name = val & 0x2;
      unsigned num_components = (val >> 2) & 0x7;
      unsigned bit_size = val >> 5;
      char *name = has_name ? blob_read_string(ctx->blob) : NULL;
      nir_ssa_dest_init(instr, dst, num_components, bit_size, name);
      read_add_object(ctx, &dst->ssa);
   } else {
      bool is_indirect = val & 0x2;
      dst->reg.reg = read_object(ctx);
      dst->reg.base_offset = blob_read_uint32(ctx->blob);
      if (is_indirect) {
         dst->reg.indirect = ralloc(instr, nir_src);
         read_src(ctx, dst->reg.indirect, instr);
      }
   }
}

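/* Serializes an old-style nir_deref_var chain: the variable reference
 * followed by the chain length and then each array/struct link along with
 * its type.  New-style deref instructions are handled separately by
 * write_deref()/read_deref() below.
 */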
static void
write_deref_chain(write_ctx *ctx, const nir_deref_var *deref_var)
{
   write_object(ctx, deref_var->var);

   uint32_t len = 0;
   for (const nir_deref *d = deref_var->deref.child; d; d = d->child)
      len++;
   blob_write_uint32(ctx->blob, len);

   for (const nir_deref *d = deref_var->deref.child; d; d = d->child) {
      blob_write_uint32(ctx->blob, d->deref_type);
      switch (d->deref_type) {
      case nir_deref_type_array: {
         const nir_deref_array *deref_array = nir_deref_as_array(d);
         blob_write_uint32(ctx->blob, deref_array->deref_array_type);
         blob_write_uint32(ctx->blob, deref_array->base_offset);
         if (deref_array->deref_array_type == nir_deref_array_type_indirect)
            write_src(ctx, &deref_array->indirect);
         break;
      }
      case nir_deref_type_struct: {
         const nir_deref_struct *deref_struct = nir_deref_as_struct(d);
         blob_write_uint32(ctx->blob, deref_struct->index);
         break;
      }
      case nir_deref_type_var:
         unreachable("Invalid deref type");
      }

      encode_type_to_blob(ctx->blob, d->type);
   }
}

static nir_deref_var *
read_deref_chain(read_ctx *ctx, void *mem_ctx)
{
   nir_variable *var = read_object(ctx);
   nir_deref_var *deref_var = nir_deref_var_create(mem_ctx, var);

   uint32_t len = blob_read_uint32(ctx->blob);

   nir_deref *tail = &deref_var->deref;
   for (uint32_t i = 0; i < len; i++) {
      nir_deref_type deref_type = blob_read_uint32(ctx->blob);
      nir_deref *deref = NULL;
      switch (deref_type) {
      case nir_deref_type_array: {
         nir_deref_array *deref_array = nir_deref_array_create(tail);
         deref_array->deref_array_type = blob_read_uint32(ctx->blob);
         deref_array->base_offset = blob_read_uint32(ctx->blob);
         if (deref_array->deref_array_type == nir_deref_array_type_indirect)
            read_src(ctx, &deref_array->indirect, mem_ctx);
         deref = &deref_array->deref;
         break;
      }
      case nir_deref_type_struct: {
         uint32_t index = blob_read_uint32(ctx->blob);
         nir_deref_struct *deref_struct = nir_deref_struct_create(tail, index);
         deref = &deref_struct->deref;
         break;
      }
      case nir_deref_type_var:
         unreachable("Invalid deref type");
      }

      deref->type = decode_type_from_blob(ctx->blob);

      tail->child = deref;
      tail = deref;
   }

   return deref_var;
}

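/* ALU instructions pack their modifiers: bit 0 is exact, bit 1 is the
 * destination saturate flag, and the write mask lives in bits 2 and up.
 * Each source then gets its own flags word with negate in bit 0, abs in
 * bit 1, and four 2-bit swizzle fields starting at bit 2.
 */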
static void
write_alu(write_ctx *ctx, const nir_alu_instr *alu)
{
   blob_write_uint32(ctx->blob, alu->op);
   uint32_t flags = alu->exact;
   flags |= alu->dest.saturate << 1;
   flags |= alu->dest.write_mask << 2;
   blob_write_uint32(ctx->blob, flags);

   write_dest(ctx, &alu->dest.dest);

   for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
      write_src(ctx, &alu->src[i].src);
      flags = alu->src[i].negate;
      flags |= alu->src[i].abs << 1;
      for (unsigned j = 0; j < 4; j++)
         flags |= alu->src[i].swizzle[j] << (2 + 2 * j);
      blob_write_uint32(ctx->blob, flags);
   }
}

static nir_alu_instr *
read_alu(read_ctx *ctx)
{
   nir_op op = blob_read_uint32(ctx->blob);
   nir_alu_instr *alu = nir_alu_instr_create(ctx->nir, op);

   uint32_t flags = blob_read_uint32(ctx->blob);
   alu->exact = flags & 1;
   alu->dest.saturate = flags & 2;
   alu->dest.write_mask = flags >> 2;

   read_dest(ctx, &alu->dest.dest, &alu->instr);

   for (unsigned i = 0; i < nir_op_infos[op].num_inputs; i++) {
      read_src(ctx, &alu->src[i].src, &alu->instr);
      flags = blob_read_uint32(ctx->blob);
      alu->src[i].negate = flags & 1;
      alu->src[i].abs = flags & 2;
      for (unsigned j = 0; j < 4; j++)
         alu->src[i].swizzle[j] = (flags >> (2 * j + 2)) & 3;
   }

   return alu;
}

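/* New-style deref instructions.  A var deref only stores its variable
 * index; every other deref type stores the parent source plus any
 * per-type payload (struct member index or array index source).
 */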
static void
write_deref(write_ctx *ctx, const nir_deref_instr *deref)
{
   blob_write_uint32(ctx->blob, deref->deref_type);

   blob_write_uint32(ctx->blob, deref->mode);
   encode_type_to_blob(ctx->blob, deref->type);

   write_dest(ctx, &deref->dest);

   if (deref->deref_type == nir_deref_type_var) {
      write_object(ctx, deref->var);
      return;
   }

   write_src(ctx, &deref->parent);

   switch (deref->deref_type) {
   case nir_deref_type_struct:
      blob_write_uint32(ctx->blob, deref->strct.index);
      break;

   case nir_deref_type_array:
      write_src(ctx, &deref->arr.index);
      break;

   case nir_deref_type_array_wildcard:
   case nir_deref_type_cast:
      /* Nothing to do */
      break;

   default:
      unreachable("Invalid deref type");
   }
}

static nir_deref_instr *
read_deref(read_ctx *ctx)
{
   nir_deref_type deref_type = blob_read_uint32(ctx->blob);
   nir_deref_instr *deref = nir_deref_instr_create(ctx->nir, deref_type);

   deref->mode = blob_read_uint32(ctx->blob);
   deref->type = decode_type_from_blob(ctx->blob);

   read_dest(ctx, &deref->dest, &deref->instr);

   if (deref_type == nir_deref_type_var) {
      deref->var = read_object(ctx);
      return deref;
   }

   read_src(ctx, &deref->parent, &deref->instr);

   switch (deref->deref_type) {
   case nir_deref_type_struct:
      deref->strct.index = blob_read_uint32(ctx->blob);
      break;

   case nir_deref_type_array:
      read_src(ctx, &deref->arr.index, &deref->instr);
      break;

   case nir_deref_type_array_wildcard:
   case nir_deref_type_cast:
      /* Nothing to do */
      break;

   default:
      unreachable("Invalid deref type");
   }

   return deref;
}

static void
write_intrinsic(write_ctx *ctx, const nir_intrinsic_instr *intrin)
{
   blob_write_uint32(ctx->blob, intrin->intrinsic);

   unsigned num_variables = nir_intrinsic_infos[intrin->intrinsic].num_variables;
   unsigned num_srcs = nir_intrinsic_infos[intrin->intrinsic].num_srcs;
   unsigned num_indices = nir_intrinsic_infos[intrin->intrinsic].num_indices;

   blob_write_uint32(ctx->blob, intrin->num_components);

   if (nir_intrinsic_infos[intrin->intrinsic].has_dest)
      write_dest(ctx, &intrin->dest);

   for (unsigned i = 0; i < num_variables; i++)
      write_deref_chain(ctx, intrin->variables[i]);

   for (unsigned i = 0; i < num_srcs; i++)
      write_src(ctx, &intrin->src[i]);

   for (unsigned i = 0; i < num_indices; i++)
      blob_write_uint32(ctx->blob, intrin->const_index[i]);
}

static nir_intrinsic_instr *
read_intrinsic(read_ctx *ctx)
{
   nir_intrinsic_op op = blob_read_uint32(ctx->blob);

   nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(ctx->nir, op);

   unsigned num_variables = nir_intrinsic_infos[op].num_variables;
   unsigned num_srcs = nir_intrinsic_infos[op].num_srcs;
   unsigned num_indices = nir_intrinsic_infos[op].num_indices;

   intrin->num_components = blob_read_uint32(ctx->blob);

   if (nir_intrinsic_infos[op].has_dest)
      read_dest(ctx, &intrin->dest, &intrin->instr);

   for (unsigned i = 0; i < num_variables; i++)
      intrin->variables[i] = read_deref_chain(ctx, &intrin->instr);

   for (unsigned i = 0; i < num_srcs; i++)
      read_src(ctx, &intrin->src[i], &intrin->instr);

   for (unsigned i = 0; i < num_indices; i++)
      intrin->const_index[i] = blob_read_uint32(ctx->blob);

   return intrin;
}

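/* Load-const and undef instructions only need their SSA-def layout:
 * num_components goes in the low 3 bits and bit_size in the bits above.
 */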
static void
write_load_const(write_ctx *ctx, const nir_load_const_instr *lc)
{
   uint32_t val = lc->def.num_components;
   val |= lc->def.bit_size << 3;
   blob_write_uint32(ctx->blob, val);
   blob_write_bytes(ctx->blob, (uint8_t *) &lc->value, sizeof(lc->value));
   write_add_object(ctx, &lc->def);
}

static nir_load_const_instr *
read_load_const(read_ctx *ctx)
{
   uint32_t val = blob_read_uint32(ctx->blob);

   nir_load_const_instr *lc =
      nir_load_const_instr_create(ctx->nir, val & 0x7, val >> 3);

   blob_copy_bytes(ctx->blob, (uint8_t *) &lc->value, sizeof(lc->value));
   read_add_object(ctx, &lc->def);
   return lc;
}

static void
write_ssa_undef(write_ctx *ctx, const nir_ssa_undef_instr *undef)
{
   uint32_t val = undef->def.num_components;
   val |= undef->def.bit_size << 3;
   blob_write_uint32(ctx->blob, val);
   write_add_object(ctx, &undef->def);
}

static nir_ssa_undef_instr *
read_ssa_undef(read_ctx *ctx)
{
   uint32_t val = blob_read_uint32(ctx->blob);

   nir_ssa_undef_instr *undef =
      nir_ssa_undef_instr_create(ctx->nir, val & 0x7, val >> 3);

   read_add_object(ctx, &undef->def);
   return undef;
}

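/* All the boolean and small-enum texture instruction fields are packed
 * into a single dword; the STATIC_ASSERT in write_tex() guards the size.
 */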
union packed_tex_data {
   uint32_t u32;
   struct {
      enum glsl_sampler_dim sampler_dim:4;
      nir_alu_type dest_type:8;
      unsigned coord_components:3;
      unsigned is_array:1;
      unsigned is_shadow:1;
      unsigned is_new_style_shadow:1;
      unsigned component:2;
      unsigned has_texture_deref:1;
      unsigned has_sampler_deref:1;
      unsigned unused:10; /* Mark unused for valgrind. */
   } u;
};

static void
write_tex(write_ctx *ctx, const nir_tex_instr *tex)
{
   blob_write_uint32(ctx->blob, tex->num_srcs);
   blob_write_uint32(ctx->blob, tex->op);
   blob_write_uint32(ctx->blob, tex->texture_index);
   blob_write_uint32(ctx->blob, tex->texture_array_size);
   blob_write_uint32(ctx->blob, tex->sampler_index);

   STATIC_ASSERT(sizeof(union packed_tex_data) == sizeof(uint32_t));
   union packed_tex_data packed = {
      .u.sampler_dim = tex->sampler_dim,
      .u.dest_type = tex->dest_type,
      .u.coord_components = tex->coord_components,
      .u.is_array = tex->is_array,
      .u.is_shadow = tex->is_shadow,
      .u.is_new_style_shadow = tex->is_new_style_shadow,
      .u.component = tex->component,
      .u.has_texture_deref = tex->texture != NULL,
      .u.has_sampler_deref = tex->sampler != NULL,
   };
   blob_write_uint32(ctx->blob, packed.u32);

   write_dest(ctx, &tex->dest);
   for (unsigned i = 0; i < tex->num_srcs; i++) {
      blob_write_uint32(ctx->blob, tex->src[i].src_type);
      write_src(ctx, &tex->src[i].src);
   }

   if (tex->texture)
      write_deref_chain(ctx, tex->texture);
   if (tex->sampler)
      write_deref_chain(ctx, tex->sampler);
}

static nir_tex_instr *
read_tex(read_ctx *ctx)
{
   unsigned num_srcs = blob_read_uint32(ctx->blob);
   nir_tex_instr *tex = nir_tex_instr_create(ctx->nir, num_srcs);

   tex->op = blob_read_uint32(ctx->blob);
   tex->texture_index = blob_read_uint32(ctx->blob);
   tex->texture_array_size = blob_read_uint32(ctx->blob);
   tex->sampler_index = blob_read_uint32(ctx->blob);

   union packed_tex_data packed;
   packed.u32 = blob_read_uint32(ctx->blob);
   tex->sampler_dim = packed.u.sampler_dim;
   tex->dest_type = packed.u.dest_type;
   tex->coord_components = packed.u.coord_components;
   tex->is_array = packed.u.is_array;
   tex->is_shadow = packed.u.is_shadow;
   tex->is_new_style_shadow = packed.u.is_new_style_shadow;
   tex->component = packed.u.component;

   read_dest(ctx, &tex->dest, &tex->instr);
   for (unsigned i = 0; i < tex->num_srcs; i++) {
      tex->src[i].src_type = blob_read_uint32(ctx->blob);
      read_src(ctx, &tex->src[i].src, &tex->instr);
   }

   tex->texture = packed.u.has_texture_deref ?
                  read_deref_chain(ctx, &tex->instr) : NULL;
   tex->sampler = packed.u.has_sampler_deref ?
                  read_deref_chain(ctx, &tex->instr) : NULL;

   return tex;
}

static void
write_phi(write_ctx *ctx, const nir_phi_instr *phi)
{
   /* Phi nodes are special, since they may reference SSA definitions and
    * basic blocks that don't exist yet. We leave two empty uintptr_t's here,
    * and then store enough information so that a later fixup pass can fill
    * them in correctly.
    */
   write_dest(ctx, &phi->dest);

   blob_write_uint32(ctx->blob, exec_list_length(&phi->srcs));

   nir_foreach_phi_src(src, phi) {
      assert(src->src.is_ssa);
      size_t blob_offset = blob_reserve_intptr(ctx->blob);
      MAYBE_UNUSED size_t blob_offset2 = blob_reserve_intptr(ctx->blob);
      assert(blob_offset + sizeof(uintptr_t) == blob_offset2);
      write_phi_fixup fixup = {
         .blob_offset = blob_offset,
         .src = src->src.ssa,
         .block = src->pred,
      };
      util_dynarray_append(&ctx->phi_fixups, write_phi_fixup, fixup);
   }
}

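/* Runs after a whole function_impl has been written, once every SSA def
 * and block has an index, and patches the reserved slots for each phi
 * source with the final SSA-def and predecessor-block indices.
 */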
static void
write_fixup_phis(write_ctx *ctx)
{
   util_dynarray_foreach(&ctx->phi_fixups, write_phi_fixup, fixup) {
      uintptr_t *blob_ptr = (uintptr_t *)(ctx->blob->data + fixup->blob_offset);
      blob_ptr[0] = write_lookup_object(ctx, fixup->src);
      blob_ptr[1] = write_lookup_object(ctx, fixup->block);
   }

   util_dynarray_clear(&ctx->phi_fixups);
}

static nir_phi_instr *
read_phi(read_ctx *ctx, nir_block *blk)
{
   nir_phi_instr *phi = nir_phi_instr_create(ctx->nir);

   read_dest(ctx, &phi->dest, &phi->instr);

   unsigned num_srcs = blob_read_uint32(ctx->blob);

   /* For similar reasons as before, we just store the index directly into the
    * pointer, and let a later pass resolve the phi sources.
    *
    * In order to ensure that the copied sources (which are just the indices
    * from the blob for now) don't get inserted into the old shader's use-def
    * lists, we have to add the phi instruction *before* we set up its
    * sources.
    */
   nir_instr_insert_after_block(blk, &phi->instr);

   for (unsigned i = 0; i < num_srcs; i++) {
      nir_phi_src *src = ralloc(phi, nir_phi_src);

      src->src.is_ssa = true;
      src->src.ssa = (nir_ssa_def *) blob_read_intptr(ctx->blob);
      src->pred = (nir_block *) blob_read_intptr(ctx->blob);

      /* Since we're not letting nir_insert_instr handle use/def stuff for us,
       * we have to set the parent_instr manually. It doesn't really matter
       * when we do it, so we might as well do it here.
       */
      src->src.parent_instr = &phi->instr;

      /* Stash it in the list of phi sources. We'll walk this list and fix up
       * sources at the very end of read_function_impl.
       */
      list_add(&src->src.use_link, &ctx->phi_srcs);

      exec_list_push_tail(&phi->srcs, &src->node);
   }

   return phi;
}

static void
read_fixup_phis(read_ctx *ctx)
{
   list_for_each_entry_safe(nir_phi_src, src, &ctx->phi_srcs, src.use_link) {
      src->pred = read_lookup_object(ctx, (uintptr_t)src->pred);
      src->src.ssa = read_lookup_object(ctx, (uintptr_t)src->src.ssa);

      /* Remove from this list */
      list_del(&src->src.use_link);

      list_addtail(&src->src.use_link, &src->src.ssa->uses);
   }
   assert(list_empty(&ctx->phi_srcs));
}

static void
write_jump(write_ctx *ctx, const nir_jump_instr *jmp)
{
   blob_write_uint32(ctx->blob, jmp->type);
}

static nir_jump_instr *
read_jump(read_ctx *ctx)
{
   nir_jump_type type = blob_read_uint32(ctx->blob);
   nir_jump_instr *jmp = nir_jump_instr_create(ctx->nir, type);
   return jmp;
}

static void
write_call(write_ctx *ctx, const nir_call_instr *call)
{
   blob_write_intptr(ctx->blob, write_lookup_object(ctx, call->callee));

   for (unsigned i = 0; i < call->num_params; i++)
      write_deref_chain(ctx, call->params[i]);

   write_deref_chain(ctx, call->return_deref);
}

static nir_call_instr *
read_call(read_ctx *ctx)
{
   nir_function *callee = read_object(ctx);
   nir_call_instr *call = nir_call_instr_create(ctx->nir, callee);

   for (unsigned i = 0; i < call->num_params; i++)
      call->params[i] = read_deref_chain(ctx, &call->instr);

   call->return_deref = read_deref_chain(ctx, &call->instr);

   return call;
}

static void
write_instr(write_ctx *ctx, const nir_instr *instr)
{
   blob_write_uint32(ctx->blob, instr->type);
   switch (instr->type) {
   case nir_instr_type_alu:
      write_alu(ctx, nir_instr_as_alu(instr));
      break;
   case nir_instr_type_deref:
      write_deref(ctx, nir_instr_as_deref(instr));
      break;
   case nir_instr_type_intrinsic:
      write_intrinsic(ctx, nir_instr_as_intrinsic(instr));
      break;
   case nir_instr_type_load_const:
      write_load_const(ctx, nir_instr_as_load_const(instr));
      break;
   case nir_instr_type_ssa_undef:
      write_ssa_undef(ctx, nir_instr_as_ssa_undef(instr));
      break;
   case nir_instr_type_tex:
      write_tex(ctx, nir_instr_as_tex(instr));
      break;
   case nir_instr_type_phi:
      write_phi(ctx, nir_instr_as_phi(instr));
      break;
   case nir_instr_type_jump:
      write_jump(ctx, nir_instr_as_jump(instr));
      break;
   case nir_instr_type_call:
      write_call(ctx, nir_instr_as_call(instr));
      break;
   case nir_instr_type_parallel_copy:
      unreachable("Cannot write parallel copies");
   default:
      unreachable("bad instr type");
   }
}

static void
read_instr(read_ctx *ctx, nir_block *block)
{
   nir_instr_type type = blob_read_uint32(ctx->blob);
   nir_instr *instr;
   switch (type) {
   case nir_instr_type_alu:
      instr = &read_alu(ctx)->instr;
      break;
   case nir_instr_type_deref:
      instr = &read_deref(ctx)->instr;
      break;
   case nir_instr_type_intrinsic:
      instr = &read_intrinsic(ctx)->instr;
      break;
   case nir_instr_type_load_const:
      instr = &read_load_const(ctx)->instr;
      break;
   case nir_instr_type_ssa_undef:
      instr = &read_ssa_undef(ctx)->instr;
      break;
   case nir_instr_type_tex:
      instr = &read_tex(ctx)->instr;
      break;
   case nir_instr_type_phi:
      /* Phi instructions are a bit of a special case when reading because we
       * don't want inserting the instruction to automatically handle use/defs
       * for us. Instead, we need to wait until all the blocks/instructions
       * are read so that we can set their sources up.
       */
      read_phi(ctx, block);
      return;
   case nir_instr_type_jump:
      instr = &read_jump(ctx)->instr;
      break;
   case nir_instr_type_call:
      instr = &read_call(ctx)->instr;
      break;
   case nir_instr_type_parallel_copy:
      unreachable("Cannot read parallel copies");
   default:
      unreachable("bad instr type");
   }

   nir_instr_insert_after_block(block, instr);
}

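/* Blocks register themselves in the remap table when written; the phi
 * fixups are the only consumers of those indices, since block successors
 * are implied by the control-flow tree rather than serialized.
 */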
static void
write_block(write_ctx *ctx, const nir_block *block)
{
   write_add_object(ctx, block);
   blob_write_uint32(ctx->blob, exec_list_length(&block->instr_list));
   nir_foreach_instr(instr, block)
      write_instr(ctx, instr);
}

static void
read_block(read_ctx *ctx, struct exec_list *cf_list)
{
   /* Don't actually create a new block. Just use the one from the tail of
    * the list. NIR guarantees that the tail of the list is a block and that
    * no two blocks are side-by-side in the IR; it should be empty.
    */
   nir_block *block =
      exec_node_data(nir_block, exec_list_get_tail(cf_list), cf_node.node);

   read_add_object(ctx, block);
   unsigned num_instrs = blob_read_uint32(ctx->blob);
   for (unsigned i = 0; i < num_instrs; i++) {
      read_instr(ctx, block);
   }
}

static void
write_cf_list(write_ctx *ctx, const struct exec_list *cf_list);

static void
read_cf_list(read_ctx *ctx, struct exec_list *cf_list);

static void
write_if(write_ctx *ctx, nir_if *nif)
{
   write_src(ctx, &nif->condition);

   write_cf_list(ctx, &nif->then_list);
   write_cf_list(ctx, &nif->else_list);
}

static void
read_if(read_ctx *ctx, struct exec_list *cf_list)
{
   nir_if *nif = nir_if_create(ctx->nir);

   read_src(ctx, &nif->condition, nif);

   nir_cf_node_insert_end(cf_list, &nif->cf_node);

   read_cf_list(ctx, &nif->then_list);
   read_cf_list(ctx, &nif->else_list);
}

static void
write_loop(write_ctx *ctx, nir_loop *loop)
{
   write_cf_list(ctx, &loop->body);
}

static void
read_loop(read_ctx *ctx, struct exec_list *cf_list)
{
   nir_loop *loop = nir_loop_create(ctx->nir);

   nir_cf_node_insert_end(cf_list, &loop->cf_node);

   read_cf_list(ctx, &loop->body);
}

static void
write_cf_node(write_ctx *ctx, nir_cf_node *cf)
{
   blob_write_uint32(ctx->blob, cf->type);

   switch (cf->type) {
   case nir_cf_node_block:
      write_block(ctx, nir_cf_node_as_block(cf));
      break;
   case nir_cf_node_if:
      write_if(ctx, nir_cf_node_as_if(cf));
      break;
   case nir_cf_node_loop:
      write_loop(ctx, nir_cf_node_as_loop(cf));
      break;
   default:
      unreachable("bad cf type");
   }
}

static void
read_cf_node(read_ctx *ctx, struct exec_list *list)
{
   nir_cf_node_type type = blob_read_uint32(ctx->blob);

   switch (type) {
   case nir_cf_node_block:
      read_block(ctx, list);
      break;
   case nir_cf_node_if:
      read_if(ctx, list);
      break;
   case nir_cf_node_loop:
      read_loop(ctx, list);
      break;
   default:
      unreachable("bad cf type");
   }
}

static void
write_cf_list(write_ctx *ctx, const struct exec_list *cf_list)
{
   blob_write_uint32(ctx->blob, exec_list_length(cf_list));
   foreach_list_typed(nir_cf_node, cf, node, cf_list) {
      write_cf_node(ctx, cf);
   }
}

static void
read_cf_list(read_ctx *ctx, struct exec_list *cf_list)
{
   uint32_t num_cf_nodes = blob_read_uint32(ctx->blob);
   for (unsigned i = 0; i < num_cf_nodes; i++)
      read_cf_node(ctx, cf_list);
}

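/* A function_impl is its locals, registers, parameter and return
 * variables, and then the control-flow list.  Phi fixups are flushed at
 * the end of each impl, once every SSA def and block in it has an index.
 */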
static void
write_function_impl(write_ctx *ctx, const nir_function_impl *fi)
{
   write_var_list(ctx, &fi->locals);
   write_reg_list(ctx, &fi->registers);
   blob_write_uint32(ctx->blob, fi->reg_alloc);

   blob_write_uint32(ctx->blob, fi->num_params);
   for (unsigned i = 0; i < fi->num_params; i++) {
      write_variable(ctx, fi->params[i]);
   }

   blob_write_uint32(ctx->blob, !!(fi->return_var));
   if (fi->return_var)
      write_variable(ctx, fi->return_var);

   write_cf_list(ctx, &fi->body);
   write_fixup_phis(ctx);
}

static nir_function_impl *
read_function_impl(read_ctx *ctx, nir_function *fxn)
{
   nir_function_impl *fi = nir_function_impl_create_bare(ctx->nir);
   fi->function = fxn;

   read_var_list(ctx, &fi->locals);
   read_reg_list(ctx, &fi->registers);
   fi->reg_alloc = blob_read_uint32(ctx->blob);

   fi->num_params = blob_read_uint32(ctx->blob);
   fi->params = ralloc_array(fi, nir_variable *, fi->num_params);
   for (unsigned i = 0; i < fi->num_params; i++) {
      fi->params[i] = read_variable(ctx);
   }

   bool has_return = blob_read_uint32(ctx->blob);
   if (has_return)
      fi->return_var = read_variable(ctx);
   else
      fi->return_var = NULL;

   read_cf_list(ctx, &fi->body);
   read_fixup_phis(ctx);

   fi->valid_metadata = 0;

   return fi;
}

static void
write_function(write_ctx *ctx, const nir_function *fxn)
{
   blob_write_uint32(ctx->blob, !!(fxn->name));
   if (fxn->name)
      blob_write_string(ctx->blob, fxn->name);

   write_add_object(ctx, fxn);

   blob_write_uint32(ctx->blob, fxn->num_params);
   for (unsigned i = 0; i < fxn->num_params; i++) {
      blob_write_uint32(ctx->blob, fxn->params[i].param_type);
      encode_type_to_blob(ctx->blob, fxn->params[i].type);
   }

   encode_type_to_blob(ctx->blob, fxn->return_type);

   /* At first glance, it looks like we should write the function_impl here.
    * However, call instructions need to be able to reference at least the
    * function and those will get processed as we write the function_impls.
    * We stop here and write function_impls as a second pass.
    */
}

1172 read_function(read_ctx *ctx)
1173 {
1174 bool has_name = blob_read_uint32(ctx->blob);
1175 char *name = has_name ? blob_read_string(ctx->blob) : NULL;
1176
1177 nir_function *fxn = nir_function_create(ctx->nir, name);
1178
1179 read_add_object(ctx, fxn);
1180
1181 fxn->num_params = blob_read_uint32(ctx->blob);
1182 for (unsigned i = 0; i < fxn->num_params; i++) {
1183 fxn->params[i].param_type = blob_read_uint32(ctx->blob);
1184 fxn->params[i].type = decode_type_from_blob(ctx->blob);
1185 }
1186
1187 fxn->return_type = decode_type_from_blob(ctx->blob);
1188 }
1189
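/* Blob layout: a reserved intptr holding the total object count (patched
 * in at the end and used to size the index table on the read side),
 * followed by the shader_info (name/label strings first), the top-level
 * variable and register lists, the counters, every nir_function, and
 * finally every function_impl.
 */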
void
nir_serialize(struct blob *blob, const nir_shader *nir)
{
   write_ctx ctx;
   ctx.remap_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
                                             _mesa_key_pointer_equal);
   ctx.next_idx = 0;
   ctx.blob = blob;
   ctx.nir = nir;
   util_dynarray_init(&ctx.phi_fixups, NULL);

   size_t idx_size_offset = blob_reserve_intptr(blob);

   struct shader_info info = nir->info;
   uint32_t strings = 0;
   if (info.name)
      strings |= 0x1;
   if (info.label)
      strings |= 0x2;
   blob_write_uint32(blob, strings);
   if (info.name)
      blob_write_string(blob, info.name);
   if (info.label)
      blob_write_string(blob, info.label);
   info.name = info.label = NULL;
   blob_write_bytes(blob, (uint8_t *) &info, sizeof(info));

   write_var_list(&ctx, &nir->uniforms);
   write_var_list(&ctx, &nir->inputs);
   write_var_list(&ctx, &nir->outputs);
   write_var_list(&ctx, &nir->shared);
   write_var_list(&ctx, &nir->globals);
   write_var_list(&ctx, &nir->system_values);

   write_reg_list(&ctx, &nir->registers);
   blob_write_uint32(blob, nir->reg_alloc);
   blob_write_uint32(blob, nir->num_inputs);
   blob_write_uint32(blob, nir->num_uniforms);
   blob_write_uint32(blob, nir->num_outputs);
   blob_write_uint32(blob, nir->num_shared);
   blob_write_uint32(blob, nir->lowered_derefs);

   blob_write_uint32(blob, exec_list_length(&nir->functions));
   nir_foreach_function(fxn, nir) {
      write_function(&ctx, fxn);
   }

   nir_foreach_function(fxn, nir) {
      write_function_impl(&ctx, fxn->impl);
   }

   *(uintptr_t *)(blob->data + idx_size_offset) = ctx.next_idx;

   _mesa_hash_table_destroy(ctx.remap_table, NULL);
   util_dynarray_fini(&ctx.phi_fixups);
}

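/* Deserialization mirrors the write order exactly.  The index table is
 * sized from the object count that nir_serialize() patched into the
 * reserved slot at the start of the blob.
 */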
nir_shader *
nir_deserialize(void *mem_ctx,
                const struct nir_shader_compiler_options *options,
                struct blob_reader *blob)
{
   read_ctx ctx;
   ctx.blob = blob;
   list_inithead(&ctx.phi_srcs);
   ctx.idx_table_len = blob_read_intptr(blob);
   ctx.idx_table = calloc(ctx.idx_table_len, sizeof(uintptr_t));
   ctx.next_idx = 0;

   uint32_t strings = blob_read_uint32(blob);
   char *name = (strings & 0x1) ? blob_read_string(blob) : NULL;
   char *label = (strings & 0x2) ? blob_read_string(blob) : NULL;

   struct shader_info info;
   blob_copy_bytes(blob, (uint8_t *) &info, sizeof(info));

   ctx.nir = nir_shader_create(mem_ctx, info.stage, options, NULL);

   info.name = name ? ralloc_strdup(ctx.nir, name) : NULL;
   info.label = label ? ralloc_strdup(ctx.nir, label) : NULL;

   ctx.nir->info = info;

   read_var_list(&ctx, &ctx.nir->uniforms);
   read_var_list(&ctx, &ctx.nir->inputs);
   read_var_list(&ctx, &ctx.nir->outputs);
   read_var_list(&ctx, &ctx.nir->shared);
   read_var_list(&ctx, &ctx.nir->globals);
   read_var_list(&ctx, &ctx.nir->system_values);

   read_reg_list(&ctx, &ctx.nir->registers);
   ctx.nir->reg_alloc = blob_read_uint32(blob);
   ctx.nir->num_inputs = blob_read_uint32(blob);
   ctx.nir->num_uniforms = blob_read_uint32(blob);
   ctx.nir->num_outputs = blob_read_uint32(blob);
   ctx.nir->num_shared = blob_read_uint32(blob);
   ctx.nir->lowered_derefs = blob_read_uint32(blob);

   unsigned num_functions = blob_read_uint32(blob);
   for (unsigned i = 0; i < num_functions; i++)
      read_function(&ctx);

   nir_foreach_function(fxn, ctx.nir)
      fxn->impl = read_function_impl(&ctx, fxn);

   free(ctx.idx_table);

   return ctx.nir;
}

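/* Convenience helper that round-trips a shader through the serializer and
 * back, e.g. to shake out serialization bugs in testing.  Note that it
 * frees the input shader.
 */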
nir_shader *
nir_shader_serialize_deserialize(void *mem_ctx, nir_shader *s)
{
   const struct nir_shader_compiler_options *options = s->options;

   struct blob writer;
   blob_init(&writer);
   nir_serialize(&writer, s);
   ralloc_free(s);

   struct blob_reader reader;
   blob_reader_init(&reader, writer.data, writer.size);
   nir_shader *ns = nir_deserialize(mem_ctx, options, &reader);

   blob_finish(&writer);

   return ns;
}