nir/serialize: pack load_const with non-64-bit constants better
[mesa.git] / src / compiler / nir / nir_serialize.c
1 /*
2 * Copyright © 2017 Connor Abbott
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "nir_serialize.h"
25 #include "nir_control_flow.h"
26 #include "util/u_dynarray.h"
27 #include "util/u_math.h"
28
/* Sentinel pointer value (the integer 1 cast to a pointer). Its use is outside
 * this part of the file; presumably it marks a function that has an
 * implementation -- TODO confirm against the function (de)serialization code.
 */
#define NIR_SERIALIZE_FUNC_HAS_IMPL ((void *)(intptr_t)1)
/* Limit on object indices: write_add_object() asserts that an assigned index
 * never equals this value, and union packed_src stores indices in 20 bits.
 */
#define MAX_OBJECT_IDS (1 << 20)
31
/* Record of one phi source whose encoding is deferred: write_phi() reserves
 * space in the blob and remembers here what has to be stored there later.
 */
typedef struct {
   /* Offset in the blob of the first of the two reserved uint32 slots. */
   size_t blob_offset;
   /* SSA definition used as the phi source. */
   nir_ssa_def *src;
   /* Predecessor block associated with the phi source. */
   nir_block *block;
} write_phi_fixup;
37
/* State carried through serialization of one shader. */
typedef struct {
   /* The shader being serialized. */
   const nir_shader *nir;

   /* Destination of all serialized data. */
   struct blob *blob;

   /* maps pointer to index */
   struct hash_table *remap_table;

   /* the next index to assign to a NIR in-memory object */
   uint32_t next_idx;

   /* Array of write_phi_fixup structs representing phi sources that need to
    * be resolved in the second pass.
    */
   struct util_dynarray phi_fixups;

   /* The last serialized type. Used by write_variable() to skip re-encoding
    * a type (or interface type) identical to the previous variable's.
    */
   const struct glsl_type *last_type;
   const struct glsl_type *last_interface_type;
   /* Data of the last variable written with var_encode_full or
    * var_encode_location_diff; the base for the location-diff encoding.
    */
   struct nir_variable_data last_var_data;

   /* Don't write optional data such as variable names. */
   bool strip;
} write_ctx;
62
/* State carried through deserialization of one shader. */
typedef struct {
   /* The shader being rebuilt; also used as the allocation context for
    * deserialized objects.
    */
   nir_shader *nir;

   /* Source of all serialized data. */
   struct blob_reader *blob;

   /* the next index to assign to a NIR in-memory object */
   uint32_t next_idx;

   /* The length of the index -> object table */
   uint32_t idx_table_len;

   /* map from index to deserialized pointer */
   void **idx_table;

   /* List of phi sources. */
   struct list_head phi_srcs;

   /* The last deserialized type. Mirrors the writer's state so that
    * "same as last" flags can be resolved by read_variable().
    */
   const struct glsl_type *last_type;
   const struct glsl_type *last_interface_type;
   /* Data of the last variable read with a full or location-diff encoding. */
   struct nir_variable_data last_var_data;
} read_ctx;
85
86 static void
87 write_add_object(write_ctx *ctx, const void *obj)
88 {
89 uint32_t index = ctx->next_idx++;
90 assert(index != MAX_OBJECT_IDS);
91 _mesa_hash_table_insert(ctx->remap_table, obj, (void *)(uintptr_t) index);
92 }
93
94 static uint32_t
95 write_lookup_object(write_ctx *ctx, const void *obj)
96 {
97 struct hash_entry *entry = _mesa_hash_table_search(ctx->remap_table, obj);
98 assert(entry);
99 return (uint32_t)(uintptr_t) entry->data;
100 }
101
102 static void
103 write_object(write_ctx *ctx, const void *obj)
104 {
105 blob_write_uint32(ctx->blob, write_lookup_object(ctx, obj));
106 }
107
108 static void
109 read_add_object(read_ctx *ctx, void *obj)
110 {
111 assert(ctx->next_idx < ctx->idx_table_len);
112 ctx->idx_table[ctx->next_idx++] = obj;
113 }
114
115 static void *
116 read_lookup_object(read_ctx *ctx, uint32_t idx)
117 {
118 assert(idx < ctx->idx_table_len);
119 return ctx->idx_table[idx];
120 }
121
122 static void *
123 read_object(read_ctx *ctx)
124 {
125 return read_lookup_object(ctx, blob_read_uint32(ctx->blob));
126 }
127
/* Encode a bit size of 0, 1, 2, 4, 8, 16, 32 or 64 in 3 bits:
 * 0 is stored as 0, any other value as log2(bit_size) + 1.
 */
static uint32_t
encode_bit_size_3bits(uint8_t bit_size)
{
   assert(bit_size <= 64 && util_is_power_of_two_or_zero(bit_size));

   return bit_size == 0 ? 0 : util_logbase2(bit_size) + 1;
}
137
/* Inverse of encode_bit_size_3bits(): 0 decodes to 0, and an encoded value
 * v > 0 decodes to 1 << (v - 1).
 */
static uint8_t
decode_bit_size_3bits(uint8_t bit_size)
{
   if (bit_size == 0)
      return 0;

   return 1 << (bit_size - 1);
}
145
/* Encode a component count in 3 bits: 0..4 are stored as-is, 8 is stored as 5
 * and 16 as 6. Any other count is invalid.
 */
static uint8_t
encode_num_components_in_3bits(uint8_t num_components)
{
   switch (num_components) {
   case 0:
   case 1:
   case 2:
   case 3:
   case 4:
      return num_components;
   case 8:
      return 5;
   case 16:
      return 6;
   default:
      unreachable("invalid number in num_components");
      return 0;
   }
}
159
/* Inverse of encode_num_components_in_3bits(): 0..4 decode to themselves,
 * 5 decodes to 8 and 6 to 16. Any other encoding is invalid.
 */
static uint8_t
decode_num_components_in_3bits(uint8_t value)
{
   switch (value) {
   case 0:
   case 1:
   case 2:
   case 3:
   case 4:
      return value;
   case 5:
      return 8;
   case 6:
      return 16;
   default:
      unreachable("invalid num_components encoding");
      return 0;
   }
}
173
174 static void
175 write_constant(write_ctx *ctx, const nir_constant *c)
176 {
177 blob_write_bytes(ctx->blob, c->values, sizeof(c->values));
178 blob_write_uint32(ctx->blob, c->num_elements);
179 for (unsigned i = 0; i < c->num_elements; i++)
180 write_constant(ctx, c->elements[i]);
181 }
182
183 static nir_constant *
184 read_constant(read_ctx *ctx, nir_variable *nvar)
185 {
186 nir_constant *c = ralloc(nvar, nir_constant);
187
188 blob_copy_bytes(ctx->blob, (uint8_t *)c->values, sizeof(c->values));
189 c->num_elements = blob_read_uint32(ctx->blob);
190 c->elements = ralloc_array(nvar, nir_constant *, c->num_elements);
191 for (unsigned i = 0; i < c->num_elements; i++)
192 c->elements[i] = read_constant(ctx, nvar);
193
194 return c;
195 }
196
/* How a variable's nir_variable_data is stored in the blob. The value is kept
 * in the 2-bit data_encoding field of union packed_var.
 */
enum var_data_encoding {
   /* The whole nir_variable_data struct is written as raw bytes. */
   var_encode_full,
   /* Nothing is written; the reader only sets mode = nir_var_shader_temp. */
   var_encode_shader_temp,
   /* Nothing is written; the reader only sets mode = nir_var_function_temp. */
   var_encode_function_temp,
   /* Only a packed_var_data_diff relative to the previous variable's data is
    * written.
    */
   var_encode_location_diff,
};
203
/* 32-bit header written at the start of every serialized variable. */
union packed_var {
   uint32_t u32;
   struct {
      /* A name string follows in the blob. */
      unsigned has_name:1;
      /* A serialized constant initializer follows. */
      unsigned has_constant_initializer:1;
      /* The variable has an interface type. */
      unsigned has_interface_type:1;
      /* Number of state slots that follow (asserted < 128 by the writer). */
      unsigned num_state_slots:7;
      /* One of enum var_data_encoding. */
      unsigned data_encoding:2;
      /* The type equals the previous variable's type and is not re-encoded. */
      unsigned type_same_as_last:1;
      /* Same, for the interface type. */
      unsigned interface_type_same_as_last:1;
      unsigned _pad:2;
      /* Number of member entries that follow (asserted < 65536). */
      unsigned num_members:16;
   } u;
};
218
/* Payload of var_encode_location_diff: signed differences between this
 * variable's location fields and those of the previously serialized variable,
 * packed into 32 bits.
 */
union packed_var_data_diff {
   uint32_t u32;
   struct {
      /* The writer only uses this encoding when |delta| < (1 << 12). */
      int location:13;
      int location_frac:3;
      /* The writer only uses this encoding when |delta| < (1 << 15). */
      int driver_location:16;
   } u;
};
227
228 static void
229 write_variable(write_ctx *ctx, const nir_variable *var)
230 {
231 write_add_object(ctx, var);
232
233 assert(var->num_state_slots < (1 << 7));
234 assert(var->num_members < (1 << 16));
235
236 STATIC_ASSERT(sizeof(union packed_var) == 4);
237 union packed_var flags;
238 flags.u32 = 0;
239
240 flags.u.has_name = !ctx->strip && var->name;
241 flags.u.has_constant_initializer = !!(var->constant_initializer);
242 flags.u.has_interface_type = !!(var->interface_type);
243 flags.u.type_same_as_last = var->type == ctx->last_type;
244 flags.u.interface_type_same_as_last =
245 var->interface_type && var->interface_type == ctx->last_interface_type;
246 flags.u.num_state_slots = var->num_state_slots;
247 flags.u.num_members = var->num_members;
248
249 struct nir_variable_data data = var->data;
250
251 /* When stripping, we expect that the location is no longer needed,
252 * which is typically after shaders are linked.
253 */
254 if (ctx->strip &&
255 data.mode != nir_var_shader_in &&
256 data.mode != nir_var_shader_out)
257 data.location = 0;
258
259 /* Temporary variables don't serialize var->data. */
260 if (data.mode == nir_var_shader_temp)
261 flags.u.data_encoding = var_encode_shader_temp;
262 else if (data.mode == nir_var_function_temp)
263 flags.u.data_encoding = var_encode_function_temp;
264 else {
265 struct nir_variable_data tmp = data;
266
267 tmp.location = ctx->last_var_data.location;
268 tmp.location_frac = ctx->last_var_data.location_frac;
269 tmp.driver_location = ctx->last_var_data.driver_location;
270
271 /* See if we can encode only the difference in locations from the last
272 * variable.
273 */
274 if (memcmp(&ctx->last_var_data, &tmp, sizeof(tmp)) == 0 &&
275 abs((int)data.location -
276 (int)ctx->last_var_data.location) < (1 << 12) &&
277 abs((int)data.driver_location -
278 (int)ctx->last_var_data.driver_location) < (1 << 15))
279 flags.u.data_encoding = var_encode_location_diff;
280 else
281 flags.u.data_encoding = var_encode_full;
282 }
283
284 blob_write_uint32(ctx->blob, flags.u32);
285
286 if (!flags.u.type_same_as_last) {
287 encode_type_to_blob(ctx->blob, var->type);
288 ctx->last_type = var->type;
289 }
290
291 if (var->interface_type && !flags.u.interface_type_same_as_last) {
292 encode_type_to_blob(ctx->blob, var->interface_type);
293 ctx->last_interface_type = var->interface_type;
294 }
295
296 if (flags.u.has_name)
297 blob_write_string(ctx->blob, var->name);
298
299 if (flags.u.data_encoding == var_encode_full ||
300 flags.u.data_encoding == var_encode_location_diff) {
301 if (flags.u.data_encoding == var_encode_full) {
302 blob_write_bytes(ctx->blob, &data, sizeof(data));
303 } else {
304 /* Serialize only the difference in locations from the last variable.
305 */
306 union packed_var_data_diff diff;
307
308 diff.u.location = data.location - ctx->last_var_data.location;
309 diff.u.location_frac = data.location_frac -
310 ctx->last_var_data.location_frac;
311 diff.u.driver_location = data.driver_location -
312 ctx->last_var_data.driver_location;
313
314 blob_write_uint32(ctx->blob, diff.u32);
315 }
316
317 ctx->last_var_data = data;
318 }
319
320 for (unsigned i = 0; i < var->num_state_slots; i++) {
321 blob_write_bytes(ctx->blob, &var->state_slots[i],
322 sizeof(var->state_slots[i]));
323 }
324 if (var->constant_initializer)
325 write_constant(ctx, var->constant_initializer);
326 if (var->num_members > 0) {
327 blob_write_bytes(ctx->blob, (uint8_t *) var->members,
328 var->num_members * sizeof(*var->members));
329 }
330 }
331
332 static nir_variable *
333 read_variable(read_ctx *ctx)
334 {
335 nir_variable *var = rzalloc(ctx->nir, nir_variable);
336 read_add_object(ctx, var);
337
338 union packed_var flags;
339 flags.u32 = blob_read_uint32(ctx->blob);
340
341 if (flags.u.type_same_as_last) {
342 var->type = ctx->last_type;
343 } else {
344 var->type = decode_type_from_blob(ctx->blob);
345 ctx->last_type = var->type;
346 }
347
348 if (flags.u.has_interface_type) {
349 if (flags.u.interface_type_same_as_last) {
350 var->interface_type = ctx->last_interface_type;
351 } else {
352 var->interface_type = decode_type_from_blob(ctx->blob);
353 ctx->last_interface_type = var->interface_type;
354 }
355 }
356
357 if (flags.u.has_name) {
358 const char *name = blob_read_string(ctx->blob);
359 var->name = ralloc_strdup(var, name);
360 } else {
361 var->name = NULL;
362 }
363
364 if (flags.u.data_encoding == var_encode_shader_temp)
365 var->data.mode = nir_var_shader_temp;
366 else if (flags.u.data_encoding == var_encode_function_temp)
367 var->data.mode = nir_var_function_temp;
368 else if (flags.u.data_encoding == var_encode_full) {
369 blob_copy_bytes(ctx->blob, (uint8_t *) &var->data, sizeof(var->data));
370 ctx->last_var_data = var->data;
371 } else { /* var_encode_location_diff */
372 union packed_var_data_diff diff;
373 diff.u32 = blob_read_uint32(ctx->blob);
374
375 var->data = ctx->last_var_data;
376 var->data.location += diff.u.location;
377 var->data.location_frac += diff.u.location_frac;
378 var->data.driver_location += diff.u.driver_location;
379
380 ctx->last_var_data = var->data;
381 }
382
383 var->num_state_slots = flags.u.num_state_slots;
384 if (var->num_state_slots != 0) {
385 var->state_slots = ralloc_array(var, nir_state_slot,
386 var->num_state_slots);
387 for (unsigned i = 0; i < var->num_state_slots; i++) {
388 blob_copy_bytes(ctx->blob, &var->state_slots[i],
389 sizeof(var->state_slots[i]));
390 }
391 }
392 if (flags.u.has_constant_initializer)
393 var->constant_initializer = read_constant(ctx, var);
394 else
395 var->constant_initializer = NULL;
396 var->num_members = flags.u.num_members;
397 if (var->num_members > 0) {
398 var->members = ralloc_array(var, struct nir_variable_data,
399 var->num_members);
400 blob_copy_bytes(ctx->blob, (uint8_t *) var->members,
401 var->num_members * sizeof(*var->members));
402 }
403
404 return var;
405 }
406
407 static void
408 write_var_list(write_ctx *ctx, const struct exec_list *src)
409 {
410 blob_write_uint32(ctx->blob, exec_list_length(src));
411 foreach_list_typed(nir_variable, var, node, src) {
412 write_variable(ctx, var);
413 }
414 }
415
416 static void
417 read_var_list(read_ctx *ctx, struct exec_list *dst)
418 {
419 exec_list_make_empty(dst);
420 unsigned num_vars = blob_read_uint32(ctx->blob);
421 for (unsigned i = 0; i < num_vars; i++) {
422 nir_variable *var = read_variable(ctx);
423 exec_list_push_tail(dst, &var->node);
424 }
425 }
426
427 static void
428 write_register(write_ctx *ctx, const nir_register *reg)
429 {
430 write_add_object(ctx, reg);
431 blob_write_uint32(ctx->blob, reg->num_components);
432 blob_write_uint32(ctx->blob, reg->bit_size);
433 blob_write_uint32(ctx->blob, reg->num_array_elems);
434 blob_write_uint32(ctx->blob, reg->index);
435 blob_write_uint32(ctx->blob, !ctx->strip && reg->name);
436 if (!ctx->strip && reg->name)
437 blob_write_string(ctx->blob, reg->name);
438 }
439
440 static nir_register *
441 read_register(read_ctx *ctx)
442 {
443 nir_register *reg = ralloc(ctx->nir, nir_register);
444 read_add_object(ctx, reg);
445 reg->num_components = blob_read_uint32(ctx->blob);
446 reg->bit_size = blob_read_uint32(ctx->blob);
447 reg->num_array_elems = blob_read_uint32(ctx->blob);
448 reg->index = blob_read_uint32(ctx->blob);
449 bool has_name = blob_read_uint32(ctx->blob);
450 if (has_name) {
451 const char *name = blob_read_string(ctx->blob);
452 reg->name = ralloc_strdup(reg, name);
453 } else {
454 reg->name = NULL;
455 }
456
457 list_inithead(&reg->uses);
458 list_inithead(&reg->defs);
459 list_inithead(&reg->if_uses);
460
461 return reg;
462 }
463
464 static void
465 write_reg_list(write_ctx *ctx, const struct exec_list *src)
466 {
467 blob_write_uint32(ctx->blob, exec_list_length(src));
468 foreach_list_typed(nir_register, reg, node, src)
469 write_register(ctx, reg);
470 }
471
472 static void
473 read_reg_list(read_ctx *ctx, struct exec_list *dst)
474 {
475 exec_list_make_empty(dst);
476 unsigned num_regs = blob_read_uint32(ctx->blob);
477 for (unsigned i = 0; i < num_regs; i++) {
478 nir_register *reg = read_register(ctx);
479 exec_list_push_tail(dst, &reg->node);
480 }
481 }
482
/* 32-bit encoding of a source. The low 22 bits (the header) are common to
 * all sources; the remaining 10 bits (the footer) are interpreted according
 * to the kind of instruction the source belongs to.
 */
union packed_src {
   uint32_t u32;
   /* Generic view: filled in by write_src_full() for every source. */
   struct {
      unsigned is_ssa:1; /* <-- Header */
      unsigned is_indirect:1;
      unsigned object_idx:20;
      unsigned _footer:10; /* <-- Footer */
   } any;
   /* ALU source view: modifiers and a 4-component swizzle in the footer. */
   struct {
      unsigned _header:22; /* <-- Header */
      unsigned negate:1; /* <-- Footer */
      unsigned abs:1;
      unsigned swizzle_x:2;
      unsigned swizzle_y:2;
      unsigned swizzle_z:2;
      unsigned swizzle_w:2;
   } alu;
   /* Texture source view: the texture source type in the footer. */
   struct {
      unsigned _header:22; /* <-- Header */
      unsigned src_type:5; /* <-- Footer */
      unsigned _pad:5;
   } tex;
};
506
507 static void
508 write_src_full(write_ctx *ctx, const nir_src *src, union packed_src header)
509 {
510 /* Since sources are very frequent, we try to save some space when storing
511 * them. In particular, we store whether the source is a register and
512 * whether the register has an indirect index in the low two bits. We can
513 * assume that the high two bits of the index are zero, since otherwise our
514 * address space would've been exhausted allocating the remap table!
515 */
516 header.any.is_ssa = src->is_ssa;
517 if (src->is_ssa) {
518 header.any.object_idx = write_lookup_object(ctx, src->ssa);
519 blob_write_uint32(ctx->blob, header.u32);
520 } else {
521 header.any.object_idx = write_lookup_object(ctx, src->reg.reg);
522 header.any.is_indirect = !!src->reg.indirect;
523 blob_write_uint32(ctx->blob, header.u32);
524 blob_write_uint32(ctx->blob, src->reg.base_offset);
525 if (src->reg.indirect) {
526 union packed_src header = {0};
527 write_src_full(ctx, src->reg.indirect, header);
528 }
529 }
530 }
531
532 static void
533 write_src(write_ctx *ctx, const nir_src *src)
534 {
535 union packed_src header = {0};
536 write_src_full(ctx, src, header);
537 }
538
539 static union packed_src
540 read_src(read_ctx *ctx, nir_src *src, void *mem_ctx)
541 {
542 STATIC_ASSERT(sizeof(union packed_src) == 4);
543 union packed_src header;
544 header.u32 = blob_read_uint32(ctx->blob);
545
546 src->is_ssa = header.any.is_ssa;
547 if (src->is_ssa) {
548 src->ssa = read_lookup_object(ctx, header.any.object_idx);
549 } else {
550 src->reg.reg = read_lookup_object(ctx, header.any.object_idx);
551 src->reg.base_offset = blob_read_uint32(ctx->blob);
552 if (header.any.is_indirect) {
553 src->reg.indirect = ralloc(mem_ctx, nir_src);
554 read_src(ctx, src->reg.indirect, mem_ctx);
555 } else {
556 src->reg.indirect = NULL;
557 }
558 }
559 return header;
560 }
561
/* 8-bit encoding of a destination, stored in the `dest` field of
 * union packed_instr. Bit 0 (is_ssa) selects which view applies.
 */
union packed_dest {
   uint8_t u8;
   /* SSA destination. */
   struct {
      uint8_t is_ssa:1;
      /* A name string follows the instruction header in the blob. */
      uint8_t has_name:1;
      /* Encoded with encode_num_components_in_3bits(). */
      uint8_t num_components:3;
      /* Encoded with encode_bit_size_3bits(). */
      uint8_t bit_size:3;
   } ssa;
   /* Register destination. */
   struct {
      uint8_t is_ssa:1;
      /* An indirect source follows the register index and base offset. */
      uint8_t is_indirect:1;
      uint8_t _pad:6;
   } reg;
};
576
/* 32-bit header written at the start of every serialized instruction. Every
 * view begins with the 4-bit instruction type; views for instructions that
 * can have a destination end with the 8-bit packed_dest.
 */
union packed_instr {
   uint32_t u32;
   /* Generic view, used by write_dest()/read_dest() to access `dest`. */
   struct {
      unsigned instr_type:4; /* always present */
      unsigned _pad:20;
      unsigned dest:8; /* always last */
   } any;
   /* ALU instruction: flags, write mask and opcode. */
   struct {
      unsigned instr_type:4;
      unsigned exact:1;
      unsigned no_signed_wrap:1;
      unsigned no_unsigned_wrap:1;
      unsigned saturate:1;
      unsigned writemask:4;
      unsigned op:9;
      unsigned _pad:3;
      unsigned dest:8;
   } alu;
   /* Deref instruction: deref type and variable mode. */
   struct {
      unsigned instr_type:4;
      unsigned deref_type:3;
      unsigned mode:10;
      unsigned _pad:7;
      unsigned dest:8;
   } deref;
   /* Intrinsic: opcode and 3-bit-encoded component count. */
   struct {
      unsigned instr_type:4;
      unsigned intrinsic:9;
      unsigned num_components:3;
      unsigned _pad:8;
      unsigned dest:8;
   } intrinsic;
   /* load_const: num_components - 1 and 3-bit-encoded bit size; no dest. */
   struct {
      unsigned instr_type:4;
      unsigned last_component:4;
      unsigned bit_size:3;
      unsigned _pad:21;
   } load_const;
   /* ssa_undef: same layout as load_const. */
   struct {
      unsigned instr_type:4;
      unsigned last_component:4;
      unsigned bit_size:3;
      unsigned _pad:21;
   } undef;
   /* Texture instruction: source count, texture op and array size. */
   struct {
      unsigned instr_type:4;
      unsigned num_srcs:4;
      unsigned op:4;
      unsigned texture_array_size:12;
      unsigned dest:8;
   } tex;
   /* Phi: number of phi sources. */
   struct {
      unsigned instr_type:4;
      unsigned num_srcs:20;
      unsigned dest:8;
   } phi;
   /* Jump: jump type; no dest. */
   struct {
      unsigned instr_type:4;
      unsigned type:2;
      unsigned _pad:26;
   } jump;
};
639
640 /* Write "lo24" as low 24 bits in the first uint32. */
641 static void
642 write_dest(write_ctx *ctx, const nir_dest *dst, union packed_instr header)
643 {
644 STATIC_ASSERT(sizeof(union packed_dest) == 1);
645 union packed_dest dest;
646 dest.u8 = 0;
647
648 dest.ssa.is_ssa = dst->is_ssa;
649 if (dst->is_ssa) {
650 dest.ssa.has_name = !ctx->strip && dst->ssa.name;
651 dest.ssa.num_components =
652 encode_num_components_in_3bits(dst->ssa.num_components);
653 dest.ssa.bit_size = encode_bit_size_3bits(dst->ssa.bit_size);
654 } else {
655 dest.reg.is_indirect = !!(dst->reg.indirect);
656 }
657
658 header.any.dest = dest.u8;
659 blob_write_uint32(ctx->blob, header.u32);
660
661 if (dst->is_ssa) {
662 write_add_object(ctx, &dst->ssa);
663 if (dest.ssa.has_name)
664 blob_write_string(ctx->blob, dst->ssa.name);
665 } else {
666 blob_write_uint32(ctx->blob, write_lookup_object(ctx, dst->reg.reg));
667 blob_write_uint32(ctx->blob, dst->reg.base_offset);
668 if (dst->reg.indirect)
669 write_src(ctx, dst->reg.indirect);
670 }
671 }
672
673 static void
674 read_dest(read_ctx *ctx, nir_dest *dst, nir_instr *instr,
675 union packed_instr header)
676 {
677 union packed_dest dest;
678 dest.u8 = header.any.dest;
679
680 if (dest.ssa.is_ssa) {
681 unsigned bit_size = decode_bit_size_3bits(dest.ssa.bit_size);
682 unsigned num_components =
683 decode_num_components_in_3bits(dest.ssa.num_components);
684 char *name = dest.ssa.has_name ? blob_read_string(ctx->blob) : NULL;
685 nir_ssa_dest_init(instr, dst, num_components, bit_size, name);
686 read_add_object(ctx, &dst->ssa);
687 } else {
688 dst->reg.reg = read_object(ctx);
689 dst->reg.base_offset = blob_read_uint32(ctx->blob);
690 if (dest.reg.is_indirect) {
691 dst->reg.indirect = ralloc(instr, nir_src);
692 read_src(ctx, dst->reg.indirect, instr);
693 }
694 }
695 }
696
697 static void
698 write_alu(write_ctx *ctx, const nir_alu_instr *alu)
699 {
700 /* 9 bits for nir_op */
701 STATIC_ASSERT(nir_num_opcodes <= 512);
702 union packed_instr header;
703 header.u32 = 0;
704
705 header.alu.instr_type = alu->instr.type;
706 header.alu.exact = alu->exact;
707 header.alu.no_signed_wrap = alu->no_signed_wrap;
708 header.alu.no_unsigned_wrap = alu->no_unsigned_wrap;
709 header.alu.saturate = alu->dest.saturate;
710 header.alu.writemask = alu->dest.write_mask;
711 header.alu.op = alu->op;
712
713 write_dest(ctx, &alu->dest.dest, header);
714
715 for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
716 union packed_src src;
717 src.u32 = 0;
718
719 src.alu.negate = alu->src[i].negate;
720 src.alu.abs = alu->src[i].abs;
721 src.alu.swizzle_x = alu->src[i].swizzle[0];
722 src.alu.swizzle_y = alu->src[i].swizzle[1];
723 src.alu.swizzle_z = alu->src[i].swizzle[2];
724 src.alu.swizzle_w = alu->src[i].swizzle[3];
725
726 write_src_full(ctx, &alu->src[i].src, src);
727 }
728 }
729
730 static nir_alu_instr *
731 read_alu(read_ctx *ctx, union packed_instr header)
732 {
733 nir_alu_instr *alu = nir_alu_instr_create(ctx->nir, header.alu.op);
734
735 alu->exact = header.alu.exact;
736 alu->no_signed_wrap = header.alu.no_signed_wrap;
737 alu->no_unsigned_wrap = header.alu.no_unsigned_wrap;
738 alu->dest.saturate = header.alu.saturate;
739 alu->dest.write_mask = header.alu.writemask;
740
741 read_dest(ctx, &alu->dest.dest, &alu->instr, header);
742
743 for (unsigned i = 0; i < nir_op_infos[header.alu.op].num_inputs; i++) {
744 union packed_src src = read_src(ctx, &alu->src[i].src, &alu->instr);
745
746 alu->src[i].negate = src.alu.negate;
747 alu->src[i].abs = src.alu.abs;
748 alu->src[i].swizzle[0] = src.alu.swizzle_x;
749 alu->src[i].swizzle[1] = src.alu.swizzle_y;
750 alu->src[i].swizzle[2] = src.alu.swizzle_z;
751 alu->src[i].swizzle[3] = src.alu.swizzle_w;
752 }
753
754 return alu;
755 }
756
757 static void
758 write_deref(write_ctx *ctx, const nir_deref_instr *deref)
759 {
760 assert(deref->deref_type < 8);
761 assert(deref->mode < (1 << 10));
762
763 union packed_instr header;
764 header.u32 = 0;
765
766 header.deref.instr_type = deref->instr.type;
767 header.deref.deref_type = deref->deref_type;
768 header.deref.mode = deref->mode;
769
770 write_dest(ctx, &deref->dest, header);
771 encode_type_to_blob(ctx->blob, deref->type);
772
773 if (deref->deref_type == nir_deref_type_var) {
774 write_object(ctx, deref->var);
775 return;
776 }
777
778 write_src(ctx, &deref->parent);
779
780 switch (deref->deref_type) {
781 case nir_deref_type_struct:
782 blob_write_uint32(ctx->blob, deref->strct.index);
783 break;
784
785 case nir_deref_type_array:
786 case nir_deref_type_ptr_as_array:
787 write_src(ctx, &deref->arr.index);
788 break;
789
790 case nir_deref_type_cast:
791 blob_write_uint32(ctx->blob, deref->cast.ptr_stride);
792 break;
793
794 case nir_deref_type_array_wildcard:
795 /* Nothing to do */
796 break;
797
798 default:
799 unreachable("Invalid deref type");
800 }
801 }
802
803 static nir_deref_instr *
804 read_deref(read_ctx *ctx, union packed_instr header)
805 {
806 nir_deref_type deref_type = header.deref.deref_type;
807 nir_deref_instr *deref = nir_deref_instr_create(ctx->nir, deref_type);
808
809 read_dest(ctx, &deref->dest, &deref->instr, header);
810
811 deref->mode = header.deref.mode;
812 deref->type = decode_type_from_blob(ctx->blob);
813
814 if (deref_type == nir_deref_type_var) {
815 deref->var = read_object(ctx);
816 return deref;
817 }
818
819 read_src(ctx, &deref->parent, &deref->instr);
820
821 switch (deref->deref_type) {
822 case nir_deref_type_struct:
823 deref->strct.index = blob_read_uint32(ctx->blob);
824 break;
825
826 case nir_deref_type_array:
827 case nir_deref_type_ptr_as_array:
828 read_src(ctx, &deref->arr.index, &deref->instr);
829 break;
830
831 case nir_deref_type_cast:
832 deref->cast.ptr_stride = blob_read_uint32(ctx->blob);
833 break;
834
835 case nir_deref_type_array_wildcard:
836 /* Nothing to do */
837 break;
838
839 default:
840 unreachable("Invalid deref type");
841 }
842
843 return deref;
844 }
845
846 static void
847 write_intrinsic(write_ctx *ctx, const nir_intrinsic_instr *intrin)
848 {
849 /* 9 bits for nir_intrinsic_op */
850 STATIC_ASSERT(nir_num_intrinsics <= 512);
851 unsigned num_srcs = nir_intrinsic_infos[intrin->intrinsic].num_srcs;
852 unsigned num_indices = nir_intrinsic_infos[intrin->intrinsic].num_indices;
853 assert(intrin->intrinsic < 512);
854
855 union packed_instr header;
856 header.u32 = 0;
857
858 header.intrinsic.instr_type = intrin->instr.type;
859 header.intrinsic.intrinsic = intrin->intrinsic;
860 header.intrinsic.num_components =
861 encode_num_components_in_3bits(intrin->num_components);
862
863 if (nir_intrinsic_infos[intrin->intrinsic].has_dest)
864 write_dest(ctx, &intrin->dest, header);
865 else
866 blob_write_uint32(ctx->blob, header.u32);
867
868 for (unsigned i = 0; i < num_srcs; i++)
869 write_src(ctx, &intrin->src[i]);
870
871 for (unsigned i = 0; i < num_indices; i++)
872 blob_write_uint32(ctx->blob, intrin->const_index[i]);
873 }
874
875 static nir_intrinsic_instr *
876 read_intrinsic(read_ctx *ctx, union packed_instr header)
877 {
878 nir_intrinsic_op op = header.intrinsic.intrinsic;
879 nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(ctx->nir, op);
880
881 unsigned num_srcs = nir_intrinsic_infos[op].num_srcs;
882 unsigned num_indices = nir_intrinsic_infos[op].num_indices;
883
884 intrin->num_components =
885 decode_num_components_in_3bits(header.intrinsic.num_components);
886
887 if (nir_intrinsic_infos[op].has_dest)
888 read_dest(ctx, &intrin->dest, &intrin->instr, header);
889
890 for (unsigned i = 0; i < num_srcs; i++)
891 read_src(ctx, &intrin->src[i], &intrin->instr);
892
893 for (unsigned i = 0; i < num_indices; i++)
894 intrin->const_index[i] = blob_read_uint32(ctx->blob);
895
896 return intrin;
897 }
898
899 static void
900 write_load_const(write_ctx *ctx, const nir_load_const_instr *lc)
901 {
902 assert(lc->def.num_components >= 1 && lc->def.num_components <= 16);
903 union packed_instr header;
904 header.u32 = 0;
905
906 header.load_const.instr_type = lc->instr.type;
907 header.load_const.last_component = lc->def.num_components - 1;
908 header.load_const.bit_size = encode_bit_size_3bits(lc->def.bit_size);
909
910 blob_write_uint32(ctx->blob, header.u32);
911
912 switch (lc->def.bit_size) {
913 case 64:
914 blob_write_bytes(ctx->blob, lc->value,
915 sizeof(*lc->value) * lc->def.num_components);
916 break;
917
918 case 32:
919 for (unsigned i = 0; i < lc->def.num_components; i++)
920 blob_write_uint32(ctx->blob, lc->value[i].u32);
921 break;
922
923 case 16:
924 for (unsigned i = 0; i < lc->def.num_components; i++)
925 blob_write_uint16(ctx->blob, lc->value[i].u16);
926 break;
927
928 default:
929 assert(lc->def.bit_size <= 8);
930 for (unsigned i = 0; i < lc->def.num_components; i++)
931 blob_write_uint8(ctx->blob, lc->value[i].u8);
932 break;
933 }
934
935 write_add_object(ctx, &lc->def);
936 }
937
938 static nir_load_const_instr *
939 read_load_const(read_ctx *ctx, union packed_instr header)
940 {
941 nir_load_const_instr *lc =
942 nir_load_const_instr_create(ctx->nir, header.load_const.last_component + 1,
943 decode_bit_size_3bits(header.load_const.bit_size));
944
945 switch (lc->def.bit_size) {
946 case 64:
947 blob_copy_bytes(ctx->blob, lc->value, sizeof(*lc->value) * lc->def.num_components);
948 break;
949
950 case 32:
951 for (unsigned i = 0; i < lc->def.num_components; i++)
952 lc->value[i].u32 = blob_read_uint32(ctx->blob);
953 break;
954
955 case 16:
956 for (unsigned i = 0; i < lc->def.num_components; i++)
957 lc->value[i].u16 = blob_read_uint16(ctx->blob);
958 break;
959
960 default:
961 assert(lc->def.bit_size <= 8);
962 for (unsigned i = 0; i < lc->def.num_components; i++)
963 lc->value[i].u8 = blob_read_uint8(ctx->blob);
964 break;
965 }
966
967 read_add_object(ctx, &lc->def);
968 return lc;
969 }
970
971 static void
972 write_ssa_undef(write_ctx *ctx, const nir_ssa_undef_instr *undef)
973 {
974 assert(undef->def.num_components >= 1 && undef->def.num_components <= 16);
975
976 union packed_instr header;
977 header.u32 = 0;
978
979 header.undef.instr_type = undef->instr.type;
980 header.undef.last_component = undef->def.num_components - 1;
981 header.undef.bit_size = encode_bit_size_3bits(undef->def.bit_size);
982
983 blob_write_uint32(ctx->blob, header.u32);
984 write_add_object(ctx, &undef->def);
985 }
986
987 static nir_ssa_undef_instr *
988 read_ssa_undef(read_ctx *ctx, union packed_instr header)
989 {
990 nir_ssa_undef_instr *undef =
991 nir_ssa_undef_instr_create(ctx->nir, header.undef.last_component + 1,
992 decode_bit_size_3bits(header.undef.bit_size));
993
994 read_add_object(ctx, &undef->def);
995 return undef;
996 }
997
998 union packed_tex_data {
999 uint32_t u32;
1000 struct {
1001 enum glsl_sampler_dim sampler_dim:4;
1002 nir_alu_type dest_type:8;
1003 unsigned coord_components:3;
1004 unsigned is_array:1;
1005 unsigned is_shadow:1;
1006 unsigned is_new_style_shadow:1;
1007 unsigned component:2;
1008 unsigned unused:10; /* Mark unused for valgrind. */
1009 } u;
1010 };
1011
1012 static void
1013 write_tex(write_ctx *ctx, const nir_tex_instr *tex)
1014 {
1015 assert(tex->num_srcs < 16);
1016 assert(tex->op < 16);
1017 assert(tex->texture_array_size < 1024);
1018
1019 union packed_instr header;
1020 header.u32 = 0;
1021
1022 header.tex.instr_type = tex->instr.type;
1023 header.tex.num_srcs = tex->num_srcs;
1024 header.tex.op = tex->op;
1025 header.tex.texture_array_size = tex->texture_array_size;
1026
1027 write_dest(ctx, &tex->dest, header);
1028
1029 blob_write_uint32(ctx->blob, tex->texture_index);
1030 blob_write_uint32(ctx->blob, tex->sampler_index);
1031 if (tex->op == nir_texop_tg4)
1032 blob_write_bytes(ctx->blob, tex->tg4_offsets, sizeof(tex->tg4_offsets));
1033
1034 STATIC_ASSERT(sizeof(union packed_tex_data) == sizeof(uint32_t));
1035 union packed_tex_data packed = {
1036 .u.sampler_dim = tex->sampler_dim,
1037 .u.dest_type = tex->dest_type,
1038 .u.coord_components = tex->coord_components,
1039 .u.is_array = tex->is_array,
1040 .u.is_shadow = tex->is_shadow,
1041 .u.is_new_style_shadow = tex->is_new_style_shadow,
1042 .u.component = tex->component,
1043 };
1044 blob_write_uint32(ctx->blob, packed.u32);
1045
1046 for (unsigned i = 0; i < tex->num_srcs; i++) {
1047 union packed_src src;
1048 src.u32 = 0;
1049 src.tex.src_type = tex->src[i].src_type;
1050 write_src_full(ctx, &tex->src[i].src, src);
1051 }
1052 }
1053
1054 static nir_tex_instr *
1055 read_tex(read_ctx *ctx, union packed_instr header)
1056 {
1057 nir_tex_instr *tex = nir_tex_instr_create(ctx->nir, header.tex.num_srcs);
1058
1059 read_dest(ctx, &tex->dest, &tex->instr, header);
1060
1061 tex->op = header.tex.op;
1062 tex->texture_index = blob_read_uint32(ctx->blob);
1063 tex->texture_array_size = header.tex.texture_array_size;
1064 tex->sampler_index = blob_read_uint32(ctx->blob);
1065 if (tex->op == nir_texop_tg4)
1066 blob_copy_bytes(ctx->blob, tex->tg4_offsets, sizeof(tex->tg4_offsets));
1067
1068 union packed_tex_data packed;
1069 packed.u32 = blob_read_uint32(ctx->blob);
1070 tex->sampler_dim = packed.u.sampler_dim;
1071 tex->dest_type = packed.u.dest_type;
1072 tex->coord_components = packed.u.coord_components;
1073 tex->is_array = packed.u.is_array;
1074 tex->is_shadow = packed.u.is_shadow;
1075 tex->is_new_style_shadow = packed.u.is_new_style_shadow;
1076 tex->component = packed.u.component;
1077
1078 for (unsigned i = 0; i < tex->num_srcs; i++) {
1079 union packed_src src = read_src(ctx, &tex->src[i].src, &tex->instr);
1080 tex->src[i].src_type = src.tex.src_type;
1081 }
1082
1083 return tex;
1084 }
1085
1086 static void
1087 write_phi(write_ctx *ctx, const nir_phi_instr *phi)
1088 {
1089 union packed_instr header;
1090 header.u32 = 0;
1091
1092 header.phi.instr_type = phi->instr.type;
1093 header.phi.num_srcs = exec_list_length(&phi->srcs);
1094
1095 /* Phi nodes are special, since they may reference SSA definitions and
1096 * basic blocks that don't exist yet. We leave two empty uint32_t's here,
1097 * and then store enough information so that a later fixup pass can fill
1098 * them in correctly.
1099 */
1100 write_dest(ctx, &phi->dest, header);
1101
1102 nir_foreach_phi_src(src, phi) {
1103 assert(src->src.is_ssa);
1104 size_t blob_offset = blob_reserve_uint32(ctx->blob);
1105 ASSERTED size_t blob_offset2 = blob_reserve_uint32(ctx->blob);
1106 assert(blob_offset + sizeof(uint32_t) == blob_offset2);
1107 write_phi_fixup fixup = {
1108 .blob_offset = blob_offset,
1109 .src = src->src.ssa,
1110 .block = src->pred,
1111 };
1112 util_dynarray_append(&ctx->phi_fixups, write_phi_fixup, fixup);
1113 }
1114 }
1115
1116 static void
1117 write_fixup_phis(write_ctx *ctx)
1118 {
1119 util_dynarray_foreach(&ctx->phi_fixups, write_phi_fixup, fixup) {
1120 uint32_t *blob_ptr = (uint32_t *)(ctx->blob->data + fixup->blob_offset);
1121 blob_ptr[0] = write_lookup_object(ctx, fixup->src);
1122 blob_ptr[1] = write_lookup_object(ctx, fixup->block);
1123 }
1124
1125 util_dynarray_clear(&ctx->phi_fixups);
1126 }
1127
1128 static nir_phi_instr *
1129 read_phi(read_ctx *ctx, nir_block *blk, union packed_instr header)
1130 {
1131 nir_phi_instr *phi = nir_phi_instr_create(ctx->nir);
1132
1133 read_dest(ctx, &phi->dest, &phi->instr, header);
1134
1135 /* For similar reasons as before, we just store the index directly into the
1136 * pointer, and let a later pass resolve the phi sources.
1137 *
1138 * In order to ensure that the copied sources (which are just the indices
1139 * from the blob for now) don't get inserted into the old shader's use-def
1140 * lists, we have to add the phi instruction *before* we set up its
1141 * sources.
1142 */
1143 nir_instr_insert_after_block(blk, &phi->instr);
1144
1145 for (unsigned i = 0; i < header.phi.num_srcs; i++) {
1146 nir_phi_src *src = ralloc(phi, nir_phi_src);
1147
1148 src->src.is_ssa = true;
1149 src->src.ssa = (nir_ssa_def *)(uintptr_t) blob_read_uint32(ctx->blob);
1150 src->pred = (nir_block *)(uintptr_t) blob_read_uint32(ctx->blob);
1151
1152 /* Since we're not letting nir_insert_instr handle use/def stuff for us,
1153 * we have to set the parent_instr manually. It doesn't really matter
1154 * when we do it, so we might as well do it here.
1155 */
1156 src->src.parent_instr = &phi->instr;
1157
1158 /* Stash it in the list of phi sources. We'll walk this list and fix up
1159 * sources at the very end of read_function_impl.
1160 */
1161 list_add(&src->src.use_link, &ctx->phi_srcs);
1162
1163 exec_list_push_tail(&phi->srcs, &src->node);
1164 }
1165
1166 return phi;
1167 }
1168
1169 static void
1170 read_fixup_phis(read_ctx *ctx)
1171 {
1172 list_for_each_entry_safe(nir_phi_src, src, &ctx->phi_srcs, src.use_link) {
1173 src->pred = read_lookup_object(ctx, (uintptr_t)src->pred);
1174 src->src.ssa = read_lookup_object(ctx, (uintptr_t)src->src.ssa);
1175
1176 /* Remove from this list */
1177 list_del(&src->src.use_link);
1178
1179 list_addtail(&src->src.use_link, &src->src.ssa->uses);
1180 }
1181 assert(list_is_empty(&ctx->phi_srcs));
1182 }
1183
1184 static void
1185 write_jump(write_ctx *ctx, const nir_jump_instr *jmp)
1186 {
1187 assert(jmp->type < 4);
1188
1189 union packed_instr header;
1190 header.u32 = 0;
1191
1192 header.jump.instr_type = jmp->instr.type;
1193 header.jump.type = jmp->type;
1194
1195 blob_write_uint32(ctx->blob, header.u32);
1196 }
1197
1198 static nir_jump_instr *
1199 read_jump(read_ctx *ctx, union packed_instr header)
1200 {
1201 nir_jump_instr *jmp = nir_jump_instr_create(ctx->nir, header.jump.type);
1202 return jmp;
1203 }
1204
1205 static void
1206 write_call(write_ctx *ctx, const nir_call_instr *call)
1207 {
1208 blob_write_uint32(ctx->blob, write_lookup_object(ctx, call->callee));
1209
1210 for (unsigned i = 0; i < call->num_params; i++)
1211 write_src(ctx, &call->params[i]);
1212 }
1213
1214 static nir_call_instr *
1215 read_call(read_ctx *ctx)
1216 {
1217 nir_function *callee = read_object(ctx);
1218 nir_call_instr *call = nir_call_instr_create(ctx->nir, callee);
1219
1220 for (unsigned i = 0; i < call->num_params; i++)
1221 read_src(ctx, &call->params[i], call);
1222
1223 return call;
1224 }
1225
1226 static void
1227 write_instr(write_ctx *ctx, const nir_instr *instr)
1228 {
1229 /* We have only 4 bits for the instruction type. */
1230 assert(instr->type < 16);
1231
1232 switch (instr->type) {
1233 case nir_instr_type_alu:
1234 write_alu(ctx, nir_instr_as_alu(instr));
1235 break;
1236 case nir_instr_type_deref:
1237 write_deref(ctx, nir_instr_as_deref(instr));
1238 break;
1239 case nir_instr_type_intrinsic:
1240 write_intrinsic(ctx, nir_instr_as_intrinsic(instr));
1241 break;
1242 case nir_instr_type_load_const:
1243 write_load_const(ctx, nir_instr_as_load_const(instr));
1244 break;
1245 case nir_instr_type_ssa_undef:
1246 write_ssa_undef(ctx, nir_instr_as_ssa_undef(instr));
1247 break;
1248 case nir_instr_type_tex:
1249 write_tex(ctx, nir_instr_as_tex(instr));
1250 break;
1251 case nir_instr_type_phi:
1252 write_phi(ctx, nir_instr_as_phi(instr));
1253 break;
1254 case nir_instr_type_jump:
1255 write_jump(ctx, nir_instr_as_jump(instr));
1256 break;
1257 case nir_instr_type_call:
1258 blob_write_uint32(ctx->blob, instr->type);
1259 write_call(ctx, nir_instr_as_call(instr));
1260 break;
1261 case nir_instr_type_parallel_copy:
1262 unreachable("Cannot write parallel copies");
1263 default:
1264 unreachable("bad instr type");
1265 }
1266 }
1267
1268 static void
1269 read_instr(read_ctx *ctx, nir_block *block)
1270 {
1271 STATIC_ASSERT(sizeof(union packed_instr) == 4);
1272 union packed_instr header;
1273 header.u32 = blob_read_uint32(ctx->blob);
1274 nir_instr *instr;
1275
1276 switch (header.any.instr_type) {
1277 case nir_instr_type_alu:
1278 instr = &read_alu(ctx, header)->instr;
1279 break;
1280 case nir_instr_type_deref:
1281 instr = &read_deref(ctx, header)->instr;
1282 break;
1283 case nir_instr_type_intrinsic:
1284 instr = &read_intrinsic(ctx, header)->instr;
1285 break;
1286 case nir_instr_type_load_const:
1287 instr = &read_load_const(ctx, header)->instr;
1288 break;
1289 case nir_instr_type_ssa_undef:
1290 instr = &read_ssa_undef(ctx, header)->instr;
1291 break;
1292 case nir_instr_type_tex:
1293 instr = &read_tex(ctx, header)->instr;
1294 break;
1295 case nir_instr_type_phi:
1296 /* Phi instructions are a bit of a special case when reading because we
1297 * don't want inserting the instruction to automatically handle use/defs
1298 * for us. Instead, we need to wait until all the blocks/instructions
1299 * are read so that we can set their sources up.
1300 */
1301 read_phi(ctx, block, header);
1302 return;
1303 case nir_instr_type_jump:
1304 instr = &read_jump(ctx, header)->instr;
1305 break;
1306 case nir_instr_type_call:
1307 instr = &read_call(ctx)->instr;
1308 break;
1309 case nir_instr_type_parallel_copy:
1310 unreachable("Cannot read parallel copies");
1311 default:
1312 unreachable("bad instr type");
1313 }
1314
1315 nir_instr_insert_after_block(block, instr);
1316 }
1317
1318 static void
1319 write_block(write_ctx *ctx, const nir_block *block)
1320 {
1321 write_add_object(ctx, block);
1322 blob_write_uint32(ctx->blob, exec_list_length(&block->instr_list));
1323 nir_foreach_instr(instr, block)
1324 write_instr(ctx, instr);
1325 }
1326
1327 static void
1328 read_block(read_ctx *ctx, struct exec_list *cf_list)
1329 {
1330 /* Don't actually create a new block. Just use the one from the tail of
1331 * the list. NIR guarantees that the tail of the list is a block and that
1332 * no two blocks are side-by-side in the IR; It should be empty.
1333 */
1334 nir_block *block =
1335 exec_node_data(nir_block, exec_list_get_tail(cf_list), cf_node.node);
1336
1337 read_add_object(ctx, block);
1338 unsigned num_instrs = blob_read_uint32(ctx->blob);
1339 for (unsigned i = 0; i < num_instrs; i++) {
1340 read_instr(ctx, block);
1341 }
1342 }
1343
1344 static void
1345 write_cf_list(write_ctx *ctx, const struct exec_list *cf_list);
1346
1347 static void
1348 read_cf_list(read_ctx *ctx, struct exec_list *cf_list);
1349
1350 static void
1351 write_if(write_ctx *ctx, nir_if *nif)
1352 {
1353 write_src(ctx, &nif->condition);
1354
1355 write_cf_list(ctx, &nif->then_list);
1356 write_cf_list(ctx, &nif->else_list);
1357 }
1358
1359 static void
1360 read_if(read_ctx *ctx, struct exec_list *cf_list)
1361 {
1362 nir_if *nif = nir_if_create(ctx->nir);
1363
1364 read_src(ctx, &nif->condition, nif);
1365
1366 nir_cf_node_insert_end(cf_list, &nif->cf_node);
1367
1368 read_cf_list(ctx, &nif->then_list);
1369 read_cf_list(ctx, &nif->else_list);
1370 }
1371
1372 static void
1373 write_loop(write_ctx *ctx, nir_loop *loop)
1374 {
1375 write_cf_list(ctx, &loop->body);
1376 }
1377
1378 static void
1379 read_loop(read_ctx *ctx, struct exec_list *cf_list)
1380 {
1381 nir_loop *loop = nir_loop_create(ctx->nir);
1382
1383 nir_cf_node_insert_end(cf_list, &loop->cf_node);
1384
1385 read_cf_list(ctx, &loop->body);
1386 }
1387
1388 static void
1389 write_cf_node(write_ctx *ctx, nir_cf_node *cf)
1390 {
1391 blob_write_uint32(ctx->blob, cf->type);
1392
1393 switch (cf->type) {
1394 case nir_cf_node_block:
1395 write_block(ctx, nir_cf_node_as_block(cf));
1396 break;
1397 case nir_cf_node_if:
1398 write_if(ctx, nir_cf_node_as_if(cf));
1399 break;
1400 case nir_cf_node_loop:
1401 write_loop(ctx, nir_cf_node_as_loop(cf));
1402 break;
1403 default:
1404 unreachable("bad cf type");
1405 }
1406 }
1407
1408 static void
1409 read_cf_node(read_ctx *ctx, struct exec_list *list)
1410 {
1411 nir_cf_node_type type = blob_read_uint32(ctx->blob);
1412
1413 switch (type) {
1414 case nir_cf_node_block:
1415 read_block(ctx, list);
1416 break;
1417 case nir_cf_node_if:
1418 read_if(ctx, list);
1419 break;
1420 case nir_cf_node_loop:
1421 read_loop(ctx, list);
1422 break;
1423 default:
1424 unreachable("bad cf type");
1425 }
1426 }
1427
1428 static void
1429 write_cf_list(write_ctx *ctx, const struct exec_list *cf_list)
1430 {
1431 blob_write_uint32(ctx->blob, exec_list_length(cf_list));
1432 foreach_list_typed(nir_cf_node, cf, node, cf_list) {
1433 write_cf_node(ctx, cf);
1434 }
1435 }
1436
1437 static void
1438 read_cf_list(read_ctx *ctx, struct exec_list *cf_list)
1439 {
1440 uint32_t num_cf_nodes = blob_read_uint32(ctx->blob);
1441 for (unsigned i = 0; i < num_cf_nodes; i++)
1442 read_cf_node(ctx, cf_list);
1443 }
1444
1445 static void
1446 write_function_impl(write_ctx *ctx, const nir_function_impl *fi)
1447 {
1448 write_var_list(ctx, &fi->locals);
1449 write_reg_list(ctx, &fi->registers);
1450 blob_write_uint32(ctx->blob, fi->reg_alloc);
1451
1452 write_cf_list(ctx, &fi->body);
1453 write_fixup_phis(ctx);
1454 }
1455
1456 static nir_function_impl *
1457 read_function_impl(read_ctx *ctx, nir_function *fxn)
1458 {
1459 nir_function_impl *fi = nir_function_impl_create_bare(ctx->nir);
1460 fi->function = fxn;
1461
1462 read_var_list(ctx, &fi->locals);
1463 read_reg_list(ctx, &fi->registers);
1464 fi->reg_alloc = blob_read_uint32(ctx->blob);
1465
1466 read_cf_list(ctx, &fi->body);
1467 read_fixup_phis(ctx);
1468
1469 fi->valid_metadata = 0;
1470
1471 return fi;
1472 }
1473
1474 static void
1475 write_function(write_ctx *ctx, const nir_function *fxn)
1476 {
1477 uint32_t flags = fxn->is_entrypoint;
1478 if (fxn->name)
1479 flags |= 0x2;
1480 if (fxn->impl)
1481 flags |= 0x4;
1482 blob_write_uint32(ctx->blob, flags);
1483 if (fxn->name)
1484 blob_write_string(ctx->blob, fxn->name);
1485
1486 write_add_object(ctx, fxn);
1487
1488 blob_write_uint32(ctx->blob, fxn->num_params);
1489 for (unsigned i = 0; i < fxn->num_params; i++) {
1490 uint32_t val =
1491 ((uint32_t)fxn->params[i].num_components) |
1492 ((uint32_t)fxn->params[i].bit_size) << 8;
1493 blob_write_uint32(ctx->blob, val);
1494 }
1495
1496 /* At first glance, it looks like we should write the function_impl here.
1497 * However, call instructions need to be able to reference at least the
1498 * function and those will get processed as we write the function_impls.
1499 * We stop here and write function_impls as a second pass.
1500 */
1501 }
1502
1503 static void
1504 read_function(read_ctx *ctx)
1505 {
1506 uint32_t flags = blob_read_uint32(ctx->blob);
1507 bool has_name = flags & 0x2;
1508 char *name = has_name ? blob_read_string(ctx->blob) : NULL;
1509
1510 nir_function *fxn = nir_function_create(ctx->nir, name);
1511
1512 read_add_object(ctx, fxn);
1513
1514 fxn->num_params = blob_read_uint32(ctx->blob);
1515 fxn->params = ralloc_array(fxn, nir_parameter, fxn->num_params);
1516 for (unsigned i = 0; i < fxn->num_params; i++) {
1517 uint32_t val = blob_read_uint32(ctx->blob);
1518 fxn->params[i].num_components = val & 0xff;
1519 fxn->params[i].bit_size = (val >> 8) & 0xff;
1520 }
1521
1522 fxn->is_entrypoint = flags & 0x1;
1523 if (flags & 0x4)
1524 fxn->impl = NIR_SERIALIZE_FUNC_HAS_IMPL;
1525 }
1526
1527 /**
1528 * Serialize NIR into a binary blob.
1529 *
1530 * \param strip Don't serialize information only useful for debugging,
1531 * such as variable names, making cache hits from similar
1532 * shaders more likely.
1533 */
1534 void
1535 nir_serialize(struct blob *blob, const nir_shader *nir, bool strip)
1536 {
1537 write_ctx ctx = {0};
1538 ctx.remap_table = _mesa_pointer_hash_table_create(NULL);
1539 ctx.blob = blob;
1540 ctx.nir = nir;
1541 ctx.strip = strip;
1542 util_dynarray_init(&ctx.phi_fixups, NULL);
1543
1544 size_t idx_size_offset = blob_reserve_uint32(blob);
1545
1546 struct shader_info info = nir->info;
1547 uint32_t strings = 0;
1548 if (!strip && info.name)
1549 strings |= 0x1;
1550 if (!strip && info.label)
1551 strings |= 0x2;
1552 blob_write_uint32(blob, strings);
1553 if (!strip && info.name)
1554 blob_write_string(blob, info.name);
1555 if (!strip && info.label)
1556 blob_write_string(blob, info.label);
1557 info.name = info.label = NULL;
1558 blob_write_bytes(blob, (uint8_t *) &info, sizeof(info));
1559
1560 write_var_list(&ctx, &nir->uniforms);
1561 write_var_list(&ctx, &nir->inputs);
1562 write_var_list(&ctx, &nir->outputs);
1563 write_var_list(&ctx, &nir->shared);
1564 write_var_list(&ctx, &nir->globals);
1565 write_var_list(&ctx, &nir->system_values);
1566
1567 blob_write_uint32(blob, nir->num_inputs);
1568 blob_write_uint32(blob, nir->num_uniforms);
1569 blob_write_uint32(blob, nir->num_outputs);
1570 blob_write_uint32(blob, nir->num_shared);
1571 blob_write_uint32(blob, nir->scratch_size);
1572
1573 blob_write_uint32(blob, exec_list_length(&nir->functions));
1574 nir_foreach_function(fxn, nir) {
1575 write_function(&ctx, fxn);
1576 }
1577
1578 nir_foreach_function(fxn, nir) {
1579 if (fxn->impl)
1580 write_function_impl(&ctx, fxn->impl);
1581 }
1582
1583 blob_write_uint32(blob, nir->constant_data_size);
1584 if (nir->constant_data_size > 0)
1585 blob_write_bytes(blob, nir->constant_data, nir->constant_data_size);
1586
1587 *(uint32_t *)(blob->data + idx_size_offset) = ctx.next_idx;
1588
1589 _mesa_hash_table_destroy(ctx.remap_table, NULL);
1590 util_dynarray_fini(&ctx.phi_fixups);
1591 }
1592
1593 nir_shader *
1594 nir_deserialize(void *mem_ctx,
1595 const struct nir_shader_compiler_options *options,
1596 struct blob_reader *blob)
1597 {
1598 read_ctx ctx = {0};
1599 ctx.blob = blob;
1600 list_inithead(&ctx.phi_srcs);
1601 ctx.idx_table_len = blob_read_uint32(blob);
1602 ctx.idx_table = calloc(ctx.idx_table_len, sizeof(uintptr_t));
1603
1604 uint32_t strings = blob_read_uint32(blob);
1605 char *name = (strings & 0x1) ? blob_read_string(blob) : NULL;
1606 char *label = (strings & 0x2) ? blob_read_string(blob) : NULL;
1607
1608 struct shader_info info;
1609 blob_copy_bytes(blob, (uint8_t *) &info, sizeof(info));
1610
1611 ctx.nir = nir_shader_create(mem_ctx, info.stage, options, NULL);
1612
1613 info.name = name ? ralloc_strdup(ctx.nir, name) : NULL;
1614 info.label = label ? ralloc_strdup(ctx.nir, label) : NULL;
1615
1616 ctx.nir->info = info;
1617
1618 read_var_list(&ctx, &ctx.nir->uniforms);
1619 read_var_list(&ctx, &ctx.nir->inputs);
1620 read_var_list(&ctx, &ctx.nir->outputs);
1621 read_var_list(&ctx, &ctx.nir->shared);
1622 read_var_list(&ctx, &ctx.nir->globals);
1623 read_var_list(&ctx, &ctx.nir->system_values);
1624
1625 ctx.nir->num_inputs = blob_read_uint32(blob);
1626 ctx.nir->num_uniforms = blob_read_uint32(blob);
1627 ctx.nir->num_outputs = blob_read_uint32(blob);
1628 ctx.nir->num_shared = blob_read_uint32(blob);
1629 ctx.nir->scratch_size = blob_read_uint32(blob);
1630
1631 unsigned num_functions = blob_read_uint32(blob);
1632 for (unsigned i = 0; i < num_functions; i++)
1633 read_function(&ctx);
1634
1635 nir_foreach_function(fxn, ctx.nir) {
1636 if (fxn->impl == NIR_SERIALIZE_FUNC_HAS_IMPL)
1637 fxn->impl = read_function_impl(&ctx, fxn);
1638 }
1639
1640 ctx.nir->constant_data_size = blob_read_uint32(blob);
1641 if (ctx.nir->constant_data_size > 0) {
1642 ctx.nir->constant_data =
1643 ralloc_size(ctx.nir, ctx.nir->constant_data_size);
1644 blob_copy_bytes(blob, ctx.nir->constant_data,
1645 ctx.nir->constant_data_size);
1646 }
1647
1648 free(ctx.idx_table);
1649
1650 return ctx.nir;
1651 }
1652
1653 void
1654 nir_shader_serialize_deserialize(nir_shader *shader)
1655 {
1656 const struct nir_shader_compiler_options *options = shader->options;
1657
1658 struct blob writer;
1659 blob_init(&writer);
1660 nir_serialize(&writer, shader, false);
1661
1662 /* Delete all of dest's ralloc children but leave dest alone */
1663 void *dead_ctx = ralloc_context(NULL);
1664 ralloc_adopt(dead_ctx, shader);
1665 ralloc_free(dead_ctx);
1666
1667 dead_ctx = ralloc_context(NULL);
1668
1669 struct blob_reader reader;
1670 blob_reader_init(&reader, writer.data, writer.size);
1671 nir_shader *copy = nir_deserialize(dead_ctx, options, &reader);
1672
1673 blob_finish(&writer);
1674
1675 nir_shader_replace(shader, copy);
1676 ralloc_free(dead_ctx);
1677 }