/*
 * Copyright © 2017 Connor Abbott
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir_serialize.h"
#include "nir_control_flow.h"
#include "util/u_dynarray.h"
#include "util/u_math.h"

#define NIR_SERIALIZE_FUNC_HAS_IMPL ((void *)(intptr_t)1)
#define MAX_OBJECT_IDS (1 << 20)

typedef struct {
   size_t blob_offset;
   nir_ssa_def *src;
   nir_block *block;
} write_phi_fixup;

typedef struct {
   const nir_shader *nir;

   struct blob *blob;

   /* maps pointer to index */
   struct hash_table *remap_table;

   /* the next index to assign to a NIR in-memory object */
   uint32_t next_idx;

   /* Array of write_phi_fixup structs representing phi sources that need to
    * be resolved in the second pass.
    */
   struct util_dynarray phi_fixups;

   /* The last serialized type. */
   const struct glsl_type *last_type;
   const struct glsl_type *last_interface_type;
   struct nir_variable_data last_var_data;

   /* For skipping equal ALU headers (typical after scalarization). */
   nir_instr_type last_instr_type;
   uintptr_t last_alu_header_offset;

   /* Don't write optional data such as variable names. */
   bool strip;
} write_ctx;

typedef struct {
   nir_shader *nir;

   struct blob_reader *blob;

   /* the next index to assign to a NIR in-memory object */
   uint32_t next_idx;

   /* The length of the index -> object table */
   uint32_t idx_table_len;

   /* map from index to deserialized pointer */
   void **idx_table;

   /* List of phi sources. */
   struct list_head phi_srcs;

   /* The last deserialized type. */
   const struct glsl_type *last_type;
   const struct glsl_type *last_interface_type;
   struct nir_variable_data last_var_data;
} read_ctx;

static void
write_add_object(write_ctx *ctx, const void *obj)
{
   uint32_t index = ctx->next_idx++;
   assert(index != MAX_OBJECT_IDS);
   _mesa_hash_table_insert(ctx->remap_table, obj, (void *)(uintptr_t) index);
}

static uint32_t
write_lookup_object(write_ctx *ctx, const void *obj)
{
   struct hash_entry *entry = _mesa_hash_table_search(ctx->remap_table, obj);
   assert(entry);
   return (uint32_t)(uintptr_t) entry->data;
}

static void
read_add_object(read_ctx *ctx, void *obj)
{
   assert(ctx->next_idx < ctx->idx_table_len);
   ctx->idx_table[ctx->next_idx++] = obj;
}

static void *
read_lookup_object(read_ctx *ctx, uint32_t idx)
{
   assert(idx < ctx->idx_table_len);
   return ctx->idx_table[idx];
}

static void *
read_object(read_ctx *ctx)
{
   return read_lookup_object(ctx, blob_read_uint32(ctx->blob));
}

static uint32_t
encode_bit_size_3bits(uint8_t bit_size)
{
   /* Encode values of 0, 1, 2, 4, 8, 16, 32, 64 in 3 bits. */
   assert(bit_size <= 64 && util_is_power_of_two_or_zero(bit_size));
   if (bit_size)
      return util_logbase2(bit_size) + 1;
   return 0;
}

static uint8_t
decode_bit_size_3bits(uint8_t bit_size)
{
   if (bit_size)
      return 1 << (bit_size - 1);
   return 0;
}

static uint8_t
encode_num_components_in_3bits(uint8_t num_components)
{
   if (num_components <= 4)
      return num_components;
   if (num_components == 8)
      return 5;
   if (num_components == 16)
      return 6;

   unreachable("invalid number in num_components");
   return 0;
}

static uint8_t
decode_num_components_in_3bits(uint8_t value)
{
   if (value <= 4)
      return value;
   if (value == 5)
      return 8;
   if (value == 6)
      return 16;

   unreachable("invalid num_components encoding");
   return 0;
}
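
/* Worked examples (illustrative, not part of the on-disk format spec):
 * encode_bit_size_3bits(32) == log2(32) + 1 == 6, and
 * decode_bit_size_3bits(6) == 1 << 5 == 32; 0 round-trips to 0.
 * Component counts of 8 and 16 don't fit in 3 bits directly, so they are
 * remapped to the spare codes 5 and 6: encode(8) == 5, decode(5) == 8.
 */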

static void
write_constant(write_ctx *ctx, const nir_constant *c)
{
   blob_write_bytes(ctx->blob, c->values, sizeof(c->values));
   blob_write_uint32(ctx->blob, c->num_elements);
   for (unsigned i = 0; i < c->num_elements; i++)
      write_constant(ctx, c->elements[i]);
}

static nir_constant *
read_constant(read_ctx *ctx, nir_variable *nvar)
{
   nir_constant *c = ralloc(nvar, nir_constant);

   blob_copy_bytes(ctx->blob, (uint8_t *)c->values, sizeof(c->values));
   c->num_elements = blob_read_uint32(ctx->blob);
   c->elements = ralloc_array(nvar, nir_constant *, c->num_elements);
   for (unsigned i = 0; i < c->num_elements; i++)
      c->elements[i] = read_constant(ctx, nvar);

   return c;
}

enum var_data_encoding {
   var_encode_full,
   var_encode_shader_temp,
   var_encode_function_temp,
   var_encode_location_diff,
};

union packed_var {
   uint32_t u32;
   struct {
      unsigned has_name:1;
      unsigned has_constant_initializer:1;
      unsigned has_interface_type:1;
      unsigned num_state_slots:7;
      unsigned data_encoding:2;
      unsigned type_same_as_last:1;
      unsigned interface_type_same_as_last:1;
      unsigned _pad:2;
      unsigned num_members:16;
   } u;
};

union packed_var_data_diff {
   uint32_t u32;
   struct {
      int location:13;
      int location_frac:3;
      int driver_location:16;
   } u;
};
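
/* Worked example (illustrative): if the previous variable had location 4 and
 * the current one has location 5 with all other var->data fields equal,
 * write_variable() picks var_encode_location_diff and emits a single uint32
 * with u.location = +1, u.location_frac = 0, u.driver_location = 0, instead
 * of the full sizeof(struct nir_variable_data) bytes.
 */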

static void
write_variable(write_ctx *ctx, const nir_variable *var)
{
   write_add_object(ctx, var);

   assert(var->num_state_slots < (1 << 7));
   assert(var->num_members < (1 << 16));

   STATIC_ASSERT(sizeof(union packed_var) == 4);
   union packed_var flags;
   flags.u32 = 0;

   flags.u.has_name = !ctx->strip && var->name;
   flags.u.has_constant_initializer = !!(var->constant_initializer);
   flags.u.has_interface_type = !!(var->interface_type);
   flags.u.type_same_as_last = var->type == ctx->last_type;
   flags.u.interface_type_same_as_last =
      var->interface_type && var->interface_type == ctx->last_interface_type;
   flags.u.num_state_slots = var->num_state_slots;
   flags.u.num_members = var->num_members;

   struct nir_variable_data data = var->data;

   /* When stripping, we expect that the location is no longer needed,
    * which is typically after shaders are linked.
    */
   if (ctx->strip &&
       data.mode != nir_var_shader_in &&
       data.mode != nir_var_shader_out)
      data.location = 0;

   /* Temporary variables don't serialize var->data. */
   if (data.mode == nir_var_shader_temp)
      flags.u.data_encoding = var_encode_shader_temp;
   else if (data.mode == nir_var_function_temp)
      flags.u.data_encoding = var_encode_function_temp;
   else {
      struct nir_variable_data tmp = data;

      tmp.location = ctx->last_var_data.location;
      tmp.location_frac = ctx->last_var_data.location_frac;
      tmp.driver_location = ctx->last_var_data.driver_location;

      /* See if we can encode only the difference in locations from the last
       * variable.
       */
      if (memcmp(&ctx->last_var_data, &tmp, sizeof(tmp)) == 0 &&
          abs((int)data.location -
              (int)ctx->last_var_data.location) < (1 << 12) &&
          abs((int)data.driver_location -
              (int)ctx->last_var_data.driver_location) < (1 << 15))
         flags.u.data_encoding = var_encode_location_diff;
      else
         flags.u.data_encoding = var_encode_full;
   }

   blob_write_uint32(ctx->blob, flags.u32);

   if (!flags.u.type_same_as_last) {
      encode_type_to_blob(ctx->blob, var->type);
      ctx->last_type = var->type;
   }

   if (var->interface_type && !flags.u.interface_type_same_as_last) {
      encode_type_to_blob(ctx->blob, var->interface_type);
      ctx->last_interface_type = var->interface_type;
   }

   if (flags.u.has_name)
      blob_write_string(ctx->blob, var->name);

   if (flags.u.data_encoding == var_encode_full ||
       flags.u.data_encoding == var_encode_location_diff) {
      if (flags.u.data_encoding == var_encode_full) {
         blob_write_bytes(ctx->blob, &data, sizeof(data));
      } else {
         /* Serialize only the difference in locations from the last variable.
          */
         union packed_var_data_diff diff;

         diff.u.location = data.location - ctx->last_var_data.location;
         diff.u.location_frac = data.location_frac -
                                ctx->last_var_data.location_frac;
         diff.u.driver_location = data.driver_location -
                                  ctx->last_var_data.driver_location;

         blob_write_uint32(ctx->blob, diff.u32);
      }

      ctx->last_var_data = data;
   }

   for (unsigned i = 0; i < var->num_state_slots; i++) {
      blob_write_bytes(ctx->blob, &var->state_slots[i],
                       sizeof(var->state_slots[i]));
   }
   if (var->constant_initializer)
      write_constant(ctx, var->constant_initializer);
   if (var->num_members > 0) {
      blob_write_bytes(ctx->blob, (uint8_t *) var->members,
                       var->num_members * sizeof(*var->members));
   }
}

static nir_variable *
read_variable(read_ctx *ctx)
{
   nir_variable *var = rzalloc(ctx->nir, nir_variable);
   read_add_object(ctx, var);

   union packed_var flags;
   flags.u32 = blob_read_uint32(ctx->blob);

   if (flags.u.type_same_as_last) {
      var->type = ctx->last_type;
   } else {
      var->type = decode_type_from_blob(ctx->blob);
      ctx->last_type = var->type;
   }

   if (flags.u.has_interface_type) {
      if (flags.u.interface_type_same_as_last) {
         var->interface_type = ctx->last_interface_type;
      } else {
         var->interface_type = decode_type_from_blob(ctx->blob);
         ctx->last_interface_type = var->interface_type;
      }
   }

   if (flags.u.has_name) {
      const char *name = blob_read_string(ctx->blob);
      var->name = ralloc_strdup(var, name);
   } else {
      var->name = NULL;
   }

   if (flags.u.data_encoding == var_encode_shader_temp)
      var->data.mode = nir_var_shader_temp;
   else if (flags.u.data_encoding == var_encode_function_temp)
      var->data.mode = nir_var_function_temp;
   else if (flags.u.data_encoding == var_encode_full) {
      blob_copy_bytes(ctx->blob, (uint8_t *) &var->data, sizeof(var->data));
      ctx->last_var_data = var->data;
   } else { /* var_encode_location_diff */
      union packed_var_data_diff diff;
      diff.u32 = blob_read_uint32(ctx->blob);

      var->data = ctx->last_var_data;
      var->data.location += diff.u.location;
      var->data.location_frac += diff.u.location_frac;
      var->data.driver_location += diff.u.driver_location;

      ctx->last_var_data = var->data;
   }

   var->num_state_slots = flags.u.num_state_slots;
   if (var->num_state_slots != 0) {
      var->state_slots = ralloc_array(var, nir_state_slot,
                                      var->num_state_slots);
      for (unsigned i = 0; i < var->num_state_slots; i++) {
         blob_copy_bytes(ctx->blob, &var->state_slots[i],
                         sizeof(var->state_slots[i]));
      }
   }
   if (flags.u.has_constant_initializer)
      var->constant_initializer = read_constant(ctx, var);
   else
      var->constant_initializer = NULL;
   var->num_members = flags.u.num_members;
   if (var->num_members > 0) {
      var->members = ralloc_array(var, struct nir_variable_data,
                                  var->num_members);
      blob_copy_bytes(ctx->blob, (uint8_t *) var->members,
                      var->num_members * sizeof(*var->members));
   }

   return var;
}

static void
write_var_list(write_ctx *ctx, const struct exec_list *src)
{
   blob_write_uint32(ctx->blob, exec_list_length(src));
   foreach_list_typed(nir_variable, var, node, src) {
      write_variable(ctx, var);
   }
}

static void
read_var_list(read_ctx *ctx, struct exec_list *dst)
{
   exec_list_make_empty(dst);
   unsigned num_vars = blob_read_uint32(ctx->blob);
   for (unsigned i = 0; i < num_vars; i++) {
      nir_variable *var = read_variable(ctx);
      exec_list_push_tail(dst, &var->node);
   }
}

static void
write_register(write_ctx *ctx, const nir_register *reg)
{
   write_add_object(ctx, reg);
   blob_write_uint32(ctx->blob, reg->num_components);
   blob_write_uint32(ctx->blob, reg->bit_size);
   blob_write_uint32(ctx->blob, reg->num_array_elems);
   blob_write_uint32(ctx->blob, reg->index);
   blob_write_uint32(ctx->blob, !ctx->strip && reg->name);
   if (!ctx->strip && reg->name)
      blob_write_string(ctx->blob, reg->name);
}

static nir_register *
read_register(read_ctx *ctx)
{
   nir_register *reg = ralloc(ctx->nir, nir_register);
   read_add_object(ctx, reg);
   reg->num_components = blob_read_uint32(ctx->blob);
   reg->bit_size = blob_read_uint32(ctx->blob);
   reg->num_array_elems = blob_read_uint32(ctx->blob);
   reg->index = blob_read_uint32(ctx->blob);
   bool has_name = blob_read_uint32(ctx->blob);
   if (has_name) {
      const char *name = blob_read_string(ctx->blob);
      reg->name = ralloc_strdup(reg, name);
   } else {
      reg->name = NULL;
   }

   list_inithead(&reg->uses);
   list_inithead(&reg->defs);
   list_inithead(&reg->if_uses);

   return reg;
}

static void
write_reg_list(write_ctx *ctx, const struct exec_list *src)
{
   blob_write_uint32(ctx->blob, exec_list_length(src));
   foreach_list_typed(nir_register, reg, node, src)
      write_register(ctx, reg);
}

static void
read_reg_list(read_ctx *ctx, struct exec_list *dst)
{
   exec_list_make_empty(dst);
   unsigned num_regs = blob_read_uint32(ctx->blob);
   for (unsigned i = 0; i < num_regs; i++) {
      nir_register *reg = read_register(ctx);
      exec_list_push_tail(dst, &reg->node);
   }
}

union packed_src {
   uint32_t u32;
   struct {
      unsigned is_ssa:1;   /* <-- Header */
      unsigned is_indirect:1;
      unsigned object_idx:20;
      unsigned _footer:10; /* <-- Footer */
   } any;
   struct {
      unsigned _header:22; /* <-- Header */
      unsigned negate:1;   /* <-- Footer */
      unsigned abs:1;
      unsigned swizzle_x:2;
      unsigned swizzle_y:2;
      unsigned swizzle_z:2;
      unsigned swizzle_w:2;
   } alu;
   struct {
      unsigned _header:22; /* <-- Header */
      unsigned src_type:5; /* <-- Footer */
      unsigned _pad:5;
   } tex;
};
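
/* Layout sketch (illustrative): every source spends its low 22 bits on the
 * common header (is_ssa, is_indirect, 20-bit object index) and the high
 * 10 bits on per-user footer data. An ALU source with negate = 1 and the
 * identity swizzle .xyzw therefore packs its footer as
 * 1 | (0 << 1) | (0 << 2) | (1 << 4) | (2 << 6) | (3 << 8).
 */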

static void
write_src_full(write_ctx *ctx, const nir_src *src, union packed_src header)
{
   /* Since sources are very frequent, we try to save some space when storing
    * them. In particular, we store whether the source is a register and
    * whether the register has an indirect index in the low two bits. We can
    * assume that the high two bits of the index are zero, since otherwise our
    * address space would've been exhausted allocating the remap table!
    */
   header.any.is_ssa = src->is_ssa;
   if (src->is_ssa) {
      header.any.object_idx = write_lookup_object(ctx, src->ssa);
      blob_write_uint32(ctx->blob, header.u32);
   } else {
      header.any.object_idx = write_lookup_object(ctx, src->reg.reg);
      header.any.is_indirect = !!src->reg.indirect;
      blob_write_uint32(ctx->blob, header.u32);
      blob_write_uint32(ctx->blob, src->reg.base_offset);
      if (src->reg.indirect) {
         union packed_src header = {0};
         write_src_full(ctx, src->reg.indirect, header);
      }
   }
}

static void
write_src(write_ctx *ctx, const nir_src *src)
{
   union packed_src header = {0};
   write_src_full(ctx, src, header);
}

static union packed_src
read_src(read_ctx *ctx, nir_src *src, void *mem_ctx)
{
   STATIC_ASSERT(sizeof(union packed_src) == 4);
   union packed_src header;
   header.u32 = blob_read_uint32(ctx->blob);

   src->is_ssa = header.any.is_ssa;
   if (src->is_ssa) {
      src->ssa = read_lookup_object(ctx, header.any.object_idx);
   } else {
      src->reg.reg = read_lookup_object(ctx, header.any.object_idx);
      src->reg.base_offset = blob_read_uint32(ctx->blob);
      if (header.any.is_indirect) {
         src->reg.indirect = ralloc(mem_ctx, nir_src);
         read_src(ctx, src->reg.indirect, mem_ctx);
      } else {
         src->reg.indirect = NULL;
      }
   }
   return header;
}

union packed_dest {
   uint8_t u8;
   struct {
      uint8_t is_ssa:1;
      uint8_t has_name:1;
      uint8_t num_components:3;
      uint8_t bit_size:3;
   } ssa;
   struct {
      uint8_t is_ssa:1;
      uint8_t is_indirect:1;
      uint8_t _pad:6;
   } reg;
};

enum intrinsic_const_indices_encoding {
   /* Use the 6 bits of packed_const_indices to store 1-6 indices.
    * 1 6-bit index, or 2 3-bit indices, or 3 2-bit indices, or
    * 4-6 1-bit indices.
    *
    * The common case for load_ubo is 0, 0, 0, which is trivially represented.
    * The common cases for load_interpolated_input also fit here, e.g.: 7, 3
    */
   const_indices_6bit_all_combined,

   const_indices_8bit,  /* 8 bits per element */
   const_indices_16bit, /* 16 bits per element */
   const_indices_32bit, /* 32 bits per element */
};
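
/* Worked example (illustrative): an intrinsic with two const indices 7 and 3
 * has max_bits = 3, and 3 * 2 <= 6, so both fit into the 6-bit field as
 * 7 | (3 << 3) and no extra bytes follow the header. Indices like 100, 3
 * (max_bits = 7) would fall back to const_indices_8bit instead.
 */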

enum load_const_packing {
   /* Constants are not packed and are stored in following dwords. */
   load_const_full,

   /* packed_value contains high 19 bits, low bits are 0,
    * good for floating-point decimals
    */
   load_const_scalar_hi_19bits,

   /* packed_value contains low 19 bits, high bits are sign-extended */
   load_const_scalar_lo_19bits_sext,
};
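
/* Worked example (illustrative): the 32-bit float 1.0f is 0x3f800000, whose
 * low 13 bits are zero, so it packs as load_const_scalar_hi_19bits with
 * packed_value = 0x3f800000 >> 13. A small integer like -5 (0xfffffffb)
 * survives ((x << 13) >> 13), so it packs as lo_19bits_sext instead.
 */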

union packed_instr {
   uint32_t u32;
   struct {
      unsigned instr_type:4; /* always present */
      unsigned _pad:20;
      unsigned dest:8;       /* always last */
   } any;
   struct {
      unsigned instr_type:4;
      unsigned exact:1;
      unsigned no_signed_wrap:1;
      unsigned no_unsigned_wrap:1;
      unsigned saturate:1;
      unsigned writemask:4;
      unsigned op:9;
      unsigned packed_src_ssa_16bit:1;
      /* Scalarized ALUs always have the same header. */
      unsigned num_followup_alu_sharing_header:2;
      unsigned dest:8;
   } alu;
   struct {
      unsigned instr_type:4;
      unsigned deref_type:3;
      unsigned cast_type_same_as_last:1;
      unsigned mode:10; /* deref_var redefines this */
      unsigned packed_src_ssa_16bit:1; /* deref_var redefines this */
      unsigned _pad:5; /* deref_var redefines this */
      unsigned dest:8;
   } deref;
   struct {
      unsigned instr_type:4;
      unsigned deref_type:3;
      unsigned _pad:1;
      unsigned object_idx:16; /* if 0, the object ID is a separate uint32 */
      unsigned dest:8;
   } deref_var;
   struct {
      unsigned instr_type:4;
      unsigned intrinsic:9;
      unsigned num_components:3;
      unsigned const_indices_encoding:2;
      unsigned packed_const_indices:6;
      unsigned dest:8;
   } intrinsic;
   struct {
      unsigned instr_type:4;
      unsigned last_component:4;
      unsigned bit_size:3;
      unsigned packing:2;       /* enum load_const_packing */
      unsigned packed_value:19; /* meaning determined by packing */
   } load_const;
   struct {
      unsigned instr_type:4;
      unsigned last_component:4;
      unsigned bit_size:3;
      unsigned _pad:21;
   } undef;
   struct {
      unsigned instr_type:4;
      unsigned num_srcs:4;
      unsigned op:4;
      unsigned texture_array_size:12;
      unsigned dest:8;
   } tex;
   struct {
      unsigned instr_type:4;
      unsigned num_srcs:20;
      unsigned dest:8;
   } phi;
   struct {
      unsigned instr_type:4;
      unsigned type:2;
      unsigned _pad:26;
   } jump;
};

/* Write "lo24" as low 24 bits in the first uint32. */
static void
write_dest(write_ctx *ctx, const nir_dest *dst, union packed_instr header,
           nir_instr_type instr_type)
{
   STATIC_ASSERT(sizeof(union packed_dest) == 1);
   union packed_dest dest;
   dest.u8 = 0;

   dest.ssa.is_ssa = dst->is_ssa;
   if (dst->is_ssa) {
      dest.ssa.has_name = !ctx->strip && dst->ssa.name;
      dest.ssa.num_components =
         encode_num_components_in_3bits(dst->ssa.num_components);
      dest.ssa.bit_size = encode_bit_size_3bits(dst->ssa.bit_size);
   } else {
      dest.reg.is_indirect = !!(dst->reg.indirect);
   }
   header.any.dest = dest.u8;

   /* Check if the current ALU instruction has the same header as the
    * previous instruction, when that one was also an ALU. If so, we don't
    * have to write the current header. This is a typical occurrence after
    * scalarization.
    */
   if (instr_type == nir_instr_type_alu) {
      bool equal_header = false;

      if (ctx->last_instr_type == nir_instr_type_alu) {
         assert(ctx->last_alu_header_offset);
         union packed_instr *last_header =
            (union packed_instr *)(ctx->blob->data +
                                   ctx->last_alu_header_offset);

         /* Clear the field that counts ALUs with equal headers. */
         union packed_instr clean_header;
         clean_header.u32 = last_header->u32;
         clean_header.alu.num_followup_alu_sharing_header = 0;

         /* There can be at most 4 consecutive ALU instructions
          * sharing the same header.
          */
         if (last_header->alu.num_followup_alu_sharing_header < 3 &&
             header.u32 == clean_header.u32) {
            last_header->alu.num_followup_alu_sharing_header++;
            equal_header = true;
         }
      }

      if (!equal_header) {
         ctx->last_alu_header_offset = ctx->blob->size;
         blob_write_uint32(ctx->blob, header.u32);
      }
   } else {
      blob_write_uint32(ctx->blob, header.u32);
   }

   if (dst->is_ssa) {
      write_add_object(ctx, &dst->ssa);
      if (dest.ssa.has_name)
         blob_write_string(ctx->blob, dst->ssa.name);
   } else {
      blob_write_uint32(ctx->blob, write_lookup_object(ctx, dst->reg.reg));
      blob_write_uint32(ctx->blob, dst->reg.base_offset);
      if (dst->reg.indirect)
         write_src(ctx, dst->reg.indirect);
   }
}
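
/* Worked example (illustrative): a vec4 fadd scalarized into four identical
 * 1-component fadds emits a single header dword; the three follow-up ALUs
 * only bump num_followup_alu_sharing_header (0 -> 3) and write their own
 * source payloads. read_instr() then reuses that one header for all four.
 */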

static void
read_dest(read_ctx *ctx, nir_dest *dst, nir_instr *instr,
          union packed_instr header)
{
   union packed_dest dest;
   dest.u8 = header.any.dest;

   if (dest.ssa.is_ssa) {
      unsigned bit_size = decode_bit_size_3bits(dest.ssa.bit_size);
      unsigned num_components =
         decode_num_components_in_3bits(dest.ssa.num_components);
      char *name = dest.ssa.has_name ? blob_read_string(ctx->blob) : NULL;
      nir_ssa_dest_init(instr, dst, num_components, bit_size, name);
      read_add_object(ctx, &dst->ssa);
   } else {
      dst->reg.reg = read_object(ctx);
      dst->reg.base_offset = blob_read_uint32(ctx->blob);
      if (dest.reg.is_indirect) {
         dst->reg.indirect = ralloc(instr, nir_src);
         read_src(ctx, dst->reg.indirect, instr);
      }
   }
}

static bool
are_object_ids_16bit(write_ctx *ctx)
{
   /* Check the highest object ID, because they are monotonic. */
   return ctx->next_idx < (1 << 16);
}

static bool
is_alu_src_ssa_16bit(write_ctx *ctx, const nir_alu_instr *alu)
{
   unsigned num_srcs = nir_op_infos[alu->op].num_inputs;

   for (unsigned i = 0; i < num_srcs; i++) {
      if (!alu->src[i].src.is_ssa || alu->src[i].abs || alu->src[i].negate)
         return false;

      unsigned src_components = nir_ssa_alu_instr_src_components(alu, i);

      for (unsigned chan = 0; chan < src_components; chan++) {
         if (alu->src[i].swizzle[chan] != chan)
            return false;
      }
   }

   return are_object_ids_16bit(ctx);
}
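
/* Worked example (illustrative): "fadd ssa_5, ssa_6" with no modifiers and
 * identity swizzles qualifies, so each source is stored as one uint16 object
 * index (4 bytes total for both) rather than a full 4-byte packed_src each.
 * A source with a .yx swizzle, abs/neg, or an object index >= 1 << 16 forces
 * the generic encoding.
 */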

static void
write_alu(write_ctx *ctx, const nir_alu_instr *alu)
{
   unsigned num_srcs = nir_op_infos[alu->op].num_inputs;
   /* 9 bits for nir_op */
   STATIC_ASSERT(nir_num_opcodes <= 512);
   union packed_instr header;
   header.u32 = 0;

   header.alu.instr_type = alu->instr.type;
   header.alu.exact = alu->exact;
   header.alu.no_signed_wrap = alu->no_signed_wrap;
   header.alu.no_unsigned_wrap = alu->no_unsigned_wrap;
   header.alu.saturate = alu->dest.saturate;
   header.alu.writemask = alu->dest.write_mask;
   header.alu.op = alu->op;
   header.alu.packed_src_ssa_16bit = is_alu_src_ssa_16bit(ctx, alu);

   write_dest(ctx, &alu->dest.dest, header, alu->instr.type);

   if (header.alu.packed_src_ssa_16bit) {
      for (unsigned i = 0; i < num_srcs; i++) {
         assert(alu->src[i].src.is_ssa);
         unsigned idx = write_lookup_object(ctx, alu->src[i].src.ssa);
         assert(idx < (1 << 16));
         blob_write_uint16(ctx->blob, idx);
      }
   } else {
      for (unsigned i = 0; i < num_srcs; i++) {
         union packed_src src;
         src.u32 = 0;

         src.alu.negate = alu->src[i].negate;
         src.alu.abs = alu->src[i].abs;
         src.alu.swizzle_x = alu->src[i].swizzle[0];
         src.alu.swizzle_y = alu->src[i].swizzle[1];
         src.alu.swizzle_z = alu->src[i].swizzle[2];
         src.alu.swizzle_w = alu->src[i].swizzle[3];

         write_src_full(ctx, &alu->src[i].src, src);
      }
   }
}

static nir_alu_instr *
read_alu(read_ctx *ctx, union packed_instr header)
{
   unsigned num_srcs = nir_op_infos[header.alu.op].num_inputs;
   nir_alu_instr *alu = nir_alu_instr_create(ctx->nir, header.alu.op);

   alu->exact = header.alu.exact;
   alu->no_signed_wrap = header.alu.no_signed_wrap;
   alu->no_unsigned_wrap = header.alu.no_unsigned_wrap;
   alu->dest.saturate = header.alu.saturate;
   alu->dest.write_mask = header.alu.writemask;

   read_dest(ctx, &alu->dest.dest, &alu->instr, header);

   if (header.alu.packed_src_ssa_16bit) {
      for (unsigned i = 0; i < num_srcs; i++) {
         nir_alu_src *src = &alu->src[i];
         src->src.is_ssa = true;
         src->src.ssa = read_lookup_object(ctx, blob_read_uint16(ctx->blob));

         memset(&src->swizzle, 0, sizeof(src->swizzle));

         unsigned src_components = nir_ssa_alu_instr_src_components(alu, i);

         for (unsigned chan = 0; chan < src_components; chan++)
            src->swizzle[chan] = chan;
      }
   } else {
      for (unsigned i = 0; i < num_srcs; i++) {
         union packed_src src = read_src(ctx, &alu->src[i].src, &alu->instr);

         alu->src[i].negate = src.alu.negate;
         alu->src[i].abs = src.alu.abs;
         alu->src[i].swizzle[0] = src.alu.swizzle_x;
         alu->src[i].swizzle[1] = src.alu.swizzle_y;
         alu->src[i].swizzle[2] = src.alu.swizzle_z;
         alu->src[i].swizzle[3] = src.alu.swizzle_w;
      }
   }

   return alu;
}

static void
write_deref(write_ctx *ctx, const nir_deref_instr *deref)
{
   assert(deref->deref_type < 8);
   assert(deref->mode < (1 << 10));

   union packed_instr header;
   header.u32 = 0;

   header.deref.instr_type = deref->instr.type;
   header.deref.deref_type = deref->deref_type;

   if (deref->deref_type == nir_deref_type_cast) {
      header.deref.mode = deref->mode;
      header.deref.cast_type_same_as_last = deref->type == ctx->last_type;
   }

   unsigned var_idx = 0;
   if (deref->deref_type == nir_deref_type_var) {
      var_idx = write_lookup_object(ctx, deref->var);
      if (var_idx && var_idx < (1 << 16))
         header.deref_var.object_idx = var_idx;
   }

   if (deref->deref_type == nir_deref_type_array ||
       deref->deref_type == nir_deref_type_ptr_as_array) {
      header.deref.packed_src_ssa_16bit =
         deref->parent.is_ssa && deref->arr.index.is_ssa &&
         are_object_ids_16bit(ctx);
   }

   write_dest(ctx, &deref->dest, header, deref->instr.type);

   switch (deref->deref_type) {
   case nir_deref_type_var:
      if (!header.deref_var.object_idx)
         blob_write_uint32(ctx->blob, var_idx);
      break;

   case nir_deref_type_struct:
      write_src(ctx, &deref->parent);
      blob_write_uint32(ctx->blob, deref->strct.index);
      break;

   case nir_deref_type_array:
   case nir_deref_type_ptr_as_array:
      if (header.deref.packed_src_ssa_16bit) {
         blob_write_uint16(ctx->blob,
                           write_lookup_object(ctx, deref->parent.ssa));
         blob_write_uint16(ctx->blob,
                           write_lookup_object(ctx, deref->arr.index.ssa));
      } else {
         write_src(ctx, &deref->parent);
         write_src(ctx, &deref->arr.index);
      }
      break;

   case nir_deref_type_cast:
      write_src(ctx, &deref->parent);
      blob_write_uint32(ctx->blob, deref->cast.ptr_stride);
      if (!header.deref.cast_type_same_as_last) {
         encode_type_to_blob(ctx->blob, deref->type);
         ctx->last_type = deref->type;
      }
      break;

   case nir_deref_type_array_wildcard:
      write_src(ctx, &deref->parent);
      break;

   default:
      unreachable("Invalid deref type");
   }
}

static nir_deref_instr *
read_deref(read_ctx *ctx, union packed_instr header)
{
   nir_deref_type deref_type = header.deref.deref_type;
   nir_deref_instr *deref = nir_deref_instr_create(ctx->nir, deref_type);

   read_dest(ctx, &deref->dest, &deref->instr, header);

   nir_deref_instr *parent;

   switch (deref->deref_type) {
   case nir_deref_type_var:
      if (header.deref_var.object_idx)
         deref->var = read_lookup_object(ctx, header.deref_var.object_idx);
      else
         deref->var = read_object(ctx);

      deref->type = deref->var->type;
      break;

   case nir_deref_type_struct:
      read_src(ctx, &deref->parent, &deref->instr);
      parent = nir_src_as_deref(deref->parent);
      deref->strct.index = blob_read_uint32(ctx->blob);
      deref->type = glsl_get_struct_field(parent->type, deref->strct.index);
      break;

   case nir_deref_type_array:
   case nir_deref_type_ptr_as_array:
      if (header.deref.packed_src_ssa_16bit) {
         deref->parent.is_ssa = true;
         deref->parent.ssa = read_lookup_object(ctx, blob_read_uint16(ctx->blob));
         deref->arr.index.is_ssa = true;
         deref->arr.index.ssa = read_lookup_object(ctx, blob_read_uint16(ctx->blob));
      } else {
         read_src(ctx, &deref->parent, &deref->instr);
         read_src(ctx, &deref->arr.index, &deref->instr);
      }

      parent = nir_src_as_deref(deref->parent);
      if (deref->deref_type == nir_deref_type_array)
         deref->type = glsl_get_array_element(parent->type);
      else
         deref->type = parent->type;
      break;

   case nir_deref_type_cast:
      read_src(ctx, &deref->parent, &deref->instr);
      deref->cast.ptr_stride = blob_read_uint32(ctx->blob);
      if (header.deref.cast_type_same_as_last) {
         deref->type = ctx->last_type;
      } else {
         deref->type = decode_type_from_blob(ctx->blob);
         ctx->last_type = deref->type;
      }
      break;

   case nir_deref_type_array_wildcard:
      read_src(ctx, &deref->parent, &deref->instr);
      parent = nir_src_as_deref(deref->parent);
      deref->type = glsl_get_array_element(parent->type);
      break;

   default:
      unreachable("Invalid deref type");
   }

   if (deref_type == nir_deref_type_var) {
      deref->mode = deref->var->data.mode;
   } else if (deref->deref_type == nir_deref_type_cast) {
      deref->mode = header.deref.mode;
   } else {
      assert(deref->parent.is_ssa);
      deref->mode = nir_instr_as_deref(deref->parent.ssa->parent_instr)->mode;
   }

   return deref;
}

static void
write_intrinsic(write_ctx *ctx, const nir_intrinsic_instr *intrin)
{
   /* 9 bits for nir_intrinsic_op */
   STATIC_ASSERT(nir_num_intrinsics <= 512);
   unsigned num_srcs = nir_intrinsic_infos[intrin->intrinsic].num_srcs;
   unsigned num_indices = nir_intrinsic_infos[intrin->intrinsic].num_indices;
   assert(intrin->intrinsic < 512);

   union packed_instr header;
   header.u32 = 0;

   header.intrinsic.instr_type = intrin->instr.type;
   header.intrinsic.intrinsic = intrin->intrinsic;
   header.intrinsic.num_components =
      encode_num_components_in_3bits(intrin->num_components);

   /* Analyze constant indices to decide how to encode them. */
   if (num_indices) {
      unsigned max_bits = 0;
      for (unsigned i = 0; i < num_indices; i++) {
         unsigned max = util_last_bit(intrin->const_index[i]);
         max_bits = MAX2(max_bits, max);
      }

      if (max_bits * num_indices <= 6) {
         header.intrinsic.const_indices_encoding = const_indices_6bit_all_combined;

         /* Pack all const indices into 6 bits. */
         unsigned bit_size = 6 / num_indices;
         for (unsigned i = 0; i < num_indices; i++) {
            header.intrinsic.packed_const_indices |=
               intrin->const_index[i] << (i * bit_size);
         }
      } else if (max_bits <= 8)
         header.intrinsic.const_indices_encoding = const_indices_8bit;
      else if (max_bits <= 16)
         header.intrinsic.const_indices_encoding = const_indices_16bit;
      else
         header.intrinsic.const_indices_encoding = const_indices_32bit;
   }

   if (nir_intrinsic_infos[intrin->intrinsic].has_dest)
      write_dest(ctx, &intrin->dest, header, intrin->instr.type);
   else
      blob_write_uint32(ctx->blob, header.u32);

   for (unsigned i = 0; i < num_srcs; i++)
      write_src(ctx, &intrin->src[i]);

   if (num_indices) {
      switch (header.intrinsic.const_indices_encoding) {
      case const_indices_8bit:
         for (unsigned i = 0; i < num_indices; i++)
            blob_write_uint8(ctx->blob, intrin->const_index[i]);
         break;
      case const_indices_16bit:
         for (unsigned i = 0; i < num_indices; i++)
            blob_write_uint16(ctx->blob, intrin->const_index[i]);
         break;
      case const_indices_32bit:
         for (unsigned i = 0; i < num_indices; i++)
            blob_write_uint32(ctx->blob, intrin->const_index[i]);
         break;
      }
   }
}

static nir_intrinsic_instr *
read_intrinsic(read_ctx *ctx, union packed_instr header)
{
   nir_intrinsic_op op = header.intrinsic.intrinsic;
   nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(ctx->nir, op);

   unsigned num_srcs = nir_intrinsic_infos[op].num_srcs;
   unsigned num_indices = nir_intrinsic_infos[op].num_indices;

   intrin->num_components =
      decode_num_components_in_3bits(header.intrinsic.num_components);

   if (nir_intrinsic_infos[op].has_dest)
      read_dest(ctx, &intrin->dest, &intrin->instr, header);

   for (unsigned i = 0; i < num_srcs; i++)
      read_src(ctx, &intrin->src[i], &intrin->instr);

   if (num_indices) {
      switch (header.intrinsic.const_indices_encoding) {
      case const_indices_6bit_all_combined: {
         unsigned bit_size = 6 / num_indices;
         unsigned bit_mask = u_bit_consecutive(0, bit_size);
         for (unsigned i = 0; i < num_indices; i++) {
            intrin->const_index[i] =
               (header.intrinsic.packed_const_indices >> (i * bit_size)) &
               bit_mask;
         }
         break;
      }
      case const_indices_8bit:
         for (unsigned i = 0; i < num_indices; i++)
            intrin->const_index[i] = blob_read_uint8(ctx->blob);
         break;
      case const_indices_16bit:
         for (unsigned i = 0; i < num_indices; i++)
            intrin->const_index[i] = blob_read_uint16(ctx->blob);
         break;
      case const_indices_32bit:
         for (unsigned i = 0; i < num_indices; i++)
            intrin->const_index[i] = blob_read_uint32(ctx->blob);
         break;
      }
   }

   return intrin;
}

static void
write_load_const(write_ctx *ctx, const nir_load_const_instr *lc)
{
   assert(lc->def.num_components >= 1 && lc->def.num_components <= 16);
   union packed_instr header;
   header.u32 = 0;

   header.load_const.instr_type = lc->instr.type;
   header.load_const.last_component = lc->def.num_components - 1;
   header.load_const.bit_size = encode_bit_size_3bits(lc->def.bit_size);
   header.load_const.packing = load_const_full;

   /* Try to pack 1-component constants into the 19 free bits in the header. */
   if (lc->def.num_components == 1) {
      switch (lc->def.bit_size) {
      case 64:
         if ((lc->value[0].u64 & 0x1fffffffffffull) == 0) {
            /* packed_value contains high 19 bits, low bits are 0 */
            header.load_const.packing = load_const_scalar_hi_19bits;
            header.load_const.packed_value = lc->value[0].u64 >> 45;
         } else if (((lc->value[0].i64 << 45) >> 45) == lc->value[0].i64) {
            /* packed_value contains low 19 bits, high bits are sign-extended */
            header.load_const.packing = load_const_scalar_lo_19bits_sext;
            header.load_const.packed_value = lc->value[0].u64;
         }
         break;

      case 32:
         if ((lc->value[0].u32 & 0x1fff) == 0) {
            header.load_const.packing = load_const_scalar_hi_19bits;
            header.load_const.packed_value = lc->value[0].u32 >> 13;
         } else if (((lc->value[0].i32 << 13) >> 13) == lc->value[0].i32) {
            header.load_const.packing = load_const_scalar_lo_19bits_sext;
            header.load_const.packed_value = lc->value[0].u32;
         }
         break;

      case 16:
         header.load_const.packing = load_const_scalar_lo_19bits_sext;
         header.load_const.packed_value = lc->value[0].u16;
         break;
      case 8:
         header.load_const.packing = load_const_scalar_lo_19bits_sext;
         header.load_const.packed_value = lc->value[0].u8;
         break;
      case 1:
         header.load_const.packing = load_const_scalar_lo_19bits_sext;
         header.load_const.packed_value = lc->value[0].b;
         break;
      default:
         unreachable("invalid bit_size");
      }
   }

   blob_write_uint32(ctx->blob, header.u32);

   if (header.load_const.packing == load_const_full) {
      switch (lc->def.bit_size) {
      case 64:
         blob_write_bytes(ctx->blob, lc->value,
                          sizeof(*lc->value) * lc->def.num_components);
         break;

      case 32:
         for (unsigned i = 0; i < lc->def.num_components; i++)
            blob_write_uint32(ctx->blob, lc->value[i].u32);
         break;

      case 16:
         for (unsigned i = 0; i < lc->def.num_components; i++)
            blob_write_uint16(ctx->blob, lc->value[i].u16);
         break;

      default:
         assert(lc->def.bit_size <= 8);
         for (unsigned i = 0; i < lc->def.num_components; i++)
            blob_write_uint8(ctx->blob, lc->value[i].u8);
         break;
      }
   }

   write_add_object(ctx, &lc->def);
}

static nir_load_const_instr *
read_load_const(read_ctx *ctx, union packed_instr header)
{
   nir_load_const_instr *lc =
      nir_load_const_instr_create(ctx->nir, header.load_const.last_component + 1,
                                  decode_bit_size_3bits(header.load_const.bit_size));

   switch (header.load_const.packing) {
   case load_const_scalar_hi_19bits:
      switch (lc->def.bit_size) {
      case 64:
         lc->value[0].u64 = (uint64_t)header.load_const.packed_value << 45;
         break;
      case 32:
         lc->value[0].u32 = (uint64_t)header.load_const.packed_value << 13;
         break;
      default:
         unreachable("invalid bit_size");
      }
      break;

   case load_const_scalar_lo_19bits_sext:
      switch (lc->def.bit_size) {
      case 64:
         lc->value[0].i64 = ((int64_t)header.load_const.packed_value << 45) >> 45;
         break;
      case 32:
         lc->value[0].i32 = ((int32_t)header.load_const.packed_value << 13) >> 13;
         break;
      case 16:
         lc->value[0].u16 = header.load_const.packed_value;
         break;
      case 8:
         lc->value[0].u8 = header.load_const.packed_value;
         break;
      case 1:
         lc->value[0].b = header.load_const.packed_value;
         break;
      default:
         unreachable("invalid bit_size");
      }
      break;

   case load_const_full:
      switch (lc->def.bit_size) {
      case 64:
         blob_copy_bytes(ctx->blob, lc->value, sizeof(*lc->value) * lc->def.num_components);
         break;

      case 32:
         for (unsigned i = 0; i < lc->def.num_components; i++)
            lc->value[i].u32 = blob_read_uint32(ctx->blob);
         break;

      case 16:
         for (unsigned i = 0; i < lc->def.num_components; i++)
            lc->value[i].u16 = blob_read_uint16(ctx->blob);
         break;

      default:
         assert(lc->def.bit_size <= 8);
         for (unsigned i = 0; i < lc->def.num_components; i++)
            lc->value[i].u8 = blob_read_uint8(ctx->blob);
         break;
      }
      break;
   }

   read_add_object(ctx, &lc->def);
   return lc;
}

static void
write_ssa_undef(write_ctx *ctx, const nir_ssa_undef_instr *undef)
{
   assert(undef->def.num_components >= 1 && undef->def.num_components <= 16);

   union packed_instr header;
   header.u32 = 0;

   header.undef.instr_type = undef->instr.type;
   header.undef.last_component = undef->def.num_components - 1;
   header.undef.bit_size = encode_bit_size_3bits(undef->def.bit_size);

   blob_write_uint32(ctx->blob, header.u32);
   write_add_object(ctx, &undef->def);
}

static nir_ssa_undef_instr *
read_ssa_undef(read_ctx *ctx, union packed_instr header)
{
   nir_ssa_undef_instr *undef =
      nir_ssa_undef_instr_create(ctx->nir, header.undef.last_component + 1,
                                 decode_bit_size_3bits(header.undef.bit_size));

   read_add_object(ctx, &undef->def);
   return undef;
}

union packed_tex_data {
   uint32_t u32;
   struct {
      enum glsl_sampler_dim sampler_dim:4;
      nir_alu_type dest_type:8;
      unsigned coord_components:3;
      unsigned is_array:1;
      unsigned is_shadow:1;
      unsigned is_new_style_shadow:1;
      unsigned component:2;
      unsigned unused:10; /* Mark unused for valgrind. */
   } u;
};

static void
write_tex(write_ctx *ctx, const nir_tex_instr *tex)
{
   assert(tex->num_srcs < 16);
   assert(tex->op < 16);
   assert(tex->texture_array_size < 1024);

   union packed_instr header;
   header.u32 = 0;

   header.tex.instr_type = tex->instr.type;
   header.tex.num_srcs = tex->num_srcs;
   header.tex.op = tex->op;
   header.tex.texture_array_size = tex->texture_array_size;

   write_dest(ctx, &tex->dest, header, tex->instr.type);

   blob_write_uint32(ctx->blob, tex->texture_index);
   blob_write_uint32(ctx->blob, tex->sampler_index);
   if (tex->op == nir_texop_tg4)
      blob_write_bytes(ctx->blob, tex->tg4_offsets, sizeof(tex->tg4_offsets));

   STATIC_ASSERT(sizeof(union packed_tex_data) == sizeof(uint32_t));
   union packed_tex_data packed = {
      .u.sampler_dim = tex->sampler_dim,
      .u.dest_type = tex->dest_type,
      .u.coord_components = tex->coord_components,
      .u.is_array = tex->is_array,
      .u.is_shadow = tex->is_shadow,
      .u.is_new_style_shadow = tex->is_new_style_shadow,
      .u.component = tex->component,
   };
   blob_write_uint32(ctx->blob, packed.u32);

   for (unsigned i = 0; i < tex->num_srcs; i++) {
      union packed_src src;
      src.u32 = 0;
      src.tex.src_type = tex->src[i].src_type;
      write_src_full(ctx, &tex->src[i].src, src);
   }
}

static nir_tex_instr *
read_tex(read_ctx *ctx, union packed_instr header)
{
   nir_tex_instr *tex = nir_tex_instr_create(ctx->nir, header.tex.num_srcs);

   read_dest(ctx, &tex->dest, &tex->instr, header);

   tex->op = header.tex.op;
   tex->texture_index = blob_read_uint32(ctx->blob);
   tex->texture_array_size = header.tex.texture_array_size;
   tex->sampler_index = blob_read_uint32(ctx->blob);
   if (tex->op == nir_texop_tg4)
      blob_copy_bytes(ctx->blob, tex->tg4_offsets, sizeof(tex->tg4_offsets));

   union packed_tex_data packed;
   packed.u32 = blob_read_uint32(ctx->blob);
   tex->sampler_dim = packed.u.sampler_dim;
   tex->dest_type = packed.u.dest_type;
   tex->coord_components = packed.u.coord_components;
   tex->is_array = packed.u.is_array;
   tex->is_shadow = packed.u.is_shadow;
   tex->is_new_style_shadow = packed.u.is_new_style_shadow;
   tex->component = packed.u.component;

   for (unsigned i = 0; i < tex->num_srcs; i++) {
      union packed_src src = read_src(ctx, &tex->src[i].src, &tex->instr);
      tex->src[i].src_type = src.tex.src_type;
   }

   return tex;
}

static void
write_phi(write_ctx *ctx, const nir_phi_instr *phi)
{
   union packed_instr header;
   header.u32 = 0;

   header.phi.instr_type = phi->instr.type;
   header.phi.num_srcs = exec_list_length(&phi->srcs);

   /* Phi nodes are special, since they may reference SSA definitions and
    * basic blocks that don't exist yet. We leave two empty uint32_t's here,
    * and then store enough information so that a later fixup pass can fill
    * them in correctly.
    */
   write_dest(ctx, &phi->dest, header, phi->instr.type);

   nir_foreach_phi_src(src, phi) {
      assert(src->src.is_ssa);
      size_t blob_offset = blob_reserve_uint32(ctx->blob);
      ASSERTED size_t blob_offset2 = blob_reserve_uint32(ctx->blob);
      assert(blob_offset + sizeof(uint32_t) == blob_offset2);
      write_phi_fixup fixup = {
         .blob_offset = blob_offset,
         .src = src->src.ssa,
         .block = src->pred,
      };
      util_dynarray_append(&ctx->phi_fixups, write_phi_fixup, fixup);
   }
}

static void
write_fixup_phis(write_ctx *ctx)
{
   util_dynarray_foreach(&ctx->phi_fixups, write_phi_fixup, fixup) {
      uint32_t *blob_ptr = (uint32_t *)(ctx->blob->data + fixup->blob_offset);
      blob_ptr[0] = write_lookup_object(ctx, fixup->src);
      blob_ptr[1] = write_lookup_object(ctx, fixup->block);
   }

   util_dynarray_clear(&ctx->phi_fixups);
}

static nir_phi_instr *
read_phi(read_ctx *ctx, nir_block *blk, union packed_instr header)
{
   nir_phi_instr *phi = nir_phi_instr_create(ctx->nir);

   read_dest(ctx, &phi->dest, &phi->instr, header);

   /* For similar reasons as before, we just store the index directly into the
    * pointer, and let a later pass resolve the phi sources.
    *
    * In order to ensure that the copied sources (which are just the indices
    * from the blob for now) don't get inserted into the old shader's use-def
    * lists, we have to add the phi instruction *before* we set up its
    * sources.
    */
   nir_instr_insert_after_block(blk, &phi->instr);

   for (unsigned i = 0; i < header.phi.num_srcs; i++) {
      nir_phi_src *src = ralloc(phi, nir_phi_src);

      src->src.is_ssa = true;
      src->src.ssa = (nir_ssa_def *)(uintptr_t) blob_read_uint32(ctx->blob);
      src->pred = (nir_block *)(uintptr_t) blob_read_uint32(ctx->blob);

      /* Since we're not letting nir_insert_instr handle use/def stuff for us,
       * we have to set the parent_instr manually. It doesn't really matter
       * when we do it, so we might as well do it here.
       */
      src->src.parent_instr = &phi->instr;

      /* Stash it in the list of phi sources. We'll walk this list and fix up
       * sources at the very end of read_function_impl.
       */
      list_add(&src->src.use_link, &ctx->phi_srcs);

      exec_list_push_tail(&phi->srcs, &src->node);
   }

   return phi;
}

static void
read_fixup_phis(read_ctx *ctx)
{
   list_for_each_entry_safe(nir_phi_src, src, &ctx->phi_srcs, src.use_link) {
      src->pred = read_lookup_object(ctx, (uintptr_t)src->pred);
      src->src.ssa = read_lookup_object(ctx, (uintptr_t)src->src.ssa);

      /* Remove from this list */
      list_del(&src->src.use_link);

      list_addtail(&src->src.use_link, &src->src.ssa->uses);
   }
   assert(list_is_empty(&ctx->phi_srcs));
}
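
/* Two-pass sketch (illustrative): for "phi b1: ssa_7, b2: ssa_9" the writer
 * reserves two dwords per source while emitting the phi, records them in
 * phi_fixups, and write_fixup_phis() backpatches them with the object IDs of
 * ssa_7/b1 and ssa_9/b2 once every block and def has been assigned an index.
 * The reader mirrors this: read_phi() stores the raw indices cast to
 * pointers, and read_fixup_phis() swaps them for real pointers at the end of
 * read_function_impl().
 */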

static void
write_jump(write_ctx *ctx, const nir_jump_instr *jmp)
{
   assert(jmp->type < 4);

   union packed_instr header;
   header.u32 = 0;

   header.jump.instr_type = jmp->instr.type;
   header.jump.type = jmp->type;

   blob_write_uint32(ctx->blob, header.u32);
}

static nir_jump_instr *
read_jump(read_ctx *ctx, union packed_instr header)
{
   nir_jump_instr *jmp = nir_jump_instr_create(ctx->nir, header.jump.type);
   return jmp;
}

static void
write_call(write_ctx *ctx, const nir_call_instr *call)
{
   blob_write_uint32(ctx->blob, write_lookup_object(ctx, call->callee));

   for (unsigned i = 0; i < call->num_params; i++)
      write_src(ctx, &call->params[i]);
}

static nir_call_instr *
read_call(read_ctx *ctx)
{
   nir_function *callee = read_object(ctx);
   nir_call_instr *call = nir_call_instr_create(ctx->nir, callee);

   for (unsigned i = 0; i < call->num_params; i++)
      read_src(ctx, &call->params[i], call);

   return call;
}

static void
write_instr(write_ctx *ctx, const nir_instr *instr)
{
   /* We have only 4 bits for the instruction type. */
   assert(instr->type < 16);

   switch (instr->type) {
   case nir_instr_type_alu:
      write_alu(ctx, nir_instr_as_alu(instr));
      break;
   case nir_instr_type_deref:
      write_deref(ctx, nir_instr_as_deref(instr));
      break;
   case nir_instr_type_intrinsic:
      write_intrinsic(ctx, nir_instr_as_intrinsic(instr));
      break;
   case nir_instr_type_load_const:
      write_load_const(ctx, nir_instr_as_load_const(instr));
      break;
   case nir_instr_type_ssa_undef:
      write_ssa_undef(ctx, nir_instr_as_ssa_undef(instr));
      break;
   case nir_instr_type_tex:
      write_tex(ctx, nir_instr_as_tex(instr));
      break;
   case nir_instr_type_phi:
      write_phi(ctx, nir_instr_as_phi(instr));
      break;
   case nir_instr_type_jump:
      write_jump(ctx, nir_instr_as_jump(instr));
      break;
   case nir_instr_type_call:
      blob_write_uint32(ctx->blob, instr->type);
      write_call(ctx, nir_instr_as_call(instr));
      break;
   case nir_instr_type_parallel_copy:
      unreachable("Cannot write parallel copies");
   default:
      unreachable("bad instr type");
   }
}

/* Return the number of instructions read. */
static unsigned
read_instr(read_ctx *ctx, nir_block *block)
{
   STATIC_ASSERT(sizeof(union packed_instr) == 4);
   union packed_instr header;
   header.u32 = blob_read_uint32(ctx->blob);
   nir_instr *instr;

   switch (header.any.instr_type) {
   case nir_instr_type_alu:
      for (unsigned i = 0; i <= header.alu.num_followup_alu_sharing_header; i++)
         nir_instr_insert_after_block(block, &read_alu(ctx, header)->instr);
      return header.alu.num_followup_alu_sharing_header + 1;
   case nir_instr_type_deref:
      instr = &read_deref(ctx, header)->instr;
      break;
   case nir_instr_type_intrinsic:
      instr = &read_intrinsic(ctx, header)->instr;
      break;
   case nir_instr_type_load_const:
      instr = &read_load_const(ctx, header)->instr;
      break;
   case nir_instr_type_ssa_undef:
      instr = &read_ssa_undef(ctx, header)->instr;
      break;
   case nir_instr_type_tex:
      instr = &read_tex(ctx, header)->instr;
      break;
   case nir_instr_type_phi:
      /* Phi instructions are a bit of a special case when reading because we
       * don't want inserting the instruction to automatically handle use/defs
       * for us. Instead, we need to wait until all the blocks/instructions
       * are read so that we can set their sources up.
       */
      read_phi(ctx, block, header);
      return 1;
   case nir_instr_type_jump:
      instr = &read_jump(ctx, header)->instr;
      break;
   case nir_instr_type_call:
      instr = &read_call(ctx)->instr;
      break;
   case nir_instr_type_parallel_copy:
      unreachable("Cannot read parallel copies");
   default:
      unreachable("bad instr type");
   }

   nir_instr_insert_after_block(block, instr);
   return 1;
}

static void
write_block(write_ctx *ctx, const nir_block *block)
{
   write_add_object(ctx, block);
   blob_write_uint32(ctx->blob, exec_list_length(&block->instr_list));

   ctx->last_instr_type = ~0;
   ctx->last_alu_header_offset = 0;

   nir_foreach_instr(instr, block) {
      write_instr(ctx, instr);
      ctx->last_instr_type = instr->type;
   }
}

static void
read_block(read_ctx *ctx, struct exec_list *cf_list)
{
   /* Don't actually create a new block. Just use the one from the tail of
    * the list. NIR guarantees that the tail of the list is a block and that
    * no two blocks are side-by-side in the IR; it should be empty.
    */
   nir_block *block =
      exec_node_data(nir_block, exec_list_get_tail(cf_list), cf_node.node);

   read_add_object(ctx, block);
   unsigned num_instrs = blob_read_uint32(ctx->blob);
   for (unsigned i = 0; i < num_instrs;) {
      i += read_instr(ctx, block);
   }
}

static void
write_cf_list(write_ctx *ctx, const struct exec_list *cf_list);

static void
read_cf_list(read_ctx *ctx, struct exec_list *cf_list);

static void
write_if(write_ctx *ctx, nir_if *nif)
{
   write_src(ctx, &nif->condition);

   write_cf_list(ctx, &nif->then_list);
   write_cf_list(ctx, &nif->else_list);
}

static void
read_if(read_ctx *ctx, struct exec_list *cf_list)
{
   nir_if *nif = nir_if_create(ctx->nir);

   read_src(ctx, &nif->condition, nif);

   nir_cf_node_insert_end(cf_list, &nif->cf_node);

   read_cf_list(ctx, &nif->then_list);
   read_cf_list(ctx, &nif->else_list);
}

static void
write_loop(write_ctx *ctx, nir_loop *loop)
{
   write_cf_list(ctx, &loop->body);
}

static void
read_loop(read_ctx *ctx, struct exec_list *cf_list)
{
   nir_loop *loop = nir_loop_create(ctx->nir);

   nir_cf_node_insert_end(cf_list, &loop->cf_node);

   read_cf_list(ctx, &loop->body);
}

static void
write_cf_node(write_ctx *ctx, nir_cf_node *cf)
{
   blob_write_uint32(ctx->blob, cf->type);

   switch (cf->type) {
   case nir_cf_node_block:
      write_block(ctx, nir_cf_node_as_block(cf));
      break;
   case nir_cf_node_if:
      write_if(ctx, nir_cf_node_as_if(cf));
      break;
   case nir_cf_node_loop:
      write_loop(ctx, nir_cf_node_as_loop(cf));
      break;
   default:
      unreachable("bad cf type");
   }
}

static void
read_cf_node(read_ctx *ctx, struct exec_list *list)
{
   nir_cf_node_type type = blob_read_uint32(ctx->blob);

   switch (type) {
   case nir_cf_node_block:
      read_block(ctx, list);
      break;
   case nir_cf_node_if:
      read_if(ctx, list);
      break;
   case nir_cf_node_loop:
      read_loop(ctx, list);
      break;
   default:
      unreachable("bad cf type");
   }
}

static void
write_cf_list(write_ctx *ctx, const struct exec_list *cf_list)
{
   blob_write_uint32(ctx->blob, exec_list_length(cf_list));
   foreach_list_typed(nir_cf_node, cf, node, cf_list) {
      write_cf_node(ctx, cf);
   }
}

static void
read_cf_list(read_ctx *ctx, struct exec_list *cf_list)
{
   uint32_t num_cf_nodes = blob_read_uint32(ctx->blob);
   for (unsigned i = 0; i < num_cf_nodes; i++)
      read_cf_node(ctx, cf_list);
}

static void
write_function_impl(write_ctx *ctx, const nir_function_impl *fi)
{
   write_var_list(ctx, &fi->locals);
   write_reg_list(ctx, &fi->registers);
   blob_write_uint32(ctx->blob, fi->reg_alloc);

   write_cf_list(ctx, &fi->body);
   write_fixup_phis(ctx);
}

static nir_function_impl *
read_function_impl(read_ctx *ctx, nir_function *fxn)
{
   nir_function_impl *fi = nir_function_impl_create_bare(ctx->nir);
   fi->function = fxn;

   read_var_list(ctx, &fi->locals);
   read_reg_list(ctx, &fi->registers);
   fi->reg_alloc = blob_read_uint32(ctx->blob);

   read_cf_list(ctx, &fi->body);
   read_fixup_phis(ctx);

   fi->valid_metadata = 0;

   return fi;
}

static void
write_function(write_ctx *ctx, const nir_function *fxn)
{
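   /* Flag layout (from the code below): bit 0 = is_entrypoint,
    * bit 1 = has a name, bit 2 = has an impl. A named entrypoint with an
    * implementation therefore writes flags == 0x7.
    */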
   uint32_t flags = fxn->is_entrypoint;
   if (fxn->name)
      flags |= 0x2;
   if (fxn->impl)
      flags |= 0x4;
   blob_write_uint32(ctx->blob, flags);
   if (fxn->name)
      blob_write_string(ctx->blob, fxn->name);

   write_add_object(ctx, fxn);

   blob_write_uint32(ctx->blob, fxn->num_params);
   for (unsigned i = 0; i < fxn->num_params; i++) {
      uint32_t val =
         ((uint32_t)fxn->params[i].num_components) |
         ((uint32_t)fxn->params[i].bit_size) << 8;
      blob_write_uint32(ctx->blob, val);
   }

   /* At first glance, it looks like we should write the function_impl here.
    * However, call instructions need to be able to reference at least the
    * function and those will get processed as we write the function_impls.
    * We stop here and write function_impls as a second pass.
    */
}

static void
read_function(read_ctx *ctx)
{
   uint32_t flags = blob_read_uint32(ctx->blob);
   bool has_name = flags & 0x2;
   char *name = has_name ? blob_read_string(ctx->blob) : NULL;

   nir_function *fxn = nir_function_create(ctx->nir, name);

   read_add_object(ctx, fxn);

   fxn->num_params = blob_read_uint32(ctx->blob);
   fxn->params = ralloc_array(fxn, nir_parameter, fxn->num_params);
   for (unsigned i = 0; i < fxn->num_params; i++) {
      uint32_t val = blob_read_uint32(ctx->blob);
      fxn->params[i].num_components = val & 0xff;
      fxn->params[i].bit_size = (val >> 8) & 0xff;
   }

   fxn->is_entrypoint = flags & 0x1;
   if (flags & 0x4)
      fxn->impl = NIR_SERIALIZE_FUNC_HAS_IMPL;
}

/**
 * Serialize NIR into a binary blob.
 *
 * \param strip  Don't serialize information only useful for debugging,
 *               such as variable names, making cache hits from similar
 *               shaders more likely.
 */
void
nir_serialize(struct blob *blob, const nir_shader *nir, bool strip)
{
   write_ctx ctx = {0};
   ctx.remap_table = _mesa_pointer_hash_table_create(NULL);
   ctx.blob = blob;
   ctx.nir = nir;
   ctx.strip = strip;
   util_dynarray_init(&ctx.phi_fixups, NULL);

   size_t idx_size_offset = blob_reserve_uint32(blob);

   struct shader_info info = nir->info;
   uint32_t strings = 0;
   if (!strip && info.name)
      strings |= 0x1;
   if (!strip && info.label)
      strings |= 0x2;
   blob_write_uint32(blob, strings);
   if (!strip && info.name)
      blob_write_string(blob, info.name);
   if (!strip && info.label)
      blob_write_string(blob, info.label);
   info.name = info.label = NULL;
   blob_write_bytes(blob, (uint8_t *) &info, sizeof(info));

   write_var_list(&ctx, &nir->uniforms);
   write_var_list(&ctx, &nir->inputs);
   write_var_list(&ctx, &nir->outputs);
   write_var_list(&ctx, &nir->shared);
   write_var_list(&ctx, &nir->globals);
   write_var_list(&ctx, &nir->system_values);

   blob_write_uint32(blob, nir->num_inputs);
   blob_write_uint32(blob, nir->num_uniforms);
   blob_write_uint32(blob, nir->num_outputs);
   blob_write_uint32(blob, nir->num_shared);
   blob_write_uint32(blob, nir->scratch_size);

   blob_write_uint32(blob, exec_list_length(&nir->functions));
   nir_foreach_function(fxn, nir) {
      write_function(&ctx, fxn);
   }

   nir_foreach_function(fxn, nir) {
      if (fxn->impl)
         write_function_impl(&ctx, fxn->impl);
   }

   blob_write_uint32(blob, nir->constant_data_size);
   if (nir->constant_data_size > 0)
      blob_write_bytes(blob, nir->constant_data, nir->constant_data_size);

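   /* Backpatch the dword reserved at the start of the blob with the final
    * object count; nir_deserialize() reads it first to size its
    * index -> pointer table.
    */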
   *(uint32_t *)(blob->data + idx_size_offset) = ctx.next_idx;

   _mesa_hash_table_destroy(ctx.remap_table, NULL);
   util_dynarray_fini(&ctx.phi_fixups);
}

nir_shader *
nir_deserialize(void *mem_ctx,
                const struct nir_shader_compiler_options *options,
                struct blob_reader *blob)
{
   read_ctx ctx = {0};
   ctx.blob = blob;
   list_inithead(&ctx.phi_srcs);
   ctx.idx_table_len = blob_read_uint32(blob);
   ctx.idx_table = calloc(ctx.idx_table_len, sizeof(uintptr_t));

   uint32_t strings = blob_read_uint32(blob);
   char *name = (strings & 0x1) ? blob_read_string(blob) : NULL;
   char *label = (strings & 0x2) ? blob_read_string(blob) : NULL;

   struct shader_info info;
   blob_copy_bytes(blob, (uint8_t *) &info, sizeof(info));

   ctx.nir = nir_shader_create(mem_ctx, info.stage, options, NULL);

   info.name = name ? ralloc_strdup(ctx.nir, name) : NULL;
   info.label = label ? ralloc_strdup(ctx.nir, label) : NULL;

   ctx.nir->info = info;

   read_var_list(&ctx, &ctx.nir->uniforms);
   read_var_list(&ctx, &ctx.nir->inputs);
   read_var_list(&ctx, &ctx.nir->outputs);
   read_var_list(&ctx, &ctx.nir->shared);
   read_var_list(&ctx, &ctx.nir->globals);
   read_var_list(&ctx, &ctx.nir->system_values);

   ctx.nir->num_inputs = blob_read_uint32(blob);
   ctx.nir->num_uniforms = blob_read_uint32(blob);
   ctx.nir->num_outputs = blob_read_uint32(blob);
   ctx.nir->num_shared = blob_read_uint32(blob);
   ctx.nir->scratch_size = blob_read_uint32(blob);

   unsigned num_functions = blob_read_uint32(blob);
   for (unsigned i = 0; i < num_functions; i++)
      read_function(&ctx);

   nir_foreach_function(fxn, ctx.nir) {
      if (fxn->impl == NIR_SERIALIZE_FUNC_HAS_IMPL)
         fxn->impl = read_function_impl(&ctx, fxn);
   }

   ctx.nir->constant_data_size = blob_read_uint32(blob);
   if (ctx.nir->constant_data_size > 0) {
      ctx.nir->constant_data =
         ralloc_size(ctx.nir, ctx.nir->constant_data_size);
      blob_copy_bytes(blob, ctx.nir->constant_data,
                      ctx.nir->constant_data_size);
   }

   free(ctx.idx_table);

   return ctx.nir;
}

void
nir_shader_serialize_deserialize(nir_shader *shader)
{
   const struct nir_shader_compiler_options *options = shader->options;

   struct blob writer;
   blob_init(&writer);
   nir_serialize(&writer, shader, false);

   /* Delete all of dest's ralloc children but leave dest alone */
   void *dead_ctx = ralloc_context(NULL);
   ralloc_adopt(dead_ctx, shader);
   ralloc_free(dead_ctx);

   dead_ctx = ralloc_context(NULL);

   struct blob_reader reader;
   blob_reader_init(&reader, writer.data, writer.size);
   nir_shader *copy = nir_deserialize(dead_ctx, options, &reader);

   blob_finish(&writer);

   nir_shader_replace(shader, copy);
   ralloc_free(dead_ctx);
}