nir/serialize: don't serialize mode for deref non-cast instructions
[mesa.git] src/compiler/nir/nir_serialize.c
1 /*
2 * Copyright © 2017 Connor Abbott
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "nir_serialize.h"
25 #include "nir_control_flow.h"
26 #include "util/u_dynarray.h"
27 #include "util/u_math.h"
28
29 #define NIR_SERIALIZE_FUNC_HAS_IMPL ((void *)(intptr_t)1)
30 #define MAX_OBJECT_IDS (1 << 20)
31
32 typedef struct {
33 size_t blob_offset;
34 nir_ssa_def *src;
35 nir_block *block;
36 } write_phi_fixup;
37
38 typedef struct {
39 const nir_shader *nir;
40
41 struct blob *blob;
42
43 /* maps pointer to index */
44 struct hash_table *remap_table;
45
46 /* the next index to assign to a NIR in-memory object */
47 uint32_t next_idx;
48
49 /* Array of write_phi_fixup structs representing phi sources that need to
50 * be resolved in the second pass.
51 */
52 struct util_dynarray phi_fixups;
53
54 /* The last serialized type. */
55 const struct glsl_type *last_type;
56 const struct glsl_type *last_interface_type;
57 struct nir_variable_data last_var_data;
58
59 /* Don't write optional data such as variable names. */
60 bool strip;
61 } write_ctx;
62
63 typedef struct {
64 nir_shader *nir;
65
66 struct blob_reader *blob;
67
68 /* the next index to assign to a NIR in-memory object */
69 uint32_t next_idx;
70
71 /* The length of the index -> object table */
72 uint32_t idx_table_len;
73
74 /* map from index to deserialized pointer */
75 void **idx_table;
76
77 /* List of phi sources. */
78 struct list_head phi_srcs;
79
80 /* The last deserialized type. */
81 const struct glsl_type *last_type;
82 const struct glsl_type *last_interface_type;
83 struct nir_variable_data last_var_data;
84 } read_ctx;
85
86 static void
87 write_add_object(write_ctx *ctx, const void *obj)
88 {
89 uint32_t index = ctx->next_idx++;
90 assert(index != MAX_OBJECT_IDS);
91 _mesa_hash_table_insert(ctx->remap_table, obj, (void *)(uintptr_t) index);
92 }
93
94 static uint32_t
95 write_lookup_object(write_ctx *ctx, const void *obj)
96 {
97 struct hash_entry *entry = _mesa_hash_table_search(ctx->remap_table, obj);
98 assert(entry);
99 return (uint32_t)(uintptr_t) entry->data;
100 }
101
102 static void
103 write_object(write_ctx *ctx, const void *obj)
104 {
105 blob_write_uint32(ctx->blob, write_lookup_object(ctx, obj));
106 }
107
108 static void
109 read_add_object(read_ctx *ctx, void *obj)
110 {
111 assert(ctx->next_idx < ctx->idx_table_len);
112 ctx->idx_table[ctx->next_idx++] = obj;
113 }
114
115 static void *
116 read_lookup_object(read_ctx *ctx, uint32_t idx)
117 {
118 assert(idx < ctx->idx_table_len);
119 return ctx->idx_table[idx];
120 }
121
122 static void *
123 read_object(read_ctx *ctx)
124 {
125 return read_lookup_object(ctx, blob_read_uint32(ctx->blob));
126 }
127
128 static uint32_t
129 encode_bit_size_3bits(uint8_t bit_size)
130 {
131 /* Encode values of 0, 1, 2, 4, 8, 16, 32, 64 in 3 bits. */
132 assert(bit_size <= 64 && util_is_power_of_two_or_zero(bit_size));
133 if (bit_size)
134 return util_logbase2(bit_size) + 1;
135 return 0;
136 }
137
138 static uint8_t
139 decode_bit_size_3bits(uint8_t bit_size)
140 {
141 if (bit_size)
142 return 1 << (bit_size - 1);
143 return 0;
144 }
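
/* Editor's sketch of the 3-bit bit-size encoding implemented by the two
 * helpers above:
 *
 *    bit_size:  0  1  2  4  8  16  32  64
 *    encoded:   0  1  2  3  4   5   6   7
 *
 * e.g. encode_bit_size_3bits(32) = util_logbase2(32) + 1 = 6, and
 * decode_bit_size_3bits(6) = 1 << (6 - 1) = 32, so every legal NIR bit
 * size round-trips losslessly through 3 bits.
 */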
145
146 static uint8_t
147 encode_num_components_in_3bits(uint8_t num_components)
148 {
149 if (num_components <= 4)
150 return num_components;
151 if (num_components == 8)
152 return 5;
153 if (num_components == 16)
154 return 6;
155
156 unreachable("invalid number in num_components");
157 return 0;
158 }
159
160 static uint8_t
161 decode_num_components_in_3bits(uint8_t value)
162 {
163 if (value <= 4)
164 return value;
165 if (value == 5)
166 return 8;
167 if (value == 6)
168 return 16;
169
170 unreachable("invalid num_components encoding");
171 return 0;
172 }
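
/* The component-count encoding works the same way (editor's note):
 * counts 0-4 map to themselves and the two wide vector sizes fold into
 * the spare code points, e.g. encode_num_components_in_3bits(16) = 6
 * and decode_num_components_in_3bits(6) = 16.
 */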
173
174 static void
175 write_constant(write_ctx *ctx, const nir_constant *c)
176 {
177 blob_write_bytes(ctx->blob, c->values, sizeof(c->values));
178 blob_write_uint32(ctx->blob, c->num_elements);
179 for (unsigned i = 0; i < c->num_elements; i++)
180 write_constant(ctx, c->elements[i]);
181 }
182
183 static nir_constant *
184 read_constant(read_ctx *ctx, nir_variable *nvar)
185 {
186 nir_constant *c = ralloc(nvar, nir_constant);
187
188 blob_copy_bytes(ctx->blob, (uint8_t *)c->values, sizeof(c->values));
189 c->num_elements = blob_read_uint32(ctx->blob);
190 c->elements = ralloc_array(nvar, nir_constant *, c->num_elements);
191 for (unsigned i = 0; i < c->num_elements; i++)
192 c->elements[i] = read_constant(ctx, nvar);
193
194 return c;
195 }
196
197 enum var_data_encoding {
198 var_encode_full,
199 var_encode_shader_temp,
200 var_encode_function_temp,
201 var_encode_location_diff,
202 };
203
204 union packed_var {
205 uint32_t u32;
206 struct {
207 unsigned has_name:1;
208 unsigned has_constant_initializer:1;
209 unsigned has_interface_type:1;
210 unsigned num_state_slots:7;
211 unsigned data_encoding:2;
212 unsigned type_same_as_last:1;
213 unsigned interface_type_same_as_last:1;
214 unsigned _pad:2;
215 unsigned num_members:16;
216 } u;
217 };
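
/* Editor's note: packed_var squeezes all of the per-variable booleans
 * and counts into one dword. For example, a nameless variable with no
 * constant initializer, no members, no state slots, and the same type
 * as the previously written variable needs only this flags word plus
 * its data encoding (see below), rather than re-serializing the type.
 */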
218
219 union packed_var_data_diff {
220 uint32_t u32;
221 struct {
222 int location:13;
223 int location_frac:3;
224 int driver_location:16;
225 } u;
226 };
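
/* Worked example of the diff encoding (editor's sketch): if the
 * previous variable had location 4 and driver_location 16 and the next
 * one is otherwise identical with location 5 and driver_location 20,
 * write_variable() emits a single packed_var_data_diff word with
 *
 *    u.location        = 5 - 4   = 1
 *    u.location_frac   = 0
 *    u.driver_location = 20 - 16 = 4
 *
 * instead of the whole nir_variable_data struct, which is why the
 * fields are signed and sized 13/3/16 bits.
 */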
227
228 static void
229 write_variable(write_ctx *ctx, const nir_variable *var)
230 {
231 write_add_object(ctx, var);
232
233 assert(var->num_state_slots < (1 << 7));
234 assert(var->num_members < (1 << 16));
235
236 STATIC_ASSERT(sizeof(union packed_var) == 4);
237 union packed_var flags;
238 flags.u32 = 0;
239
240 flags.u.has_name = !ctx->strip && var->name;
241 flags.u.has_constant_initializer = !!(var->constant_initializer);
242 flags.u.has_interface_type = !!(var->interface_type);
243 flags.u.type_same_as_last = var->type == ctx->last_type;
244 flags.u.interface_type_same_as_last =
245 var->interface_type && var->interface_type == ctx->last_interface_type;
246 flags.u.num_state_slots = var->num_state_slots;
247 flags.u.num_members = var->num_members;
248
249 struct nir_variable_data data = var->data;
250
251 /* When stripping, we expect that the location is no longer needed;
252 * stripping typically happens after shaders are linked.
253 */
254 if (ctx->strip &&
255 data.mode != nir_var_shader_in &&
256 data.mode != nir_var_shader_out)
257 data.location = 0;
258
259 /* Temporary variables don't serialize var->data. */
260 if (data.mode == nir_var_shader_temp)
261 flags.u.data_encoding = var_encode_shader_temp;
262 else if (data.mode == nir_var_function_temp)
263 flags.u.data_encoding = var_encode_function_temp;
264 else {
265 struct nir_variable_data tmp = data;
266
267 tmp.location = ctx->last_var_data.location;
268 tmp.location_frac = ctx->last_var_data.location_frac;
269 tmp.driver_location = ctx->last_var_data.driver_location;
270
271 /* See if we can encode only the difference in locations from the last
272 * variable.
273 */
274 if (memcmp(&ctx->last_var_data, &tmp, sizeof(tmp)) == 0 &&
275 abs((int)data.location -
276 (int)ctx->last_var_data.location) < (1 << 12) &&
277 abs((int)data.driver_location -
278 (int)ctx->last_var_data.driver_location) < (1 << 15))
279 flags.u.data_encoding = var_encode_location_diff;
280 else
281 flags.u.data_encoding = var_encode_full;
282 }
283
284 blob_write_uint32(ctx->blob, flags.u32);
285
286 if (!flags.u.type_same_as_last) {
287 encode_type_to_blob(ctx->blob, var->type);
288 ctx->last_type = var->type;
289 }
290
291 if (var->interface_type && !flags.u.interface_type_same_as_last) {
292 encode_type_to_blob(ctx->blob, var->interface_type);
293 ctx->last_interface_type = var->interface_type;
294 }
295
296 if (flags.u.has_name)
297 blob_write_string(ctx->blob, var->name);
298
299 if (flags.u.data_encoding == var_encode_full ||
300 flags.u.data_encoding == var_encode_location_diff) {
301 if (flags.u.data_encoding == var_encode_full) {
302 blob_write_bytes(ctx->blob, &data, sizeof(data));
303 } else {
304 /* Serialize only the difference in locations from the last variable.
305 */
306 union packed_var_data_diff diff;
307
308 diff.u.location = data.location - ctx->last_var_data.location;
309 diff.u.location_frac = data.location_frac -
310 ctx->last_var_data.location_frac;
311 diff.u.driver_location = data.driver_location -
312 ctx->last_var_data.driver_location;
313
314 blob_write_uint32(ctx->blob, diff.u32);
315 }
316
317 ctx->last_var_data = data;
318 }
319
320 for (unsigned i = 0; i < var->num_state_slots; i++) {
321 blob_write_bytes(ctx->blob, &var->state_slots[i],
322 sizeof(var->state_slots[i]));
323 }
324 if (var->constant_initializer)
325 write_constant(ctx, var->constant_initializer);
326 if (var->num_members > 0) {
327 blob_write_bytes(ctx->blob, (uint8_t *) var->members,
328 var->num_members * sizeof(*var->members));
329 }
330 }
331
332 static nir_variable *
333 read_variable(read_ctx *ctx)
334 {
335 nir_variable *var = rzalloc(ctx->nir, nir_variable);
336 read_add_object(ctx, var);
337
338 union packed_var flags;
339 flags.u32 = blob_read_uint32(ctx->blob);
340
341 if (flags.u.type_same_as_last) {
342 var->type = ctx->last_type;
343 } else {
344 var->type = decode_type_from_blob(ctx->blob);
345 ctx->last_type = var->type;
346 }
347
348 if (flags.u.has_interface_type) {
349 if (flags.u.interface_type_same_as_last) {
350 var->interface_type = ctx->last_interface_type;
351 } else {
352 var->interface_type = decode_type_from_blob(ctx->blob);
353 ctx->last_interface_type = var->interface_type;
354 }
355 }
356
357 if (flags.u.has_name) {
358 const char *name = blob_read_string(ctx->blob);
359 var->name = ralloc_strdup(var, name);
360 } else {
361 var->name = NULL;
362 }
363
364 if (flags.u.data_encoding == var_encode_shader_temp)
365 var->data.mode = nir_var_shader_temp;
366 else if (flags.u.data_encoding == var_encode_function_temp)
367 var->data.mode = nir_var_function_temp;
368 else if (flags.u.data_encoding == var_encode_full) {
369 blob_copy_bytes(ctx->blob, (uint8_t *) &var->data, sizeof(var->data));
370 ctx->last_var_data = var->data;
371 } else { /* var_encode_location_diff */
372 union packed_var_data_diff diff;
373 diff.u32 = blob_read_uint32(ctx->blob);
374
375 var->data = ctx->last_var_data;
376 var->data.location += diff.u.location;
377 var->data.location_frac += diff.u.location_frac;
378 var->data.driver_location += diff.u.driver_location;
379
380 ctx->last_var_data = var->data;
381 }
382
383 var->num_state_slots = flags.u.num_state_slots;
384 if (var->num_state_slots != 0) {
385 var->state_slots = ralloc_array(var, nir_state_slot,
386 var->num_state_slots);
387 for (unsigned i = 0; i < var->num_state_slots; i++) {
388 blob_copy_bytes(ctx->blob, &var->state_slots[i],
389 sizeof(var->state_slots[i]));
390 }
391 }
392 if (flags.u.has_constant_initializer)
393 var->constant_initializer = read_constant(ctx, var);
394 else
395 var->constant_initializer = NULL;
396 var->num_members = flags.u.num_members;
397 if (var->num_members > 0) {
398 var->members = ralloc_array(var, struct nir_variable_data,
399 var->num_members);
400 blob_copy_bytes(ctx->blob, (uint8_t *) var->members,
401 var->num_members * sizeof(*var->members));
402 }
403
404 return var;
405 }
406
407 static void
408 write_var_list(write_ctx *ctx, const struct exec_list *src)
409 {
410 blob_write_uint32(ctx->blob, exec_list_length(src));
411 foreach_list_typed(nir_variable, var, node, src) {
412 write_variable(ctx, var);
413 }
414 }
415
416 static void
417 read_var_list(read_ctx *ctx, struct exec_list *dst)
418 {
419 exec_list_make_empty(dst);
420 unsigned num_vars = blob_read_uint32(ctx->blob);
421 for (unsigned i = 0; i < num_vars; i++) {
422 nir_variable *var = read_variable(ctx);
423 exec_list_push_tail(dst, &var->node);
424 }
425 }
426
427 static void
428 write_register(write_ctx *ctx, const nir_register *reg)
429 {
430 write_add_object(ctx, reg);
431 blob_write_uint32(ctx->blob, reg->num_components);
432 blob_write_uint32(ctx->blob, reg->bit_size);
433 blob_write_uint32(ctx->blob, reg->num_array_elems);
434 blob_write_uint32(ctx->blob, reg->index);
435 blob_write_uint32(ctx->blob, !ctx->strip && reg->name);
436 if (!ctx->strip && reg->name)
437 blob_write_string(ctx->blob, reg->name);
438 }
439
440 static nir_register *
441 read_register(read_ctx *ctx)
442 {
443 nir_register *reg = ralloc(ctx->nir, nir_register);
444 read_add_object(ctx, reg);
445 reg->num_components = blob_read_uint32(ctx->blob);
446 reg->bit_size = blob_read_uint32(ctx->blob);
447 reg->num_array_elems = blob_read_uint32(ctx->blob);
448 reg->index = blob_read_uint32(ctx->blob);
449 bool has_name = blob_read_uint32(ctx->blob);
450 if (has_name) {
451 const char *name = blob_read_string(ctx->blob);
452 reg->name = ralloc_strdup(reg, name);
453 } else {
454 reg->name = NULL;
455 }
456
457 list_inithead(&reg->uses);
458 list_inithead(&reg->defs);
459 list_inithead(&reg->if_uses);
460
461 return reg;
462 }
463
464 static void
465 write_reg_list(write_ctx *ctx, const struct exec_list *src)
466 {
467 blob_write_uint32(ctx->blob, exec_list_length(src));
468 foreach_list_typed(nir_register, reg, node, src)
469 write_register(ctx, reg);
470 }
471
472 static void
473 read_reg_list(read_ctx *ctx, struct exec_list *dst)
474 {
475 exec_list_make_empty(dst);
476 unsigned num_regs = blob_read_uint32(ctx->blob);
477 for (unsigned i = 0; i < num_regs; i++) {
478 nir_register *reg = read_register(ctx);
479 exec_list_push_tail(dst, &reg->node);
480 }
481 }
482
483 union packed_src {
484 uint32_t u32;
485 struct {
486 unsigned is_ssa:1; /* <-- Header */
487 unsigned is_indirect:1;
488 unsigned object_idx:20;
489 unsigned _footer:10; /* <-- Footer */
490 } any;
491 struct {
492 unsigned _header:22; /* <-- Header */
493 unsigned negate:1; /* <-- Footer */
494 unsigned abs:1;
495 unsigned swizzle_x:2;
496 unsigned swizzle_y:2;
497 unsigned swizzle_z:2;
498 unsigned swizzle_w:2;
499 } alu;
500 struct {
501 unsigned _header:22; /* <-- Header */
502 unsigned src_type:5; /* <-- Footer */
503 unsigned _pad:5;
504 } tex;
505 };
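
/* How the header/footer split works (editor's note): the low 22 bits
 * (is_ssa, is_indirect, object_idx) are common to every source, while
 * the top 10 bits are free for per-use data -- an ALU source keeps its
 * negate/abs flags and 4-channel swizzle there, and a texture source
 * keeps its nir_tex_src_type. write_src_full() below merges the two
 * halves into a single uint32_t.
 */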
506
507 static void
508 write_src_full(write_ctx *ctx, const nir_src *src, union packed_src header)
509 {
510 /* Since sources are very frequent, we try to save some space when storing
511 * them. The whole source packs into one 32-bit word: one bit each for
512 * "is SSA" and "has an indirect index", a 20-bit object index (which
513 * MAX_OBJECT_IDS guarantees always fits), and a 10-bit footer that
514 * instruction-specific data such as ALU modifiers can reuse.
515 */
516 header.any.is_ssa = src->is_ssa;
517 if (src->is_ssa) {
518 header.any.object_idx = write_lookup_object(ctx, src->ssa);
519 blob_write_uint32(ctx->blob, header.u32);
520 } else {
521 header.any.object_idx = write_lookup_object(ctx, src->reg.reg);
522 header.any.is_indirect = !!src->reg.indirect;
523 blob_write_uint32(ctx->blob, header.u32);
524 blob_write_uint32(ctx->blob, src->reg.base_offset);
525 if (src->reg.indirect) {
526 union packed_src header = {0};
527 write_src_full(ctx, src->reg.indirect, header);
528 }
529 }
530 }
531
532 static void
533 write_src(write_ctx *ctx, const nir_src *src)
534 {
535 union packed_src header = {0};
536 write_src_full(ctx, src, header);
537 }
538
539 static union packed_src
540 read_src(read_ctx *ctx, nir_src *src, void *mem_ctx)
541 {
542 STATIC_ASSERT(sizeof(union packed_src) == 4);
543 union packed_src header;
544 header.u32 = blob_read_uint32(ctx->blob);
545
546 src->is_ssa = header.any.is_ssa;
547 if (src->is_ssa) {
548 src->ssa = read_lookup_object(ctx, header.any.object_idx);
549 } else {
550 src->reg.reg = read_lookup_object(ctx, header.any.object_idx);
551 src->reg.base_offset = blob_read_uint32(ctx->blob);
552 if (header.any.is_indirect) {
553 src->reg.indirect = ralloc(mem_ctx, nir_src);
554 read_src(ctx, src->reg.indirect, mem_ctx);
555 } else {
556 src->reg.indirect = NULL;
557 }
558 }
559 return header;
560 }
561
562 union packed_dest {
563 uint8_t u8;
564 struct {
565 uint8_t is_ssa:1;
566 uint8_t has_name:1;
567 uint8_t num_components:3;
568 uint8_t bit_size:3;
569 } ssa;
570 struct {
571 uint8_t is_ssa:1;
572 uint8_t is_indirect:1;
573 uint8_t _pad:6;
574 } reg;
575 };
576
577 enum intrinsic_const_indices_encoding {
578 /* Use the 6 bits of packed_const_indices to store 1-6 indices.
579 * 1 6-bit index, or 2 3-bit indices, or 3 2-bit indices, or
580 * 4-6 1-bit indices.
581 *
582 * The common case for load_ubo is 0, 0, 0, which is trivially represented.
583 * The common cases for load_interpolated_input also fit here, e.g.: 7, 3
584 */
585 const_indices_6bit_all_combined,
586
587 const_indices_8bit, /* 8 bits per element */
588 const_indices_16bit, /* 16 bits per element */
589 const_indices_32bit, /* 32 bits per element */
590 };
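
/* Worked example for const_indices_6bit_all_combined (editor's sketch):
 * an intrinsic with two indices of values 7 and 3 has
 * max_bits = util_last_bit(7) = 3, and 3 * 2 <= 6, so both indices fit
 * with bit_size = 6 / 2 = 3:
 *
 *    packed_const_indices = 7 | (3 << 3) = 0x1f
 *
 * The reader recomputes the same bit_size and unpacks with a 3-bit mask.
 */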
591
592 enum load_const_packing {
593 /* Constants are not packed and are stored in the following dwords. */
594 load_const_full,
595
596 /* packed_value contains the high 19 bits and the low bits are 0; this
597 * suits floating-point constants, whose low mantissa bits are often 0.
598 */
599 load_const_scalar_hi_19bits,
600
601 /* packed_value contains low 19 bits, high bits are sign-extended */
602 load_const_scalar_lo_19bits_sext,
603 };
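
/* Example of the scalar packings (editor's sketch): the 32-bit float
 * 1.0f is 0x3f800000, whose low 13 bits are zero, so it is stored as
 * load_const_scalar_hi_19bits with packed_value = 0x3f800000 >> 13.
 * A small integer such as -2 passes the sign-extension test
 * ((i32 << 13) >> 13 == i32) and is stored as
 * load_const_scalar_lo_19bits_sext. Either way the constant costs no
 * extra dwords beyond the instruction header.
 */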
604
605 union packed_instr {
606 uint32_t u32;
607 struct {
608 unsigned instr_type:4; /* always present */
609 unsigned _pad:20;
610 unsigned dest:8; /* always last */
611 } any;
612 struct {
613 unsigned instr_type:4;
614 unsigned exact:1;
615 unsigned no_signed_wrap:1;
616 unsigned no_unsigned_wrap:1;
617 unsigned saturate:1;
618 unsigned writemask:4;
619 unsigned op:9;
620 unsigned packed_src_ssa_16bit:1;
621 unsigned _pad:2;
622 unsigned dest:8;
623 } alu;
624 struct {
625 unsigned instr_type:4;
626 unsigned deref_type:3;
627 unsigned cast_type_same_as_last:1;
628 unsigned mode:10;
629 unsigned _pad:6;
630 unsigned dest:8;
631 } deref;
632 struct {
633 unsigned instr_type:4;
634 unsigned intrinsic:9;
635 unsigned num_components:3;
636 unsigned const_indices_encoding:2;
637 unsigned packed_const_indices:6;
638 unsigned dest:8;
639 } intrinsic;
640 struct {
641 unsigned instr_type:4;
642 unsigned last_component:4;
643 unsigned bit_size:3;
644 unsigned packing:2; /* enum load_const_packing */
645 unsigned packed_value:19; /* meaning determined by packing */
646 } load_const;
647 struct {
648 unsigned instr_type:4;
649 unsigned last_component:4;
650 unsigned bit_size:3;
651 unsigned _pad:21;
652 } undef;
653 struct {
654 unsigned instr_type:4;
655 unsigned num_srcs:4;
656 unsigned op:4;
657 unsigned texture_array_size:12;
658 unsigned dest:8;
659 } tex;
660 struct {
661 unsigned instr_type:4;
662 unsigned num_srcs:20;
663 unsigned dest:8;
664 } phi;
665 struct {
666 unsigned instr_type:4;
667 unsigned type:2;
668 unsigned _pad:26;
669 } jump;
670 };
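
/* Layout convention (editor's note): every variant above starts with
 * the same 4-bit instr_type, so read_instr() can dispatch on
 * header.any.instr_type before knowing which variant applies; variants
 * with a destination all keep it in the top 8 bits (see write_dest()
 * just below).
 */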
671
672 /* The caller fills the low 24 bits of the header; this packs the dest into the top 8 bits and writes the whole uint32. */
673 static void
674 write_dest(write_ctx *ctx, const nir_dest *dst, union packed_instr header)
675 {
676 STATIC_ASSERT(sizeof(union packed_dest) == 1);
677 union packed_dest dest;
678 dest.u8 = 0;
679
680 dest.ssa.is_ssa = dst->is_ssa;
681 if (dst->is_ssa) {
682 dest.ssa.has_name = !ctx->strip && dst->ssa.name;
683 dest.ssa.num_components =
684 encode_num_components_in_3bits(dst->ssa.num_components);
685 dest.ssa.bit_size = encode_bit_size_3bits(dst->ssa.bit_size);
686 } else {
687 dest.reg.is_indirect = !!(dst->reg.indirect);
688 }
689
690 header.any.dest = dest.u8;
691 blob_write_uint32(ctx->blob, header.u32);
692
693 if (dst->is_ssa) {
694 write_add_object(ctx, &dst->ssa);
695 if (dest.ssa.has_name)
696 blob_write_string(ctx->blob, dst->ssa.name);
697 } else {
698 blob_write_uint32(ctx->blob, write_lookup_object(ctx, dst->reg.reg));
699 blob_write_uint32(ctx->blob, dst->reg.base_offset);
700 if (dst->reg.indirect)
701 write_src(ctx, dst->reg.indirect);
702 }
703 }
704
705 static void
706 read_dest(read_ctx *ctx, nir_dest *dst, nir_instr *instr,
707 union packed_instr header)
708 {
709 union packed_dest dest;
710 dest.u8 = header.any.dest;
711
712 if (dest.ssa.is_ssa) {
713 unsigned bit_size = decode_bit_size_3bits(dest.ssa.bit_size);
714 unsigned num_components =
715 decode_num_components_in_3bits(dest.ssa.num_components);
716 char *name = dest.ssa.has_name ? blob_read_string(ctx->blob) : NULL;
717 nir_ssa_dest_init(instr, dst, num_components, bit_size, name);
718 read_add_object(ctx, &dst->ssa);
719 } else {
720 dst->reg.reg = read_object(ctx);
721 dst->reg.base_offset = blob_read_uint32(ctx->blob);
722 if (dest.reg.is_indirect) {
723 dst->reg.indirect = ralloc(instr, nir_src);
724 read_src(ctx, dst->reg.indirect, instr);
725 }
726 }
727 }
728
729 static bool
730 are_object_ids_16bit(write_ctx *ctx)
731 {
732 /* Check the next object ID: IDs are assigned monotonically, so it bounds them all. */
733 return ctx->next_idx < (1 << 16);
734 }
735
736 static bool
737 is_alu_src_ssa_16bit(write_ctx *ctx, const nir_alu_instr *alu)
738 {
739 unsigned num_srcs = nir_op_infos[alu->op].num_inputs;
740
741 for (unsigned i = 0; i < num_srcs; i++) {
742 if (!alu->src[i].src.is_ssa || alu->src[i].abs || alu->src[i].negate)
743 return false;
744
745 unsigned src_components = nir_ssa_alu_instr_src_components(alu, i);
746
747 for (unsigned chan = 0; chan < src_components; chan++) {
748 if (alu->src[i].swizzle[chan] != chan)
749 return false;
750 }
751 }
752
753 return are_object_ids_16bit(ctx);
754 }
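
/* In other words (editor's note): an ALU source qualifies for the
 * packed 16-bit path only when it is a plain SSA use -- no abs/negate
 * and an identity swizzle over the channels it reads -- and every
 * object ID so far fits in 16 bits. Such sources serialize as one
 * uint16 index each instead of a full 4-byte packed_src.
 */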
755
756 static void
757 write_alu(write_ctx *ctx, const nir_alu_instr *alu)
758 {
759 unsigned num_srcs = nir_op_infos[alu->op].num_inputs;
760 /* 9 bits for nir_op */
761 STATIC_ASSERT(nir_num_opcodes <= 512);
762 union packed_instr header;
763 header.u32 = 0;
764
765 header.alu.instr_type = alu->instr.type;
766 header.alu.exact = alu->exact;
767 header.alu.no_signed_wrap = alu->no_signed_wrap;
768 header.alu.no_unsigned_wrap = alu->no_unsigned_wrap;
769 header.alu.saturate = alu->dest.saturate;
770 header.alu.writemask = alu->dest.write_mask;
771 header.alu.op = alu->op;
772 header.alu.packed_src_ssa_16bit = is_alu_src_ssa_16bit(ctx, alu);
773
774 write_dest(ctx, &alu->dest.dest, header);
775
776 if (header.alu.packed_src_ssa_16bit) {
777 for (unsigned i = 0; i < num_srcs; i++) {
778 assert(alu->src[i].src.is_ssa);
779 unsigned idx = write_lookup_object(ctx, alu->src[i].src.ssa);
780 assert(idx < (1 << 16));
781 blob_write_uint16(ctx->blob, idx);
782 }
783 } else {
784 for (unsigned i = 0; i < num_srcs; i++) {
785 union packed_src src;
786 src.u32 = 0;
787
788 src.alu.negate = alu->src[i].negate;
789 src.alu.abs = alu->src[i].abs;
790 src.alu.swizzle_x = alu->src[i].swizzle[0];
791 src.alu.swizzle_y = alu->src[i].swizzle[1];
792 src.alu.swizzle_z = alu->src[i].swizzle[2];
793 src.alu.swizzle_w = alu->src[i].swizzle[3];
794
795 write_src_full(ctx, &alu->src[i].src, src);
796 }
797 }
798 }
799
800 static nir_alu_instr *
801 read_alu(read_ctx *ctx, union packed_instr header)
802 {
803 unsigned num_srcs = nir_op_infos[header.alu.op].num_inputs;
804 nir_alu_instr *alu = nir_alu_instr_create(ctx->nir, header.alu.op);
805
806 alu->exact = header.alu.exact;
807 alu->no_signed_wrap = header.alu.no_signed_wrap;
808 alu->no_unsigned_wrap = header.alu.no_unsigned_wrap;
809 alu->dest.saturate = header.alu.saturate;
810 alu->dest.write_mask = header.alu.writemask;
811
812 read_dest(ctx, &alu->dest.dest, &alu->instr, header);
813
814 if (header.alu.packed_src_ssa_16bit) {
815 for (unsigned i = 0; i < num_srcs; i++) {
816 nir_alu_src *src = &alu->src[i];
817 src->src.is_ssa = true;
818 src->src.ssa = read_lookup_object(ctx, blob_read_uint16(ctx->blob));
819
820 memset(&src->swizzle, 0, sizeof(src->swizzle));
821
822 unsigned src_components = nir_ssa_alu_instr_src_components(alu, i);
823
824 for (unsigned chan = 0; chan < src_components; chan++)
825 src->swizzle[chan] = chan;
826 }
827 } else {
828 for (unsigned i = 0; i < num_srcs; i++) {
829 union packed_src src = read_src(ctx, &alu->src[i].src, &alu->instr);
830
831 alu->src[i].negate = src.alu.negate;
832 alu->src[i].abs = src.alu.abs;
833 alu->src[i].swizzle[0] = src.alu.swizzle_x;
834 alu->src[i].swizzle[1] = src.alu.swizzle_y;
835 alu->src[i].swizzle[2] = src.alu.swizzle_z;
836 alu->src[i].swizzle[3] = src.alu.swizzle_w;
837 }
838 }
839
840 return alu;
841 }
842
843 static void
844 write_deref(write_ctx *ctx, const nir_deref_instr *deref)
845 {
846 assert(deref->deref_type < 8);
847 assert(deref->mode < (1 << 10));
848
849 union packed_instr header;
850 header.u32 = 0;
851
852 header.deref.instr_type = deref->instr.type;
853 header.deref.deref_type = deref->deref_type;
854
855 if (deref->deref_type == nir_deref_type_cast) {
856 header.deref.mode = deref->mode;
857 header.deref.cast_type_same_as_last = deref->type == ctx->last_type;
858 }
859
860 write_dest(ctx, &deref->dest, header);
861
862 if (deref->deref_type == nir_deref_type_var) {
863 write_object(ctx, deref->var);
864 return;
865 }
866
867 write_src(ctx, &deref->parent);
868
869 switch (deref->deref_type) {
870 case nir_deref_type_struct:
871 blob_write_uint32(ctx->blob, deref->strct.index);
872 break;
873
874 case nir_deref_type_array:
875 case nir_deref_type_ptr_as_array:
876 write_src(ctx, &deref->arr.index);
877 break;
878
879 case nir_deref_type_cast:
880 blob_write_uint32(ctx->blob, deref->cast.ptr_stride);
881 if (!header.deref.cast_type_same_as_last) {
882 encode_type_to_blob(ctx->blob, deref->type);
883 ctx->last_type = deref->type;
884 }
885 break;
886
887 case nir_deref_type_array_wildcard:
888 /* Nothing to do */
889 break;
890
891 default:
892 unreachable("Invalid deref type");
893 }
894 }
895
896 static nir_deref_instr *
897 read_deref(read_ctx *ctx, union packed_instr header)
898 {
899 nir_deref_type deref_type = header.deref.deref_type;
900 nir_deref_instr *deref = nir_deref_instr_create(ctx->nir, deref_type);
901
902 read_dest(ctx, &deref->dest, &deref->instr, header);
903
904 if (deref_type == nir_deref_type_var) {
905 deref->var = read_object(ctx);
906 deref->type = deref->var->type;
907 deref->mode = deref->var->data.mode;
908 return deref;
909 }
910
911 read_src(ctx, &deref->parent, &deref->instr);
912 nir_deref_instr *parent;
913
914 switch (deref->deref_type) {
915 case nir_deref_type_struct:
916 parent = nir_src_as_deref(deref->parent);
917 deref->strct.index = blob_read_uint32(ctx->blob);
918 deref->type = glsl_get_struct_field(parent->type, deref->strct.index);
919 break;
920
921 case nir_deref_type_array:
922 case nir_deref_type_ptr_as_array:
923 parent = nir_src_as_deref(deref->parent);
924 if (deref->deref_type == nir_deref_type_array)
925 deref->type = glsl_get_array_element(parent->type);
926 else
927 deref->type = parent->type;
928 read_src(ctx, &deref->arr.index, &deref->instr);
929 break;
930
931 case nir_deref_type_cast:
932 deref->cast.ptr_stride = blob_read_uint32(ctx->blob);
933 if (header.deref.cast_type_same_as_last) {
934 deref->type = ctx->last_type;
935 } else {
936 deref->type = decode_type_from_blob(ctx->blob);
937 ctx->last_type = deref->type;
938 }
939 break;
940
941 case nir_deref_type_array_wildcard:
942 parent = nir_src_as_deref(deref->parent);
943 deref->type = glsl_get_array_element(parent->type);
944 break;
945
946 default:
947 unreachable("Invalid deref type");
948 }
949
950 if (deref->deref_type == nir_deref_type_cast) {
951 deref->mode = header.deref.mode;
952 } else {
953 assert(deref->parent.is_ssa);
954 deref->mode = nir_instr_as_deref(deref->parent.ssa->parent_instr)->mode;
955 }
956
957 return deref;
958 }
959
960 static void
961 write_intrinsic(write_ctx *ctx, const nir_intrinsic_instr *intrin)
962 {
963 /* 9 bits for nir_intrinsic_op */
964 STATIC_ASSERT(nir_num_intrinsics <= 512);
965 unsigned num_srcs = nir_intrinsic_infos[intrin->intrinsic].num_srcs;
966 unsigned num_indices = nir_intrinsic_infos[intrin->intrinsic].num_indices;
967 assert(intrin->intrinsic < 512);
968
969 union packed_instr header;
970 header.u32 = 0;
971
972 header.intrinsic.instr_type = intrin->instr.type;
973 header.intrinsic.intrinsic = intrin->intrinsic;
974 header.intrinsic.num_components =
975 encode_num_components_in_3bits(intrin->num_components);
976
977 /* Analyze constant indices to decide how to encode them. */
978 if (num_indices) {
979 unsigned max_bits = 0;
980 for (unsigned i = 0; i < num_indices; i++) {
981 unsigned max = util_last_bit(intrin->const_index[i]);
982 max_bits = MAX2(max_bits, max);
983 }
984
985 if (max_bits * num_indices <= 6) {
986 header.intrinsic.const_indices_encoding = const_indices_6bit_all_combined;
987
988 /* Pack all const indices into 6 bits. */
989 unsigned bit_size = 6 / num_indices;
990 for (unsigned i = 0; i < num_indices; i++) {
991 header.intrinsic.packed_const_indices |=
992 intrin->const_index[i] << (i * bit_size);
993 }
994 } else if (max_bits <= 8)
995 header.intrinsic.const_indices_encoding = const_indices_8bit;
996 else if (max_bits <= 16)
997 header.intrinsic.const_indices_encoding = const_indices_16bit;
998 else
999 header.intrinsic.const_indices_encoding = const_indices_32bit;
1000 }
1001
1002 if (nir_intrinsic_infos[intrin->intrinsic].has_dest)
1003 write_dest(ctx, &intrin->dest, header);
1004 else
1005 blob_write_uint32(ctx->blob, header.u32);
1006
1007 for (unsigned i = 0; i < num_srcs; i++)
1008 write_src(ctx, &intrin->src[i]);
1009
1010 if (num_indices) {
1011 switch (header.intrinsic.const_indices_encoding) {
1012 case const_indices_8bit:
1013 for (unsigned i = 0; i < num_indices; i++)
1014 blob_write_uint8(ctx->blob, intrin->const_index[i]);
1015 break;
1016 case const_indices_16bit:
1017 for (unsigned i = 0; i < num_indices; i++)
1018 blob_write_uint16(ctx->blob, intrin->const_index[i]);
1019 break;
1020 case const_indices_32bit:
1021 for (unsigned i = 0; i < num_indices; i++)
1022 blob_write_uint32(ctx->blob, intrin->const_index[i]);
1023 break;
1024 }
1025 }
1026 }
1027
1028 static nir_intrinsic_instr *
1029 read_intrinsic(read_ctx *ctx, union packed_instr header)
1030 {
1031 nir_intrinsic_op op = header.intrinsic.intrinsic;
1032 nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(ctx->nir, op);
1033
1034 unsigned num_srcs = nir_intrinsic_infos[op].num_srcs;
1035 unsigned num_indices = nir_intrinsic_infos[op].num_indices;
1036
1037 intrin->num_components =
1038 decode_num_components_in_3bits(header.intrinsic.num_components);
1039
1040 if (nir_intrinsic_infos[op].has_dest)
1041 read_dest(ctx, &intrin->dest, &intrin->instr, header);
1042
1043 for (unsigned i = 0; i < num_srcs; i++)
1044 read_src(ctx, &intrin->src[i], &intrin->instr);
1045
1046 if (num_indices) {
1047 switch (header.intrinsic.const_indices_encoding) {
1048 case const_indices_6bit_all_combined: {
1049 unsigned bit_size = 6 / num_indices;
1050 unsigned bit_mask = u_bit_consecutive(0, bit_size);
1051 for (unsigned i = 0; i < num_indices; i++) {
1052 intrin->const_index[i] =
1053 (header.intrinsic.packed_const_indices >> (i * bit_size)) &
1054 bit_mask;
1055 }
1056 break;
1057 }
1058 case const_indices_8bit:
1059 for (unsigned i = 0; i < num_indices; i++)
1060 intrin->const_index[i] = blob_read_uint8(ctx->blob);
1061 break;
1062 case const_indices_16bit:
1063 for (unsigned i = 0; i < num_indices; i++)
1064 intrin->const_index[i] = blob_read_uint16(ctx->blob);
1065 break;
1066 case const_indices_32bit:
1067 for (unsigned i = 0; i < num_indices; i++)
1068 intrin->const_index[i] = blob_read_uint32(ctx->blob);
1069 break;
1070 }
1071 }
1072
1073 return intrin;
1074 }
1075
1076 static void
1077 write_load_const(write_ctx *ctx, const nir_load_const_instr *lc)
1078 {
1079 assert(lc->def.num_components >= 1 && lc->def.num_components <= 16);
1080 union packed_instr header;
1081 header.u32 = 0;
1082
1083 header.load_const.instr_type = lc->instr.type;
1084 header.load_const.last_component = lc->def.num_components - 1;
1085 header.load_const.bit_size = encode_bit_size_3bits(lc->def.bit_size);
1086 header.load_const.packing = load_const_full;
1087
1088 /* Try to pack 1-component constants into the 19 free bits in the header. */
1089 if (lc->def.num_components == 1) {
1090 switch (lc->def.bit_size) {
1091 case 64:
1092 if ((lc->value[0].u64 & 0x1fffffffffffull) == 0) {
1093 /* packed_value contains high 19 bits, low bits are 0 */
1094 header.load_const.packing = load_const_scalar_hi_19bits;
1095 header.load_const.packed_value = lc->value[0].u64 >> 45;
1096 } else if (((lc->value[0].i64 << 45) >> 45) == lc->value[0].i64) {
1097 /* packed_value contains low 19 bits, high bits are sign-extended */
1098 header.load_const.packing = load_const_scalar_lo_19bits_sext;
1099 header.load_const.packed_value = lc->value[0].u64;
1100 }
1101 break;
1102
1103 case 32:
1104 if ((lc->value[0].u32 & 0x1fff) == 0) {
1105 header.load_const.packing = load_const_scalar_hi_19bits;
1106 header.load_const.packed_value = lc->value[0].u32 >> 13;
1107 } else if (((lc->value[0].i32 << 13) >> 13) == lc->value[0].i32) {
1108 header.load_const.packing = load_const_scalar_lo_19bits_sext;
1109 header.load_const.packed_value = lc->value[0].u32;
1110 }
1111 break;
1112
1113 case 16:
1114 header.load_const.packing = load_const_scalar_lo_19bits_sext;
1115 header.load_const.packed_value = lc->value[0].u16;
1116 break;
1117 case 8:
1118 header.load_const.packing = load_const_scalar_lo_19bits_sext;
1119 header.load_const.packed_value = lc->value[0].u8;
1120 break;
1121 case 1:
1122 header.load_const.packing = load_const_scalar_lo_19bits_sext;
1123 header.load_const.packed_value = lc->value[0].b;
1124 break;
1125 default:
1126 unreachable("invalid bit_size");
1127 }
1128 }
1129
1130 blob_write_uint32(ctx->blob, header.u32);
1131
1132 if (header.load_const.packing == load_const_full) {
1133 switch (lc->def.bit_size) {
1134 case 64:
1135 blob_write_bytes(ctx->blob, lc->value,
1136 sizeof(*lc->value) * lc->def.num_components);
1137 break;
1138
1139 case 32:
1140 for (unsigned i = 0; i < lc->def.num_components; i++)
1141 blob_write_uint32(ctx->blob, lc->value[i].u32);
1142 break;
1143
1144 case 16:
1145 for (unsigned i = 0; i < lc->def.num_components; i++)
1146 blob_write_uint16(ctx->blob, lc->value[i].u16);
1147 break;
1148
1149 default:
1150 assert(lc->def.bit_size <= 8);
1151 for (unsigned i = 0; i < lc->def.num_components; i++)
1152 blob_write_uint8(ctx->blob, lc->value[i].u8);
1153 break;
1154 }
1155 }
1156
1157 write_add_object(ctx, &lc->def);
1158 }
1159
1160 static nir_load_const_instr *
1161 read_load_const(read_ctx *ctx, union packed_instr header)
1162 {
1163 nir_load_const_instr *lc =
1164 nir_load_const_instr_create(ctx->nir, header.load_const.last_component + 1,
1165 decode_bit_size_3bits(header.load_const.bit_size));
1166
1167 switch (header.load_const.packing) {
1168 case load_const_scalar_hi_19bits:
1169 switch (lc->def.bit_size) {
1170 case 64:
1171 lc->value[0].u64 = (uint64_t)header.load_const.packed_value << 45;
1172 break;
1173 case 32:
1174 lc->value[0].u32 = (uint64_t)header.load_const.packed_value << 13;
1175 break;
1176 default:
1177 unreachable("invalid bit_size");
1178 }
1179 break;
1180
1181 case load_const_scalar_lo_19bits_sext:
1182 switch (lc->def.bit_size) {
1183 case 64:
1184 lc->value[0].i64 = ((int64_t)header.load_const.packed_value << 45) >> 45;
1185 break;
1186 case 32:
1187 lc->value[0].i32 = ((int32_t)header.load_const.packed_value << 13) >> 13;
1188 break;
1189 case 16:
1190 lc->value[0].u16 = header.load_const.packed_value;
1191 break;
1192 case 8:
1193 lc->value[0].u8 = header.load_const.packed_value;
1194 break;
1195 case 1:
1196 lc->value[0].b = header.load_const.packed_value;
1197 break;
1198 default:
1199 unreachable("invalid bit_size");
1200 }
1201 break;
1202
1203 case load_const_full:
1204 switch (lc->def.bit_size) {
1205 case 64:
1206 blob_copy_bytes(ctx->blob, lc->value, sizeof(*lc->value) * lc->def.num_components);
1207 break;
1208
1209 case 32:
1210 for (unsigned i = 0; i < lc->def.num_components; i++)
1211 lc->value[i].u32 = blob_read_uint32(ctx->blob);
1212 break;
1213
1214 case 16:
1215 for (unsigned i = 0; i < lc->def.num_components; i++)
1216 lc->value[i].u16 = blob_read_uint16(ctx->blob);
1217 break;
1218
1219 default:
1220 assert(lc->def.bit_size <= 8);
1221 for (unsigned i = 0; i < lc->def.num_components; i++)
1222 lc->value[i].u8 = blob_read_uint8(ctx->blob);
1223 break;
1224 }
1225 break;
1226 }
1227
1228 read_add_object(ctx, &lc->def);
1229 return lc;
1230 }
1231
1232 static void
1233 write_ssa_undef(write_ctx *ctx, const nir_ssa_undef_instr *undef)
1234 {
1235 assert(undef->def.num_components >= 1 && undef->def.num_components <= 16);
1236
1237 union packed_instr header;
1238 header.u32 = 0;
1239
1240 header.undef.instr_type = undef->instr.type;
1241 header.undef.last_component = undef->def.num_components - 1;
1242 header.undef.bit_size = encode_bit_size_3bits(undef->def.bit_size);
1243
1244 blob_write_uint32(ctx->blob, header.u32);
1245 write_add_object(ctx, &undef->def);
1246 }
1247
1248 static nir_ssa_undef_instr *
1249 read_ssa_undef(read_ctx *ctx, union packed_instr header)
1250 {
1251 nir_ssa_undef_instr *undef =
1252 nir_ssa_undef_instr_create(ctx->nir, header.undef.last_component + 1,
1253 decode_bit_size_3bits(header.undef.bit_size));
1254
1255 read_add_object(ctx, &undef->def);
1256 return undef;
1257 }
1258
1259 union packed_tex_data {
1260 uint32_t u32;
1261 struct {
1262 enum glsl_sampler_dim sampler_dim:4;
1263 nir_alu_type dest_type:8;
1264 unsigned coord_components:3;
1265 unsigned is_array:1;
1266 unsigned is_shadow:1;
1267 unsigned is_new_style_shadow:1;
1268 unsigned component:2;
1269 unsigned unused:12; /* Mark unused for valgrind. */
1270 } u;
1271 };
1272
1273 static void
1274 write_tex(write_ctx *ctx, const nir_tex_instr *tex)
1275 {
1276 assert(tex->num_srcs < 16);
1277 assert(tex->op < 16);
1278 assert(tex->texture_array_size < 1024);
1279
1280 union packed_instr header;
1281 header.u32 = 0;
1282
1283 header.tex.instr_type = tex->instr.type;
1284 header.tex.num_srcs = tex->num_srcs;
1285 header.tex.op = tex->op;
1286 header.tex.texture_array_size = tex->texture_array_size;
1287
1288 write_dest(ctx, &tex->dest, header);
1289
1290 blob_write_uint32(ctx->blob, tex->texture_index);
1291 blob_write_uint32(ctx->blob, tex->sampler_index);
1292 if (tex->op == nir_texop_tg4)
1293 blob_write_bytes(ctx->blob, tex->tg4_offsets, sizeof(tex->tg4_offsets));
1294
1295 STATIC_ASSERT(sizeof(union packed_tex_data) == sizeof(uint32_t));
1296 union packed_tex_data packed = {
1297 .u.sampler_dim = tex->sampler_dim,
1298 .u.dest_type = tex->dest_type,
1299 .u.coord_components = tex->coord_components,
1300 .u.is_array = tex->is_array,
1301 .u.is_shadow = tex->is_shadow,
1302 .u.is_new_style_shadow = tex->is_new_style_shadow,
1303 .u.component = tex->component,
1304 };
1305 blob_write_uint32(ctx->blob, packed.u32);
1306
1307 for (unsigned i = 0; i < tex->num_srcs; i++) {
1308 union packed_src src;
1309 src.u32 = 0;
1310 src.tex.src_type = tex->src[i].src_type;
1311 write_src_full(ctx, &tex->src[i].src, src);
1312 }
1313 }
1314
1315 static nir_tex_instr *
1316 read_tex(read_ctx *ctx, union packed_instr header)
1317 {
1318 nir_tex_instr *tex = nir_tex_instr_create(ctx->nir, header.tex.num_srcs);
1319
1320 read_dest(ctx, &tex->dest, &tex->instr, header);
1321
1322 tex->op = header.tex.op;
1323 tex->texture_index = blob_read_uint32(ctx->blob);
1324 tex->texture_array_size = header.tex.texture_array_size;
1325 tex->sampler_index = blob_read_uint32(ctx->blob);
1326 if (tex->op == nir_texop_tg4)
1327 blob_copy_bytes(ctx->blob, tex->tg4_offsets, sizeof(tex->tg4_offsets));
1328
1329 union packed_tex_data packed;
1330 packed.u32 = blob_read_uint32(ctx->blob);
1331 tex->sampler_dim = packed.u.sampler_dim;
1332 tex->dest_type = packed.u.dest_type;
1333 tex->coord_components = packed.u.coord_components;
1334 tex->is_array = packed.u.is_array;
1335 tex->is_shadow = packed.u.is_shadow;
1336 tex->is_new_style_shadow = packed.u.is_new_style_shadow;
1337 tex->component = packed.u.component;
1338
1339 for (unsigned i = 0; i < tex->num_srcs; i++) {
1340 union packed_src src = read_src(ctx, &tex->src[i].src, &tex->instr);
1341 tex->src[i].src_type = src.tex.src_type;
1342 }
1343
1344 return tex;
1345 }
1346
1347 static void
1348 write_phi(write_ctx *ctx, const nir_phi_instr *phi)
1349 {
1350 union packed_instr header;
1351 header.u32 = 0;
1352
1353 header.phi.instr_type = phi->instr.type;
1354 header.phi.num_srcs = exec_list_length(&phi->srcs);
1355
1356 /* Phi nodes are special, since they may reference SSA definitions and
1357 * basic blocks that don't exist yet. We leave two empty uint32_t's per
1358 * source below, and store enough information for a later fixup pass to
1359 * fill them in correctly.
1360 */
1361 write_dest(ctx, &phi->dest, header);
1362
1363 nir_foreach_phi_src(src, phi) {
1364 assert(src->src.is_ssa);
1365 size_t blob_offset = blob_reserve_uint32(ctx->blob);
1366 ASSERTED size_t blob_offset2 = blob_reserve_uint32(ctx->blob);
1367 assert(blob_offset + sizeof(uint32_t) == blob_offset2);
1368 write_phi_fixup fixup = {
1369 .blob_offset = blob_offset,
1370 .src = src->src.ssa,
1371 .block = src->pred,
1372 };
1373 util_dynarray_append(&ctx->phi_fixups, write_phi_fixup, fixup);
1374 }
1375 }
1376
1377 static void
1378 write_fixup_phis(write_ctx *ctx)
1379 {
1380 util_dynarray_foreach(&ctx->phi_fixups, write_phi_fixup, fixup) {
1381 uint32_t *blob_ptr = (uint32_t *)(ctx->blob->data + fixup->blob_offset);
1382 blob_ptr[0] = write_lookup_object(ctx, fixup->src);
1383 blob_ptr[1] = write_lookup_object(ctx, fixup->block);
1384 }
1385
1386 util_dynarray_clear(&ctx->phi_fixups);
1387 }
1388
1389 static nir_phi_instr *
1390 read_phi(read_ctx *ctx, nir_block *blk, union packed_instr header)
1391 {
1392 nir_phi_instr *phi = nir_phi_instr_create(ctx->nir);
1393
1394 read_dest(ctx, &phi->dest, &phi->instr, header);
1395
1396 /* For similar reasons as before, we just store the index directly into the
1397 * pointer, and let a later pass resolve the phi sources.
1398 *
1399 * In order to ensure that the copied sources (which are just the indices
1400 * from the blob for now) don't get inserted into the old shader's use-def
1401 * lists, we have to add the phi instruction *before* we set up its
1402 * sources.
1403 */
1404 nir_instr_insert_after_block(blk, &phi->instr);
1405
1406 for (unsigned i = 0; i < header.phi.num_srcs; i++) {
1407 nir_phi_src *src = ralloc(phi, nir_phi_src);
1408
1409 src->src.is_ssa = true;
1410 src->src.ssa = (nir_ssa_def *)(uintptr_t) blob_read_uint32(ctx->blob);
1411 src->pred = (nir_block *)(uintptr_t) blob_read_uint32(ctx->blob);
1412
1413 /* Since we're not letting nir_insert_instr handle use/def stuff for us,
1414 * we have to set the parent_instr manually. It doesn't really matter
1415 * when we do it, so we might as well do it here.
1416 */
1417 src->src.parent_instr = &phi->instr;
1418
1419 /* Stash it in the list of phi sources. We'll walk this list and fix up
1420 * sources at the very end of read_function_impl.
1421 */
1422 list_add(&src->src.use_link, &ctx->phi_srcs);
1423
1424 exec_list_push_tail(&phi->srcs, &src->node);
1425 }
1426
1427 return phi;
1428 }
1429
1430 static void
1431 read_fixup_phis(read_ctx *ctx)
1432 {
1433 list_for_each_entry_safe(nir_phi_src, src, &ctx->phi_srcs, src.use_link) {
1434 src->pred = read_lookup_object(ctx, (uintptr_t)src->pred);
1435 src->src.ssa = read_lookup_object(ctx, (uintptr_t)src->src.ssa);
1436
1437 /* Remove from this list */
1438 list_del(&src->src.use_link);
1439
1440 list_addtail(&src->src.use_link, &src->src.ssa->uses);
1441 }
1442 assert(list_is_empty(&ctx->phi_srcs));
1443 }
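
/* Summary of the phi round trip (editor's note): write_phi() reserves
 * two uint32 slots per source and records {blob_offset, ssa, block} in
 * ctx->phi_fixups; write_fixup_phis() patches those slots once every
 * def and block has an index. On the read side, read_phi() stashes the
 * raw indices in the source pointers and links them into ctx->phi_srcs,
 * and read_fixup_phis() above swaps the indices for real pointers and
 * moves each source onto its def's use list.
 */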
1444
1445 static void
1446 write_jump(write_ctx *ctx, const nir_jump_instr *jmp)
1447 {
1448 assert(jmp->type < 4);
1449
1450 union packed_instr header;
1451 header.u32 = 0;
1452
1453 header.jump.instr_type = jmp->instr.type;
1454 header.jump.type = jmp->type;
1455
1456 blob_write_uint32(ctx->blob, header.u32);
1457 }
1458
1459 static nir_jump_instr *
1460 read_jump(read_ctx *ctx, union packed_instr header)
1461 {
1462 nir_jump_instr *jmp = nir_jump_instr_create(ctx->nir, header.jump.type);
1463 return jmp;
1464 }
1465
1466 static void
1467 write_call(write_ctx *ctx, const nir_call_instr *call)
1468 {
1469 blob_write_uint32(ctx->blob, write_lookup_object(ctx, call->callee));
1470
1471 for (unsigned i = 0; i < call->num_params; i++)
1472 write_src(ctx, &call->params[i]);
1473 }
1474
1475 static nir_call_instr *
1476 read_call(read_ctx *ctx)
1477 {
1478 nir_function *callee = read_object(ctx);
1479 nir_call_instr *call = nir_call_instr_create(ctx->nir, callee);
1480
1481 for (unsigned i = 0; i < call->num_params; i++)
1482 read_src(ctx, &call->params[i], call);
1483
1484 return call;
1485 }
1486
1487 static void
1488 write_instr(write_ctx *ctx, const nir_instr *instr)
1489 {
1490 /* We have only 4 bits for the instruction type. */
1491 assert(instr->type < 16);
1492
1493 switch (instr->type) {
1494 case nir_instr_type_alu:
1495 write_alu(ctx, nir_instr_as_alu(instr));
1496 break;
1497 case nir_instr_type_deref:
1498 write_deref(ctx, nir_instr_as_deref(instr));
1499 break;
1500 case nir_instr_type_intrinsic:
1501 write_intrinsic(ctx, nir_instr_as_intrinsic(instr));
1502 break;
1503 case nir_instr_type_load_const:
1504 write_load_const(ctx, nir_instr_as_load_const(instr));
1505 break;
1506 case nir_instr_type_ssa_undef:
1507 write_ssa_undef(ctx, nir_instr_as_ssa_undef(instr));
1508 break;
1509 case nir_instr_type_tex:
1510 write_tex(ctx, nir_instr_as_tex(instr));
1511 break;
1512 case nir_instr_type_phi:
1513 write_phi(ctx, nir_instr_as_phi(instr));
1514 break;
1515 case nir_instr_type_jump:
1516 write_jump(ctx, nir_instr_as_jump(instr));
1517 break;
1518 case nir_instr_type_call:
1519 blob_write_uint32(ctx->blob, instr->type);
1520 write_call(ctx, nir_instr_as_call(instr));
1521 break;
1522 case nir_instr_type_parallel_copy:
1523 unreachable("Cannot write parallel copies");
1524 default:
1525 unreachable("bad instr type");
1526 }
1527 }
1528
1529 static void
1530 read_instr(read_ctx *ctx, nir_block *block)
1531 {
1532 STATIC_ASSERT(sizeof(union packed_instr) == 4);
1533 union packed_instr header;
1534 header.u32 = blob_read_uint32(ctx->blob);
1535 nir_instr *instr;
1536
1537 switch (header.any.instr_type) {
1538 case nir_instr_type_alu:
1539 instr = &read_alu(ctx, header)->instr;
1540 break;
1541 case nir_instr_type_deref:
1542 instr = &read_deref(ctx, header)->instr;
1543 break;
1544 case nir_instr_type_intrinsic:
1545 instr = &read_intrinsic(ctx, header)->instr;
1546 break;
1547 case nir_instr_type_load_const:
1548 instr = &read_load_const(ctx, header)->instr;
1549 break;
1550 case nir_instr_type_ssa_undef:
1551 instr = &read_ssa_undef(ctx, header)->instr;
1552 break;
1553 case nir_instr_type_tex:
1554 instr = &read_tex(ctx, header)->instr;
1555 break;
1556 case nir_instr_type_phi:
1557 /* Phi instructions are a special case when reading, because we don't
1558 * want inserting the instruction to automatically handle use/defs for
1559 * us. Instead, we need to wait until all the blocks/instructions are
1560 * read so that we can set their sources up.
1561 */
1562 read_phi(ctx, block, header);
1563 return;
1564 case nir_instr_type_jump:
1565 instr = &read_jump(ctx, header)->instr;
1566 break;
1567 case nir_instr_type_call:
1568 instr = &read_call(ctx)->instr;
1569 break;
1570 case nir_instr_type_parallel_copy:
1571 unreachable("Cannot read parallel copies");
1572 default:
1573 unreachable("bad instr type");
1574 }
1575
1576 nir_instr_insert_after_block(block, instr);
1577 }
1578
1579 static void
1580 write_block(write_ctx *ctx, const nir_block *block)
1581 {
1582 write_add_object(ctx, block);
1583 blob_write_uint32(ctx->blob, exec_list_length(&block->instr_list));
1584 nir_foreach_instr(instr, block)
1585 write_instr(ctx, instr);
1586 }
1587
1588 static void
1589 read_block(read_ctx *ctx, struct exec_list *cf_list)
1590 {
1591 /* Don't actually create a new block. Just use the one from the tail of
1592 * the list. NIR guarantees that the tail of the list is a block and
1593 * that no two blocks are side-by-side in the IR, so it must be empty.
1594 */
1595 nir_block *block =
1596 exec_node_data(nir_block, exec_list_get_tail(cf_list), cf_node.node);
1597
1598 read_add_object(ctx, block);
1599 unsigned num_instrs = blob_read_uint32(ctx->blob);
1600 for (unsigned i = 0; i < num_instrs; i++) {
1601 read_instr(ctx, block);
1602 }
1603 }
1604
1605 static void
1606 write_cf_list(write_ctx *ctx, const struct exec_list *cf_list);
1607
1608 static void
1609 read_cf_list(read_ctx *ctx, struct exec_list *cf_list);
1610
1611 static void
1612 write_if(write_ctx *ctx, nir_if *nif)
1613 {
1614 write_src(ctx, &nif->condition);
1615
1616 write_cf_list(ctx, &nif->then_list);
1617 write_cf_list(ctx, &nif->else_list);
1618 }
1619
1620 static void
1621 read_if(read_ctx *ctx, struct exec_list *cf_list)
1622 {
1623 nir_if *nif = nir_if_create(ctx->nir);
1624
1625 read_src(ctx, &nif->condition, nif);
1626
1627 nir_cf_node_insert_end(cf_list, &nif->cf_node);
1628
1629 read_cf_list(ctx, &nif->then_list);
1630 read_cf_list(ctx, &nif->else_list);
1631 }
1632
1633 static void
1634 write_loop(write_ctx *ctx, nir_loop *loop)
1635 {
1636 write_cf_list(ctx, &loop->body);
1637 }
1638
1639 static void
1640 read_loop(read_ctx *ctx, struct exec_list *cf_list)
1641 {
1642 nir_loop *loop = nir_loop_create(ctx->nir);
1643
1644 nir_cf_node_insert_end(cf_list, &loop->cf_node);
1645
1646 read_cf_list(ctx, &loop->body);
1647 }
1648
1649 static void
1650 write_cf_node(write_ctx *ctx, nir_cf_node *cf)
1651 {
1652 blob_write_uint32(ctx->blob, cf->type);
1653
1654 switch (cf->type) {
1655 case nir_cf_node_block:
1656 write_block(ctx, nir_cf_node_as_block(cf));
1657 break;
1658 case nir_cf_node_if:
1659 write_if(ctx, nir_cf_node_as_if(cf));
1660 break;
1661 case nir_cf_node_loop:
1662 write_loop(ctx, nir_cf_node_as_loop(cf));
1663 break;
1664 default:
1665 unreachable("bad cf type");
1666 }
1667 }
1668
1669 static void
1670 read_cf_node(read_ctx *ctx, struct exec_list *list)
1671 {
1672 nir_cf_node_type type = blob_read_uint32(ctx->blob);
1673
1674 switch (type) {
1675 case nir_cf_node_block:
1676 read_block(ctx, list);
1677 break;
1678 case nir_cf_node_if:
1679 read_if(ctx, list);
1680 break;
1681 case nir_cf_node_loop:
1682 read_loop(ctx, list);
1683 break;
1684 default:
1685 unreachable("bad cf type");
1686 }
1687 }
1688
1689 static void
1690 write_cf_list(write_ctx *ctx, const struct exec_list *cf_list)
1691 {
1692 blob_write_uint32(ctx->blob, exec_list_length(cf_list));
1693 foreach_list_typed(nir_cf_node, cf, node, cf_list) {
1694 write_cf_node(ctx, cf);
1695 }
1696 }
1697
1698 static void
1699 read_cf_list(read_ctx *ctx, struct exec_list *cf_list)
1700 {
1701 uint32_t num_cf_nodes = blob_read_uint32(ctx->blob);
1702 for (unsigned i = 0; i < num_cf_nodes; i++)
1703 read_cf_node(ctx, cf_list);
1704 }
1705
1706 static void
1707 write_function_impl(write_ctx *ctx, const nir_function_impl *fi)
1708 {
1709 write_var_list(ctx, &fi->locals);
1710 write_reg_list(ctx, &fi->registers);
1711 blob_write_uint32(ctx->blob, fi->reg_alloc);
1712
1713 write_cf_list(ctx, &fi->body);
1714 write_fixup_phis(ctx);
1715 }
1716
1717 static nir_function_impl *
1718 read_function_impl(read_ctx *ctx, nir_function *fxn)
1719 {
1720 nir_function_impl *fi = nir_function_impl_create_bare(ctx->nir);
1721 fi->function = fxn;
1722
1723 read_var_list(ctx, &fi->locals);
1724 read_reg_list(ctx, &fi->registers);
1725 fi->reg_alloc = blob_read_uint32(ctx->blob);
1726
1727 read_cf_list(ctx, &fi->body);
1728 read_fixup_phis(ctx);
1729
1730 fi->valid_metadata = 0;
1731
1732 return fi;
1733 }
1734
1735 static void
1736 write_function(write_ctx *ctx, const nir_function *fxn)
1737 {
1738 uint32_t flags = fxn->is_entrypoint;
1739 if (fxn->name)
1740 flags |= 0x2;
1741 if (fxn->impl)
1742 flags |= 0x4;
1743 blob_write_uint32(ctx->blob, flags);
1744 if (fxn->name)
1745 blob_write_string(ctx->blob, fxn->name);
1746
1747 write_add_object(ctx, fxn);
1748
1749 blob_write_uint32(ctx->blob, fxn->num_params);
1750 for (unsigned i = 0; i < fxn->num_params; i++) {
1751 uint32_t val =
1752 ((uint32_t)fxn->params[i].num_components) |
1753 ((uint32_t)fxn->params[i].bit_size) << 8;
1754 blob_write_uint32(ctx->blob, val);
1755 }
1756
1757 /* At first glance, it looks like we should write the function_impl here.
1758 * However, call instructions need to be able to reference at least the
1759 * function, and calls get processed as we write the function_impls.
1760 * So we stop here and write all function_impls in a second pass.
1761 */
1762 }
1763
1764 static void
1765 read_function(read_ctx *ctx)
1766 {
1767 uint32_t flags = blob_read_uint32(ctx->blob);
1768 bool has_name = flags & 0x2;
1769 char *name = has_name ? blob_read_string(ctx->blob) : NULL;
1770
1771 nir_function *fxn = nir_function_create(ctx->nir, name);
1772
1773 read_add_object(ctx, fxn);
1774
1775 fxn->num_params = blob_read_uint32(ctx->blob);
1776 fxn->params = ralloc_array(fxn, nir_parameter, fxn->num_params);
1777 for (unsigned i = 0; i < fxn->num_params; i++) {
1778 uint32_t val = blob_read_uint32(ctx->blob);
1779 fxn->params[i].num_components = val & 0xff;
1780 fxn->params[i].bit_size = (val >> 8) & 0xff;
1781 }
1782
1783 fxn->is_entrypoint = flags & 0x1;
1784 if (flags & 0x4)
1785 fxn->impl = NIR_SERIALIZE_FUNC_HAS_IMPL;
1786 }
1787
1788 /**
1789 * Serialize NIR into a binary blob.
1790 *
1791 * \param strip Don't serialize information only useful for debugging,
1792 * such as variable names, making cache hits from similar
1793 * shaders more likely.
1794 */
1795 void
1796 nir_serialize(struct blob *blob, const nir_shader *nir, bool strip)
1797 {
1798 write_ctx ctx = {0};
1799 ctx.remap_table = _mesa_pointer_hash_table_create(NULL);
1800 ctx.blob = blob;
1801 ctx.nir = nir;
1802 ctx.strip = strip;
1803 util_dynarray_init(&ctx.phi_fixups, NULL);
1804
1805 size_t idx_size_offset = blob_reserve_uint32(blob);
1806
1807 struct shader_info info = nir->info;
1808 uint32_t strings = 0;
1809 if (!strip && info.name)
1810 strings |= 0x1;
1811 if (!strip && info.label)
1812 strings |= 0x2;
1813 blob_write_uint32(blob, strings);
1814 if (!strip && info.name)
1815 blob_write_string(blob, info.name);
1816 if (!strip && info.label)
1817 blob_write_string(blob, info.label);
1818 info.name = info.label = NULL;
1819 blob_write_bytes(blob, (uint8_t *) &info, sizeof(info));
1820
1821 write_var_list(&ctx, &nir->uniforms);
1822 write_var_list(&ctx, &nir->inputs);
1823 write_var_list(&ctx, &nir->outputs);
1824 write_var_list(&ctx, &nir->shared);
1825 write_var_list(&ctx, &nir->globals);
1826 write_var_list(&ctx, &nir->system_values);
1827
1828 blob_write_uint32(blob, nir->num_inputs);
1829 blob_write_uint32(blob, nir->num_uniforms);
1830 blob_write_uint32(blob, nir->num_outputs);
1831 blob_write_uint32(blob, nir->num_shared);
1832 blob_write_uint32(blob, nir->scratch_size);
1833
1834 blob_write_uint32(blob, exec_list_length(&nir->functions));
1835 nir_foreach_function(fxn, nir) {
1836 write_function(&ctx, fxn);
1837 }
1838
1839 nir_foreach_function(fxn, nir) {
1840 if (fxn->impl)
1841 write_function_impl(&ctx, fxn->impl);
1842 }
1843
1844 blob_write_uint32(blob, nir->constant_data_size);
1845 if (nir->constant_data_size > 0)
1846 blob_write_bytes(blob, nir->constant_data, nir->constant_data_size);
1847
1848 *(uint32_t *)(blob->data + idx_size_offset) = ctx.next_idx;
1849
1850 _mesa_hash_table_destroy(ctx.remap_table, NULL);
1851 util_dynarray_fini(&ctx.phi_fixups);
1852 }
1853
1854 nir_shader *
1855 nir_deserialize(void *mem_ctx,
1856 const struct nir_shader_compiler_options *options,
1857 struct blob_reader *blob)
1858 {
1859 read_ctx ctx = {0};
1860 ctx.blob = blob;
1861 list_inithead(&ctx.phi_srcs);
1862 ctx.idx_table_len = blob_read_uint32(blob);
1863 ctx.idx_table = calloc(ctx.idx_table_len, sizeof(uintptr_t));
1864
1865 uint32_t strings = blob_read_uint32(blob);
1866 char *name = (strings & 0x1) ? blob_read_string(blob) : NULL;
1867 char *label = (strings & 0x2) ? blob_read_string(blob) : NULL;
1868
1869 struct shader_info info;
1870 blob_copy_bytes(blob, (uint8_t *) &info, sizeof(info));
1871
1872 ctx.nir = nir_shader_create(mem_ctx, info.stage, options, NULL);
1873
1874 info.name = name ? ralloc_strdup(ctx.nir, name) : NULL;
1875 info.label = label ? ralloc_strdup(ctx.nir, label) : NULL;
1876
1877 ctx.nir->info = info;
1878
1879 read_var_list(&ctx, &ctx.nir->uniforms);
1880 read_var_list(&ctx, &ctx.nir->inputs);
1881 read_var_list(&ctx, &ctx.nir->outputs);
1882 read_var_list(&ctx, &ctx.nir->shared);
1883 read_var_list(&ctx, &ctx.nir->globals);
1884 read_var_list(&ctx, &ctx.nir->system_values);
1885
1886 ctx.nir->num_inputs = blob_read_uint32(blob);
1887 ctx.nir->num_uniforms = blob_read_uint32(blob);
1888 ctx.nir->num_outputs = blob_read_uint32(blob);
1889 ctx.nir->num_shared = blob_read_uint32(blob);
1890 ctx.nir->scratch_size = blob_read_uint32(blob);
1891
1892 unsigned num_functions = blob_read_uint32(blob);
1893 for (unsigned i = 0; i < num_functions; i++)
1894 read_function(&ctx);
1895
1896 nir_foreach_function(fxn, ctx.nir) {
1897 if (fxn->impl == NIR_SERIALIZE_FUNC_HAS_IMPL)
1898 fxn->impl = read_function_impl(&ctx, fxn);
1899 }
1900
1901 ctx.nir->constant_data_size = blob_read_uint32(blob);
1902 if (ctx.nir->constant_data_size > 0) {
1903 ctx.nir->constant_data =
1904 ralloc_size(ctx.nir, ctx.nir->constant_data_size);
1905 blob_copy_bytes(blob, ctx.nir->constant_data,
1906 ctx.nir->constant_data_size);
1907 }
1908
1909 free(ctx.idx_table);
1910
1911 return ctx.nir;
1912 }
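
/* Typical round trip through the two entry points above (editor's
 * sketch; "shader" and "mem_ctx" stand in for the caller's objects):
 *
 *    struct blob writer;
 *    blob_init(&writer);
 *    nir_serialize(&writer, shader, false);
 *
 *    struct blob_reader reader;
 *    blob_reader_init(&reader, writer.data, writer.size);
 *    nir_shader *clone = nir_deserialize(mem_ctx, shader->options, &reader);
 *    blob_finish(&writer);
 *
 * nir_shader_serialize_deserialize() below does essentially this,
 * replacing the shader's contents in place.
 */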
1913
1914 void
1915 nir_shader_serialize_deserialize(nir_shader *shader)
1916 {
1917 const struct nir_shader_compiler_options *options = shader->options;
1918
1919 struct blob writer;
1920 blob_init(&writer);
1921 nir_serialize(&writer, shader, false);
1922
1923 /* Delete all of the shader's ralloc children but leave the shader itself alone */
1924 void *dead_ctx = ralloc_context(NULL);
1925 ralloc_adopt(dead_ctx, shader);
1926 ralloc_free(dead_ctx);
1927
1928 dead_ctx = ralloc_context(NULL);
1929
1930 struct blob_reader reader;
1931 blob_reader_init(&reader, writer.data, writer.size);
1932 nir_shader *copy = nir_deserialize(dead_ctx, options, &reader);
1933
1934 blob_finish(&writer);
1935
1936 nir_shader_replace(shader, copy);
1937 ralloc_free(dead_ctx);
1938 }