/*
 * Copyright © 2017 Connor Abbott
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir_serialize.h"
#include "nir_control_flow.h"
#include "util/u_dynarray.h"
#include "util/u_math.h"

#define NIR_SERIALIZE_FUNC_HAS_IMPL ((void *)(intptr_t)1)
#define MAX_OBJECT_IDS (1 << 20)
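/* This cap keeps object indices within the 20-bit object_idx field of
 * union packed_src below; write_add_object() asserts that it is never
 * reached.
 */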

typedef struct {
   size_t blob_offset;
   nir_ssa_def *src;
   nir_block *block;
} write_phi_fixup;

typedef struct {
   const nir_shader *nir;

   struct blob *blob;

   /* maps pointer to index */
   struct hash_table *remap_table;

   /* the next index to assign to a NIR in-memory object */
   uint32_t next_idx;

   /* Array of write_phi_fixup structs representing phi sources that need to
    * be resolved in the second pass.
    */
   struct util_dynarray phi_fixups;

   /* The last serialized type. */
   const struct glsl_type *last_type;
   const struct glsl_type *last_interface_type;
   struct nir_variable_data last_var_data;

   /* For skipping equal ALU headers (typical after scalarization). */
   nir_instr_type last_instr_type;
   uintptr_t last_alu_header_offset;

   /* Don't write optional data such as variable names. */
   bool strip;
} write_ctx;

typedef struct {
   nir_shader *nir;

   struct blob_reader *blob;

   /* the next index to assign to a NIR in-memory object */
   uint32_t next_idx;

   /* The length of the index -> object table */
   uint32_t idx_table_len;

   /* map from index to deserialized pointer */
   void **idx_table;

   /* List of phi sources. */
   struct list_head phi_srcs;

   /* The last deserialized type. */
   const struct glsl_type *last_type;
   const struct glsl_type *last_interface_type;
   struct nir_variable_data last_var_data;
} read_ctx;

static void
write_add_object(write_ctx *ctx, const void *obj)
{
   uint32_t index = ctx->next_idx++;
   assert(index != MAX_OBJECT_IDS);
   _mesa_hash_table_insert(ctx->remap_table, obj, (void *)(uintptr_t) index);
}

static uint32_t
write_lookup_object(write_ctx *ctx, const void *obj)
{
   struct hash_entry *entry = _mesa_hash_table_search(ctx->remap_table, obj);
   assert(entry);
   return (uint32_t)(uintptr_t) entry->data;
}

static void
read_add_object(read_ctx *ctx, void *obj)
{
   assert(ctx->next_idx < ctx->idx_table_len);
   ctx->idx_table[ctx->next_idx++] = obj;
}

static void *
read_lookup_object(read_ctx *ctx, uint32_t idx)
{
   assert(idx < ctx->idx_table_len);
   return ctx->idx_table[idx];
}

static void *
read_object(read_ctx *ctx)
{
   return read_lookup_object(ctx, blob_read_uint32(ctx->blob));
}

static uint32_t
encode_bit_size_3bits(uint8_t bit_size)
{
   /* Encode values of 0, 1, 2, 4, 8, 16, 32, 64 in 3 bits. */
   assert(bit_size <= 64 && util_is_power_of_two_or_zero(bit_size));
   if (bit_size)
      return util_logbase2(bit_size) + 1;
   return 0;
}

static uint8_t
decode_bit_size_3bits(uint8_t bit_size)
{
   if (bit_size)
      return 1 << (bit_size - 1);
   return 0;
}

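/* Round-trip example of the 3-bit bit-size encoding above: 0->0, 1->1,
 * 2->2, 4->3, 8->4, 16->5, 32->6, 64->7. E.g. encode(32) =
 * util_logbase2(32) + 1 = 6, and decode(6) = 1 << (6 - 1) = 32.
 */
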
#define NUM_COMPONENTS_IS_SEPARATE_7 7

static uint8_t
encode_num_components_in_3bits(uint8_t num_components)
{
   if (num_components <= 4)
      return num_components;
   if (num_components == 8)
      return 5;
   if (num_components == 16)
      return 6;

   /* special value indicating that num_components is in the next uint32 */
   return NUM_COMPONENTS_IS_SEPARATE_7;
}

static uint8_t
decode_num_components_in_3bits(uint8_t value)
{
   if (value <= 4)
      return value;
   if (value == 5)
      return 8;
   if (value == 6)
      return 16;

   unreachable("invalid num_components encoding");
   return 0;
}

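/* The 3-bit num_components encoding therefore covers 0-4 directly and maps
 * 8 to 5 and 16 to 6; any other count is written as a separate uint32
 * following the header (see write_dest()/read_dest() below).
 */
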
static void
write_constant(write_ctx *ctx, const nir_constant *c)
{
   blob_write_bytes(ctx->blob, c->values, sizeof(c->values));
   blob_write_uint32(ctx->blob, c->num_elements);
   for (unsigned i = 0; i < c->num_elements; i++)
      write_constant(ctx, c->elements[i]);
}

static nir_constant *
read_constant(read_ctx *ctx, nir_variable *nvar)
{
   nir_constant *c = ralloc(nvar, nir_constant);

   blob_copy_bytes(ctx->blob, (uint8_t *)c->values, sizeof(c->values));
   c->num_elements = blob_read_uint32(ctx->blob);
   c->elements = ralloc_array(nvar, nir_constant *, c->num_elements);
   for (unsigned i = 0; i < c->num_elements; i++)
      c->elements[i] = read_constant(ctx, nvar);

   return c;
}

enum var_data_encoding {
   var_encode_full,
   var_encode_shader_temp,
   var_encode_function_temp,
   var_encode_location_diff,
};

union packed_var {
   uint32_t u32;
   struct {
      unsigned has_name:1;
      unsigned has_constant_initializer:1;
      unsigned has_pointer_initializer:1;
      unsigned has_interface_type:1;
      unsigned num_state_slots:7;
      unsigned data_encoding:2;
      unsigned type_same_as_last:1;
      unsigned interface_type_same_as_last:1;
      unsigned _pad:1;
      unsigned num_members:16;
   } u;
};

union packed_var_data_diff {
   uint32_t u32;
   struct {
      int location:13;
      int location_frac:3;
      int driver_location:16;
   } u;
};

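/* Worked example of the diff encoding: the signed 13-bit location field
 * holds deltas in [-4096, 4095] and the signed 16-bit driver_location field
 * holds deltas in [-32768, 32767], which is exactly what the (1 << 12) and
 * (1 << 15) range checks in write_variable() below guarantee.
 */
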
static void
write_variable(write_ctx *ctx, const nir_variable *var)
{
   write_add_object(ctx, var);

   assert(var->num_state_slots < (1 << 7));

   STATIC_ASSERT(sizeof(union packed_var) == 4);
   union packed_var flags;
   flags.u32 = 0;

   flags.u.has_name = !ctx->strip && var->name;
   flags.u.has_constant_initializer = !!(var->constant_initializer);
   flags.u.has_pointer_initializer = !!(var->pointer_initializer);
   flags.u.has_interface_type = !!(var->interface_type);
   flags.u.type_same_as_last = var->type == ctx->last_type;
   flags.u.interface_type_same_as_last =
      var->interface_type && var->interface_type == ctx->last_interface_type;
   flags.u.num_state_slots = var->num_state_slots;
   flags.u.num_members = var->num_members;

   struct nir_variable_data data = var->data;

   /* When stripping, we expect that the location is no longer needed,
    * which is typically after shaders are linked.
    */
   if (ctx->strip &&
       data.mode != nir_var_system_value &&
       data.mode != nir_var_shader_in &&
       data.mode != nir_var_shader_out)
      data.location = 0;

   /* Temporary variables don't serialize var->data. */
   if (data.mode == nir_var_shader_temp)
      flags.u.data_encoding = var_encode_shader_temp;
   else if (data.mode == nir_var_function_temp)
      flags.u.data_encoding = var_encode_function_temp;
   else {
      struct nir_variable_data tmp = data;

      tmp.location = ctx->last_var_data.location;
      tmp.location_frac = ctx->last_var_data.location_frac;
      tmp.driver_location = ctx->last_var_data.driver_location;

      /* See if we can encode only the difference in locations from the last
       * variable.
       */
      if (memcmp(&ctx->last_var_data, &tmp, sizeof(tmp)) == 0 &&
          abs((int)data.location -
              (int)ctx->last_var_data.location) < (1 << 12) &&
          abs((int)data.driver_location -
              (int)ctx->last_var_data.driver_location) < (1 << 15))
         flags.u.data_encoding = var_encode_location_diff;
      else
         flags.u.data_encoding = var_encode_full;
   }

   blob_write_uint32(ctx->blob, flags.u32);

   if (!flags.u.type_same_as_last) {
      encode_type_to_blob(ctx->blob, var->type);
      ctx->last_type = var->type;
   }

   if (var->interface_type && !flags.u.interface_type_same_as_last) {
      encode_type_to_blob(ctx->blob, var->interface_type);
      ctx->last_interface_type = var->interface_type;
   }

   if (flags.u.has_name)
      blob_write_string(ctx->blob, var->name);

   if (flags.u.data_encoding == var_encode_full ||
       flags.u.data_encoding == var_encode_location_diff) {
      if (flags.u.data_encoding == var_encode_full) {
         blob_write_bytes(ctx->blob, &data, sizeof(data));
      } else {
         /* Serialize only the difference in locations from the last variable.
          */
         union packed_var_data_diff diff;

         diff.u.location = data.location - ctx->last_var_data.location;
         diff.u.location_frac = data.location_frac -
                                ctx->last_var_data.location_frac;
         diff.u.driver_location = data.driver_location -
                                  ctx->last_var_data.driver_location;

         blob_write_uint32(ctx->blob, diff.u32);
      }

      ctx->last_var_data = data;
   }

   for (unsigned i = 0; i < var->num_state_slots; i++) {
      blob_write_bytes(ctx->blob, &var->state_slots[i],
                       sizeof(var->state_slots[i]));
   }
   if (var->constant_initializer)
      write_constant(ctx, var->constant_initializer);
   if (var->pointer_initializer)
      write_lookup_object(ctx, var->pointer_initializer);
   if (var->num_members > 0) {
      blob_write_bytes(ctx->blob, (uint8_t *) var->members,
                       var->num_members * sizeof(*var->members));
   }
}

static nir_variable *
read_variable(read_ctx *ctx)
{
   nir_variable *var = rzalloc(ctx->nir, nir_variable);
   read_add_object(ctx, var);

   union packed_var flags;
   flags.u32 = blob_read_uint32(ctx->blob);

   if (flags.u.type_same_as_last) {
      var->type = ctx->last_type;
   } else {
      var->type = decode_type_from_blob(ctx->blob);
      ctx->last_type = var->type;
   }

   if (flags.u.has_interface_type) {
      if (flags.u.interface_type_same_as_last) {
         var->interface_type = ctx->last_interface_type;
      } else {
         var->interface_type = decode_type_from_blob(ctx->blob);
         ctx->last_interface_type = var->interface_type;
      }
   }

   if (flags.u.has_name) {
      const char *name = blob_read_string(ctx->blob);
      var->name = ralloc_strdup(var, name);
   } else {
      var->name = NULL;
   }

   if (flags.u.data_encoding == var_encode_shader_temp)
      var->data.mode = nir_var_shader_temp;
   else if (flags.u.data_encoding == var_encode_function_temp)
      var->data.mode = nir_var_function_temp;
   else if (flags.u.data_encoding == var_encode_full) {
      blob_copy_bytes(ctx->blob, (uint8_t *) &var->data, sizeof(var->data));
      ctx->last_var_data = var->data;
   } else { /* var_encode_location_diff */
      union packed_var_data_diff diff;
      diff.u32 = blob_read_uint32(ctx->blob);

      var->data = ctx->last_var_data;
      var->data.location += diff.u.location;
      var->data.location_frac += diff.u.location_frac;
      var->data.driver_location += diff.u.driver_location;

      ctx->last_var_data = var->data;
   }

   var->num_state_slots = flags.u.num_state_slots;
   if (var->num_state_slots != 0) {
      var->state_slots = ralloc_array(var, nir_state_slot,
                                      var->num_state_slots);
      for (unsigned i = 0; i < var->num_state_slots; i++) {
         blob_copy_bytes(ctx->blob, &var->state_slots[i],
                         sizeof(var->state_slots[i]));
      }
   }
   if (flags.u.has_constant_initializer)
      var->constant_initializer = read_constant(ctx, var);
   else
      var->constant_initializer = NULL;

   if (flags.u.has_pointer_initializer)
      var->pointer_initializer = read_object(ctx);
   else
      var->pointer_initializer = NULL;

   var->num_members = flags.u.num_members;
   if (var->num_members > 0) {
      var->members = ralloc_array(var, struct nir_variable_data,
                                  var->num_members);
      blob_copy_bytes(ctx->blob, (uint8_t *) var->members,
                      var->num_members * sizeof(*var->members));
   }

   return var;
}

static void
write_var_list(write_ctx *ctx, const struct exec_list *src)
{
   blob_write_uint32(ctx->blob, exec_list_length(src));
   foreach_list_typed(nir_variable, var, node, src) {
      write_variable(ctx, var);
   }
}

static void
read_var_list(read_ctx *ctx, struct exec_list *dst)
{
   exec_list_make_empty(dst);
   unsigned num_vars = blob_read_uint32(ctx->blob);
   for (unsigned i = 0; i < num_vars; i++) {
      nir_variable *var = read_variable(ctx);
      exec_list_push_tail(dst, &var->node);
   }
}

static void
write_register(write_ctx *ctx, const nir_register *reg)
{
   write_add_object(ctx, reg);
   blob_write_uint32(ctx->blob, reg->num_components);
   blob_write_uint32(ctx->blob, reg->bit_size);
   blob_write_uint32(ctx->blob, reg->num_array_elems);
   blob_write_uint32(ctx->blob, reg->index);
   blob_write_uint32(ctx->blob, !ctx->strip && reg->name);
   if (!ctx->strip && reg->name)
      blob_write_string(ctx->blob, reg->name);
}

static nir_register *
read_register(read_ctx *ctx)
{
   nir_register *reg = ralloc(ctx->nir, nir_register);
   read_add_object(ctx, reg);
   reg->num_components = blob_read_uint32(ctx->blob);
   reg->bit_size = blob_read_uint32(ctx->blob);
   reg->num_array_elems = blob_read_uint32(ctx->blob);
   reg->index = blob_read_uint32(ctx->blob);
   bool has_name = blob_read_uint32(ctx->blob);
   if (has_name) {
      const char *name = blob_read_string(ctx->blob);
      reg->name = ralloc_strdup(reg, name);
   } else {
      reg->name = NULL;
   }

   list_inithead(&reg->uses);
   list_inithead(&reg->defs);
   list_inithead(&reg->if_uses);

   return reg;
}

static void
write_reg_list(write_ctx *ctx, const struct exec_list *src)
{
   blob_write_uint32(ctx->blob, exec_list_length(src));
   foreach_list_typed(nir_register, reg, node, src)
      write_register(ctx, reg);
}

static void
read_reg_list(read_ctx *ctx, struct exec_list *dst)
{
   exec_list_make_empty(dst);
   unsigned num_regs = blob_read_uint32(ctx->blob);
   for (unsigned i = 0; i < num_regs; i++) {
      nir_register *reg = read_register(ctx);
      exec_list_push_tail(dst, &reg->node);
   }
}

union packed_src {
   uint32_t u32;
   struct {
      unsigned is_ssa:1;   /* <-- Header */
      unsigned is_indirect:1;
      unsigned object_idx:20;
      unsigned _footer:10; /* <-- Footer */
   } any;
   struct {
      unsigned _header:22; /* <-- Header */
      unsigned negate:1;   /* <-- Footer */
      unsigned abs:1;
      unsigned swizzle_x:2;
      unsigned swizzle_y:2;
      unsigned swizzle_z:2;
      unsigned swizzle_w:2;
   } alu;
   struct {
      unsigned _header:22; /* <-- Header */
      unsigned src_type:5; /* <-- Footer */
      unsigned _pad:5;
   } tex;
};

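/* In other words, every serialized source shares a 22-bit header (is_ssa,
 * is_indirect, object_idx) and the top 10 bits are a per-use footer:
 * negate/abs/swizzles for ALU sources, src_type for texture sources.
 * Assuming the little-endian bit-field layout this union relies on, an SSA
 * ALU source of object 7 with negate set packs as
 * u32 = 1 | (7 << 2) | (1 << 22).
 */
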
static void
write_src_full(write_ctx *ctx, const nir_src *src, union packed_src header)
{
   /* Since sources are very frequent, we try to save some space when storing
    * them. In particular, we store whether the source is a register and
    * whether the register has an indirect index in the low two bits, followed
    * by the 20-bit object index. Because object IDs are capped at
    * MAX_OBJECT_IDS (1 << 20), the index always fits; otherwise our address
    * space would've been exhausted allocating the remap table!
    */
   header.any.is_ssa = src->is_ssa;
   if (src->is_ssa) {
      header.any.object_idx = write_lookup_object(ctx, src->ssa);
      blob_write_uint32(ctx->blob, header.u32);
   } else {
      header.any.object_idx = write_lookup_object(ctx, src->reg.reg);
      header.any.is_indirect = !!src->reg.indirect;
      blob_write_uint32(ctx->blob, header.u32);
      blob_write_uint32(ctx->blob, src->reg.base_offset);
      if (src->reg.indirect) {
         union packed_src header = {0};
         write_src_full(ctx, src->reg.indirect, header);
      }
   }
}

static void
write_src(write_ctx *ctx, const nir_src *src)
{
   union packed_src header = {0};
   write_src_full(ctx, src, header);
}

static union packed_src
read_src(read_ctx *ctx, nir_src *src, void *mem_ctx)
{
   STATIC_ASSERT(sizeof(union packed_src) == 4);
   union packed_src header;
   header.u32 = blob_read_uint32(ctx->blob);

   src->is_ssa = header.any.is_ssa;
   if (src->is_ssa) {
      src->ssa = read_lookup_object(ctx, header.any.object_idx);
   } else {
      src->reg.reg = read_lookup_object(ctx, header.any.object_idx);
      src->reg.base_offset = blob_read_uint32(ctx->blob);
      if (header.any.is_indirect) {
         src->reg.indirect = ralloc(mem_ctx, nir_src);
         read_src(ctx, src->reg.indirect, mem_ctx);
      } else {
         src->reg.indirect = NULL;
      }
   }
   return header;
}

union packed_dest {
   uint8_t u8;
   struct {
      uint8_t is_ssa:1;
      uint8_t has_name:1;
      uint8_t num_components:3;
      uint8_t bit_size:3;
   } ssa;
   struct {
      uint8_t is_ssa:1;
      uint8_t is_indirect:1;
      uint8_t _pad:6;
   } reg;
};

enum intrinsic_const_indices_encoding {
   /* Use the 9 bits of packed_const_indices to store 1-9 indices.
    * 1 9-bit index, or 2 4-bit indices, or 3 3-bit indices, or
    * 4 2-bit indices, or 5-9 1-bit indices.
    *
    * The common case for load_ubo is 0, 0, 0, which is trivially represented.
    * The common cases for load_interpolated_input also fit here, e.g.: 7, 3
    */
   const_indices_9bit_all_combined,

   const_indices_8bit,  /* 8 bits per element */
   const_indices_16bit, /* 16 bits per element */
   const_indices_32bit, /* 32 bits per element */
};

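/* Worked example of the combined encoding: load_interpolated_input has two
 * const indices, so bit_size = 9 / 2 = 4 and the values 7, 3 pack as
 * 7 | (3 << 4) = 0x37 in packed_const_indices. Indices that don't all fit
 * in 9 bits fall back to one of the per-element encodings below.
 */
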
enum load_const_packing {
   /* Constants are not packed and are stored in following dwords. */
   load_const_full,

   /* packed_value contains high 19 bits, low bits are 0,
    * good for floating-point decimals
    */
   load_const_scalar_hi_19bits,

   /* packed_value contains low 19 bits, high bits are sign-extended */
   load_const_scalar_lo_19bits_sext,
};

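/* For example, the 32-bit float 1.0f (0x3f800000) has its low 13 bits
 * clear, so it packs as load_const_scalar_hi_19bits with packed_value =
 * 0x3f800000 >> 13, while a small integer such as 5 packs as
 * load_const_scalar_lo_19bits_sext with packed_value = 5. Values matching
 * neither pattern are written in full after the header.
 */
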
union packed_instr {
   uint32_t u32;
   struct {
      unsigned instr_type:4; /* always present */
      unsigned _pad:20;
      unsigned dest:8;       /* always last */
   } any;
   struct {
      unsigned instr_type:4;
      unsigned exact:1;
      unsigned no_signed_wrap:1;
      unsigned no_unsigned_wrap:1;
      unsigned saturate:1;
      /* Reg: writemask; SSA: swizzles for 2 srcs */
      unsigned writemask_or_two_swizzles:4;
      unsigned op:9;
      unsigned packed_src_ssa_16bit:1;
      /* Scalarized ALUs always have the same header. */
      unsigned num_followup_alu_sharing_header:2;
      unsigned dest:8;
   } alu;
   struct {
      unsigned instr_type:4;
      unsigned deref_type:3;
      unsigned cast_type_same_as_last:1;
      unsigned mode:10;                /* deref_var redefines this */
      unsigned packed_src_ssa_16bit:1; /* deref_var redefines this */
      unsigned _pad:5;                 /* deref_var redefines this */
      unsigned dest:8;
   } deref;
   struct {
      unsigned instr_type:4;
      unsigned deref_type:3;
      unsigned _pad:1;
      unsigned object_idx:16; /* if 0, the object ID is a separate uint32 */
      unsigned dest:8;
   } deref_var;
   struct {
      unsigned instr_type:4;
      unsigned intrinsic:9;
      unsigned const_indices_encoding:2;
      unsigned packed_const_indices:9;
      unsigned dest:8;
   } intrinsic;
   struct {
      unsigned instr_type:4;
      unsigned last_component:4;
      unsigned bit_size:3;
      unsigned packing:2;       /* enum load_const_packing */
      unsigned packed_value:19; /* meaning determined by packing */
   } load_const;
   struct {
      unsigned instr_type:4;
      unsigned last_component:4;
      unsigned bit_size:3;
      unsigned _pad:21;
   } undef;
   struct {
      unsigned instr_type:4;
      unsigned num_srcs:4;
      unsigned op:4;
      unsigned dest:8;
      unsigned _pad:12;
   } tex;
   struct {
      unsigned instr_type:4;
      unsigned num_srcs:20;
      unsigned dest:8;
   } phi;
   struct {
      unsigned instr_type:4;
      unsigned type:2;
      unsigned _pad:26;
   } jump;
};

/* Callers fill the low 24 bits of the header; write_dest() adds the packed
 * dest byte in the top 8 bits and writes the combined uint32.
 */
static void
write_dest(write_ctx *ctx, const nir_dest *dst, union packed_instr header,
           nir_instr_type instr_type)
{
   STATIC_ASSERT(sizeof(union packed_dest) == 1);
   union packed_dest dest;
   dest.u8 = 0;

   dest.ssa.is_ssa = dst->is_ssa;
   if (dst->is_ssa) {
      dest.ssa.has_name = !ctx->strip && dst->ssa.name;
      dest.ssa.num_components =
         encode_num_components_in_3bits(dst->ssa.num_components);
      dest.ssa.bit_size = encode_bit_size_3bits(dst->ssa.bit_size);
   } else {
      dest.reg.is_indirect = !!(dst->reg.indirect);
   }
   header.any.dest = dest.u8;

   /* Check if the current ALU instruction has the same header as the previous
    * instruction that is also ALU. If it does, we don't have to write
    * the current header. This is a typical occurrence after scalarization.
    */
   if (instr_type == nir_instr_type_alu) {
      bool equal_header = false;

      if (ctx->last_instr_type == nir_instr_type_alu) {
         assert(ctx->last_alu_header_offset);
         union packed_instr *last_header =
            (union packed_instr *)(ctx->blob->data +
                                   ctx->last_alu_header_offset);

         /* Clear the field that counts ALUs with equal headers. */
         union packed_instr clean_header;
         clean_header.u32 = last_header->u32;
         clean_header.alu.num_followup_alu_sharing_header = 0;

         /* There can be at most 4 consecutive ALU instructions
          * sharing the same header.
          */
         if (last_header->alu.num_followup_alu_sharing_header < 3 &&
             header.u32 == clean_header.u32) {
            last_header->alu.num_followup_alu_sharing_header++;
            equal_header = true;
         }
      }

      if (!equal_header) {
         ctx->last_alu_header_offset = ctx->blob->size;
         blob_write_uint32(ctx->blob, header.u32);
      }
   } else {
      blob_write_uint32(ctx->blob, header.u32);
   }

   if (dest.ssa.is_ssa &&
       dest.ssa.num_components == NUM_COMPONENTS_IS_SEPARATE_7)
      blob_write_uint32(ctx->blob, dst->ssa.num_components);

   if (dst->is_ssa) {
      write_add_object(ctx, &dst->ssa);
      if (dest.ssa.has_name)
         blob_write_string(ctx->blob, dst->ssa.name);
   } else {
      blob_write_uint32(ctx->blob, write_lookup_object(ctx, dst->reg.reg));
      blob_write_uint32(ctx->blob, dst->reg.base_offset);
      if (dst->reg.indirect)
         write_src(ctx, dst->reg.indirect);
   }
}

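/* Worked example of the header sharing above: four consecutive scalar
 * 32-bit fadds with identical flags, packed swizzle bits, and dest encoding
 * emit a single header uint32 whose num_followup_alu_sharing_header ends up
 * as 3; read_instr() then reuses that header for all four instructions.
 */
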
static void
read_dest(read_ctx *ctx, nir_dest *dst, nir_instr *instr,
          union packed_instr header)
{
   union packed_dest dest;
   dest.u8 = header.any.dest;

   if (dest.ssa.is_ssa) {
      unsigned bit_size = decode_bit_size_3bits(dest.ssa.bit_size);
      unsigned num_components;
      if (dest.ssa.num_components == NUM_COMPONENTS_IS_SEPARATE_7)
         num_components = blob_read_uint32(ctx->blob);
      else
         num_components = decode_num_components_in_3bits(dest.ssa.num_components);
      char *name = dest.ssa.has_name ? blob_read_string(ctx->blob) : NULL;
      nir_ssa_dest_init(instr, dst, num_components, bit_size, name);
      read_add_object(ctx, &dst->ssa);
   } else {
      dst->reg.reg = read_object(ctx);
      dst->reg.base_offset = blob_read_uint32(ctx->blob);
      if (dest.reg.is_indirect) {
         dst->reg.indirect = ralloc(instr, nir_src);
         read_src(ctx, dst->reg.indirect, instr);
      }
   }
}

static bool
are_object_ids_16bit(write_ctx *ctx)
{
   /* Check the highest object ID, because they are monotonic. */
   return ctx->next_idx < (1 << 16);
}

static bool
is_alu_src_ssa_16bit(write_ctx *ctx, const nir_alu_instr *alu)
{
   unsigned num_srcs = nir_op_infos[alu->op].num_inputs;

   for (unsigned i = 0; i < num_srcs; i++) {
      if (!alu->src[i].src.is_ssa || alu->src[i].abs || alu->src[i].negate)
         return false;

      unsigned src_components = nir_ssa_alu_instr_src_components(alu, i);

      for (unsigned chan = 0; chan < src_components; chan++) {
         /* The swizzles for src0.x and src1.x are stored
          * in writemask_or_two_swizzles for SSA ALUs.
          */
         if (alu->dest.dest.is_ssa && i < 2 && chan == 0 &&
             alu->src[i].swizzle[chan] < 4)
            continue;

         if (alu->src[i].swizzle[chan] != chan)
            return false;
      }
   }

   return are_object_ids_16bit(ctx);
}

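/* So the compact form applies only to ALU sources that are plain SSA values
 * with no abs/negate and (near-)identity swizzles, and only while fewer than
 * 1 << 16 objects have been assigned IDs; each such source then costs one
 * uint16 instead of at least a full packed_src uint32.
 */
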
static void
write_alu(write_ctx *ctx, const nir_alu_instr *alu)
{
   unsigned num_srcs = nir_op_infos[alu->op].num_inputs;
   unsigned dst_components = nir_dest_num_components(alu->dest.dest);

   /* 9 bits for nir_op */
   STATIC_ASSERT(nir_num_opcodes <= 512);
   union packed_instr header;
   header.u32 = 0;

   header.alu.instr_type = alu->instr.type;
   header.alu.exact = alu->exact;
   header.alu.no_signed_wrap = alu->no_signed_wrap;
   header.alu.no_unsigned_wrap = alu->no_unsigned_wrap;
   header.alu.saturate = alu->dest.saturate;
   header.alu.op = alu->op;
   header.alu.packed_src_ssa_16bit = is_alu_src_ssa_16bit(ctx, alu);

   if (header.alu.packed_src_ssa_16bit &&
       alu->dest.dest.is_ssa) {
      /* For packed srcs of SSA ALUs, this field stores the swizzles. */
      header.alu.writemask_or_two_swizzles = alu->src[0].swizzle[0];
      if (num_srcs > 1)
         header.alu.writemask_or_two_swizzles |= alu->src[1].swizzle[0] << 2;
   } else if (!alu->dest.dest.is_ssa && dst_components <= 4) {
      /* For vec4 registers, this field is a writemask. */
      header.alu.writemask_or_two_swizzles = alu->dest.write_mask;
   }

   write_dest(ctx, &alu->dest.dest, header, alu->instr.type);

   if (!alu->dest.dest.is_ssa && dst_components > 4)
      blob_write_uint32(ctx->blob, alu->dest.write_mask);

   if (header.alu.packed_src_ssa_16bit) {
      for (unsigned i = 0; i < num_srcs; i++) {
         assert(alu->src[i].src.is_ssa);
         unsigned idx = write_lookup_object(ctx, alu->src[i].src.ssa);
         assert(idx < (1 << 16));
         blob_write_uint16(ctx->blob, idx);
      }
   } else {
      for (unsigned i = 0; i < num_srcs; i++) {
         unsigned src_channels = nir_ssa_alu_instr_src_components(alu, i);
         unsigned src_components = nir_src_num_components(alu->src[i].src);
         union packed_src src;
         bool packed = src_components <= 4 && src_channels <= 4;
         src.u32 = 0;

         src.alu.negate = alu->src[i].negate;
         src.alu.abs = alu->src[i].abs;

         if (packed) {
            src.alu.swizzle_x = alu->src[i].swizzle[0];
            src.alu.swizzle_y = alu->src[i].swizzle[1];
            src.alu.swizzle_z = alu->src[i].swizzle[2];
            src.alu.swizzle_w = alu->src[i].swizzle[3];
         }

         write_src_full(ctx, &alu->src[i].src, src);

         /* Store swizzles for vec8 and vec16. */
         if (!packed) {
            for (unsigned o = 0; o < src_channels; o += 8) {
               unsigned value = 0;

               for (unsigned j = 0; j < 8 && o + j < src_channels; j++) {
                  value |= (uint32_t)alu->src[i].swizzle[o + j] <<
                           (4 * j); /* 4 bits per swizzle */
               }

               blob_write_uint32(ctx->blob, value);
            }
         }
      }
   }
}

static nir_alu_instr *
read_alu(read_ctx *ctx, union packed_instr header)
{
   unsigned num_srcs = nir_op_infos[header.alu.op].num_inputs;
   nir_alu_instr *alu = nir_alu_instr_create(ctx->nir, header.alu.op);

   alu->exact = header.alu.exact;
   alu->no_signed_wrap = header.alu.no_signed_wrap;
   alu->no_unsigned_wrap = header.alu.no_unsigned_wrap;
   alu->dest.saturate = header.alu.saturate;

   read_dest(ctx, &alu->dest.dest, &alu->instr, header);

   unsigned dst_components = nir_dest_num_components(alu->dest.dest);

   if (alu->dest.dest.is_ssa) {
      alu->dest.write_mask = u_bit_consecutive(0, dst_components);
   } else if (dst_components <= 4) {
      alu->dest.write_mask = header.alu.writemask_or_two_swizzles;
   } else {
      alu->dest.write_mask = blob_read_uint32(ctx->blob);
   }

   if (header.alu.packed_src_ssa_16bit) {
      for (unsigned i = 0; i < num_srcs; i++) {
         nir_alu_src *src = &alu->src[i];
         src->src.is_ssa = true;
         src->src.ssa = read_lookup_object(ctx, blob_read_uint16(ctx->blob));

         memset(&src->swizzle, 0, sizeof(src->swizzle));

         unsigned src_components = nir_ssa_alu_instr_src_components(alu, i);

         for (unsigned chan = 0; chan < src_components; chan++)
            src->swizzle[chan] = chan;
      }
   } else {
      for (unsigned i = 0; i < num_srcs; i++) {
         union packed_src src = read_src(ctx, &alu->src[i].src, &alu->instr);
         unsigned src_channels = nir_ssa_alu_instr_src_components(alu, i);
         unsigned src_components = nir_src_num_components(alu->src[i].src);
         bool packed = src_components <= 4 && src_channels <= 4;

         alu->src[i].negate = src.alu.negate;
         alu->src[i].abs = src.alu.abs;

         memset(&alu->src[i].swizzle, 0, sizeof(alu->src[i].swizzle));

         if (packed) {
            alu->src[i].swizzle[0] = src.alu.swizzle_x;
            alu->src[i].swizzle[1] = src.alu.swizzle_y;
            alu->src[i].swizzle[2] = src.alu.swizzle_z;
            alu->src[i].swizzle[3] = src.alu.swizzle_w;
         } else {
            /* Load swizzles for vec8 and vec16. */
            for (unsigned o = 0; o < src_channels; o += 8) {
               unsigned value = blob_read_uint32(ctx->blob);

               for (unsigned j = 0; j < 8 && o + j < src_channels; j++) {
                  alu->src[i].swizzle[o + j] =
                     (value >> (4 * j)) & 0xf; /* 4 bits per swizzle */
               }
            }
         }
      }
   }

   if (header.alu.packed_src_ssa_16bit &&
       alu->dest.dest.is_ssa) {
      alu->src[0].swizzle[0] = header.alu.writemask_or_two_swizzles & 0x3;
      if (num_srcs > 1)
         alu->src[1].swizzle[0] = header.alu.writemask_or_two_swizzles >> 2;
   }

   return alu;
}

static void
write_deref(write_ctx *ctx, const nir_deref_instr *deref)
{
   assert(deref->deref_type < 8);
   assert(deref->mode < (1 << 10));

   union packed_instr header;
   header.u32 = 0;

   header.deref.instr_type = deref->instr.type;
   header.deref.deref_type = deref->deref_type;

   if (deref->deref_type == nir_deref_type_cast) {
      header.deref.mode = deref->mode;
      header.deref.cast_type_same_as_last = deref->type == ctx->last_type;
   }

   unsigned var_idx = 0;
   if (deref->deref_type == nir_deref_type_var) {
      var_idx = write_lookup_object(ctx, deref->var);
      if (var_idx && var_idx < (1 << 16))
         header.deref_var.object_idx = var_idx;
   }

   if (deref->deref_type == nir_deref_type_array ||
       deref->deref_type == nir_deref_type_ptr_as_array) {
      header.deref.packed_src_ssa_16bit =
         deref->parent.is_ssa && deref->arr.index.is_ssa &&
         are_object_ids_16bit(ctx);
   }

   write_dest(ctx, &deref->dest, header, deref->instr.type);

   switch (deref->deref_type) {
   case nir_deref_type_var:
      if (!header.deref_var.object_idx)
         blob_write_uint32(ctx->blob, var_idx);
      break;

   case nir_deref_type_struct:
      write_src(ctx, &deref->parent);
      blob_write_uint32(ctx->blob, deref->strct.index);
      break;

   case nir_deref_type_array:
   case nir_deref_type_ptr_as_array:
      if (header.deref.packed_src_ssa_16bit) {
         blob_write_uint16(ctx->blob,
                           write_lookup_object(ctx, deref->parent.ssa));
         blob_write_uint16(ctx->blob,
                           write_lookup_object(ctx, deref->arr.index.ssa));
      } else {
         write_src(ctx, &deref->parent);
         write_src(ctx, &deref->arr.index);
      }
      break;

   case nir_deref_type_cast:
      write_src(ctx, &deref->parent);
      blob_write_uint32(ctx->blob, deref->cast.ptr_stride);
      if (!header.deref.cast_type_same_as_last) {
         encode_type_to_blob(ctx->blob, deref->type);
         ctx->last_type = deref->type;
      }
      break;

   case nir_deref_type_array_wildcard:
      write_src(ctx, &deref->parent);
      break;

   default:
      unreachable("Invalid deref type");
   }
}

static nir_deref_instr *
read_deref(read_ctx *ctx, union packed_instr header)
{
   nir_deref_type deref_type = header.deref.deref_type;
   nir_deref_instr *deref = nir_deref_instr_create(ctx->nir, deref_type);

   read_dest(ctx, &deref->dest, &deref->instr, header);

   nir_deref_instr *parent;

   switch (deref->deref_type) {
   case nir_deref_type_var:
      if (header.deref_var.object_idx)
         deref->var = read_lookup_object(ctx, header.deref_var.object_idx);
      else
         deref->var = read_object(ctx);

      deref->type = deref->var->type;
      break;

   case nir_deref_type_struct:
      read_src(ctx, &deref->parent, &deref->instr);
      parent = nir_src_as_deref(deref->parent);
      deref->strct.index = blob_read_uint32(ctx->blob);
      deref->type = glsl_get_struct_field(parent->type, deref->strct.index);
      break;

   case nir_deref_type_array:
   case nir_deref_type_ptr_as_array:
      if (header.deref.packed_src_ssa_16bit) {
         deref->parent.is_ssa = true;
         deref->parent.ssa = read_lookup_object(ctx, blob_read_uint16(ctx->blob));
         deref->arr.index.is_ssa = true;
         deref->arr.index.ssa = read_lookup_object(ctx, blob_read_uint16(ctx->blob));
      } else {
         read_src(ctx, &deref->parent, &deref->instr);
         read_src(ctx, &deref->arr.index, &deref->instr);
      }

      parent = nir_src_as_deref(deref->parent);
      if (deref->deref_type == nir_deref_type_array)
         deref->type = glsl_get_array_element(parent->type);
      else
         deref->type = parent->type;
      break;

   case nir_deref_type_cast:
      read_src(ctx, &deref->parent, &deref->instr);
      deref->cast.ptr_stride = blob_read_uint32(ctx->blob);
      if (header.deref.cast_type_same_as_last) {
         deref->type = ctx->last_type;
      } else {
         deref->type = decode_type_from_blob(ctx->blob);
         ctx->last_type = deref->type;
      }
      break;

   case nir_deref_type_array_wildcard:
      read_src(ctx, &deref->parent, &deref->instr);
      parent = nir_src_as_deref(deref->parent);
      deref->type = glsl_get_array_element(parent->type);
      break;

   default:
      unreachable("Invalid deref type");
   }

   if (deref_type == nir_deref_type_var) {
      deref->mode = deref->var->data.mode;
   } else if (deref->deref_type == nir_deref_type_cast) {
      deref->mode = header.deref.mode;
   } else {
      assert(deref->parent.is_ssa);
      deref->mode = nir_instr_as_deref(deref->parent.ssa->parent_instr)->mode;
   }

   return deref;
}

static void
write_intrinsic(write_ctx *ctx, const nir_intrinsic_instr *intrin)
{
   /* 9 bits for nir_intrinsic_op */
   STATIC_ASSERT(nir_num_intrinsics <= 512);
   unsigned num_srcs = nir_intrinsic_infos[intrin->intrinsic].num_srcs;
   unsigned num_indices = nir_intrinsic_infos[intrin->intrinsic].num_indices;
   assert(intrin->intrinsic < 512);

   union packed_instr header;
   header.u32 = 0;

   header.intrinsic.instr_type = intrin->instr.type;
   header.intrinsic.intrinsic = intrin->intrinsic;

   /* Analyze constant indices to decide how to encode them. */
   if (num_indices) {
      unsigned max_bits = 0;
      for (unsigned i = 0; i < num_indices; i++) {
         unsigned max = util_last_bit(intrin->const_index[i]);
         max_bits = MAX2(max_bits, max);
      }

      if (max_bits * num_indices <= 9) {
         header.intrinsic.const_indices_encoding = const_indices_9bit_all_combined;

         /* Pack all const indices into 9 bits. */
         unsigned bit_size = 9 / num_indices;
         for (unsigned i = 0; i < num_indices; i++) {
            header.intrinsic.packed_const_indices |=
               intrin->const_index[i] << (i * bit_size);
         }
      } else if (max_bits <= 8)
         header.intrinsic.const_indices_encoding = const_indices_8bit;
      else if (max_bits <= 16)
         header.intrinsic.const_indices_encoding = const_indices_16bit;
      else
         header.intrinsic.const_indices_encoding = const_indices_32bit;
   }

   if (nir_intrinsic_infos[intrin->intrinsic].has_dest)
      write_dest(ctx, &intrin->dest, header, intrin->instr.type);
   else
      blob_write_uint32(ctx->blob, header.u32);

   for (unsigned i = 0; i < num_srcs; i++)
      write_src(ctx, &intrin->src[i]);

   if (num_indices) {
      switch (header.intrinsic.const_indices_encoding) {
      case const_indices_8bit:
         for (unsigned i = 0; i < num_indices; i++)
            blob_write_uint8(ctx->blob, intrin->const_index[i]);
         break;
      case const_indices_16bit:
         for (unsigned i = 0; i < num_indices; i++)
            blob_write_uint16(ctx->blob, intrin->const_index[i]);
         break;
      case const_indices_32bit:
         for (unsigned i = 0; i < num_indices; i++)
            blob_write_uint32(ctx->blob, intrin->const_index[i]);
         break;
      }
   }
}

static nir_intrinsic_instr *
read_intrinsic(read_ctx *ctx, union packed_instr header)
{
   nir_intrinsic_op op = header.intrinsic.intrinsic;
   nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(ctx->nir, op);

   unsigned num_srcs = nir_intrinsic_infos[op].num_srcs;
   unsigned num_indices = nir_intrinsic_infos[op].num_indices;

   if (nir_intrinsic_infos[op].has_dest)
      read_dest(ctx, &intrin->dest, &intrin->instr, header);

   for (unsigned i = 0; i < num_srcs; i++)
      read_src(ctx, &intrin->src[i], &intrin->instr);

   /* Vectorized intrinsics have num_components equal to the dest or src that
    * has 0 components in the info. Find it.
    */
   if (nir_intrinsic_infos[op].has_dest &&
       nir_intrinsic_infos[op].dest_components == 0) {
      intrin->num_components = nir_dest_num_components(intrin->dest);
   } else {
      for (unsigned i = 0; i < num_srcs; i++) {
         if (nir_intrinsic_infos[op].src_components[i] == 0) {
            intrin->num_components = nir_src_num_components(intrin->src[i]);
            break;
         }
      }
   }

   if (num_indices) {
      switch (header.intrinsic.const_indices_encoding) {
      case const_indices_9bit_all_combined: {
         unsigned bit_size = 9 / num_indices;
         unsigned bit_mask = u_bit_consecutive(0, bit_size);
         for (unsigned i = 0; i < num_indices; i++) {
            intrin->const_index[i] =
               (header.intrinsic.packed_const_indices >> (i * bit_size)) &
               bit_mask;
         }
         break;
      }
      case const_indices_8bit:
         for (unsigned i = 0; i < num_indices; i++)
            intrin->const_index[i] = blob_read_uint8(ctx->blob);
         break;
      case const_indices_16bit:
         for (unsigned i = 0; i < num_indices; i++)
            intrin->const_index[i] = blob_read_uint16(ctx->blob);
         break;
      case const_indices_32bit:
         for (unsigned i = 0; i < num_indices; i++)
            intrin->const_index[i] = blob_read_uint32(ctx->blob);
         break;
      }
   }

   return intrin;
}

static void
write_load_const(write_ctx *ctx, const nir_load_const_instr *lc)
{
   assert(lc->def.num_components >= 1 && lc->def.num_components <= 16);
   union packed_instr header;
   header.u32 = 0;

   header.load_const.instr_type = lc->instr.type;
   header.load_const.last_component = lc->def.num_components - 1;
   header.load_const.bit_size = encode_bit_size_3bits(lc->def.bit_size);
   header.load_const.packing = load_const_full;

   /* Try to pack 1-component constants into the 19 free bits in the header. */
   if (lc->def.num_components == 1) {
      switch (lc->def.bit_size) {
      case 64:
         if ((lc->value[0].u64 & 0x1fffffffffffull) == 0) {
            /* packed_value contains high 19 bits, low bits are 0 */
            header.load_const.packing = load_const_scalar_hi_19bits;
            header.load_const.packed_value = lc->value[0].u64 >> 45;
         } else if (((lc->value[0].i64 << 45) >> 45) == lc->value[0].i64) {
            /* packed_value contains low 19 bits, high bits are sign-extended */
            header.load_const.packing = load_const_scalar_lo_19bits_sext;
            header.load_const.packed_value = lc->value[0].u64;
         }
         break;

      case 32:
         if ((lc->value[0].u32 & 0x1fff) == 0) {
            header.load_const.packing = load_const_scalar_hi_19bits;
            header.load_const.packed_value = lc->value[0].u32 >> 13;
         } else if (((lc->value[0].i32 << 13) >> 13) == lc->value[0].i32) {
            header.load_const.packing = load_const_scalar_lo_19bits_sext;
            header.load_const.packed_value = lc->value[0].u32;
         }
         break;

      case 16:
         header.load_const.packing = load_const_scalar_lo_19bits_sext;
         header.load_const.packed_value = lc->value[0].u16;
         break;
      case 8:
         header.load_const.packing = load_const_scalar_lo_19bits_sext;
         header.load_const.packed_value = lc->value[0].u8;
         break;
      case 1:
         header.load_const.packing = load_const_scalar_lo_19bits_sext;
         header.load_const.packed_value = lc->value[0].b;
         break;
      default:
         unreachable("invalid bit_size");
      }
   }

   blob_write_uint32(ctx->blob, header.u32);

   if (header.load_const.packing == load_const_full) {
      switch (lc->def.bit_size) {
      case 64:
         blob_write_bytes(ctx->blob, lc->value,
                          sizeof(*lc->value) * lc->def.num_components);
         break;

      case 32:
         for (unsigned i = 0; i < lc->def.num_components; i++)
            blob_write_uint32(ctx->blob, lc->value[i].u32);
         break;

      case 16:
         for (unsigned i = 0; i < lc->def.num_components; i++)
            blob_write_uint16(ctx->blob, lc->value[i].u16);
         break;

      default:
         assert(lc->def.bit_size <= 8);
         for (unsigned i = 0; i < lc->def.num_components; i++)
            blob_write_uint8(ctx->blob, lc->value[i].u8);
         break;
      }
   }

   write_add_object(ctx, &lc->def);
}

static nir_load_const_instr *
read_load_const(read_ctx *ctx, union packed_instr header)
{
   nir_load_const_instr *lc =
      nir_load_const_instr_create(ctx->nir, header.load_const.last_component + 1,
                                  decode_bit_size_3bits(header.load_const.bit_size));

   switch (header.load_const.packing) {
   case load_const_scalar_hi_19bits:
      switch (lc->def.bit_size) {
      case 64:
         lc->value[0].u64 = (uint64_t)header.load_const.packed_value << 45;
         break;
      case 32:
         lc->value[0].u32 = (uint64_t)header.load_const.packed_value << 13;
         break;
      default:
         unreachable("invalid bit_size");
      }
      break;

   case load_const_scalar_lo_19bits_sext:
      switch (lc->def.bit_size) {
      case 64:
         lc->value[0].i64 = ((int64_t)header.load_const.packed_value << 45) >> 45;
         break;
      case 32:
         lc->value[0].i32 = ((int32_t)header.load_const.packed_value << 13) >> 13;
         break;
      case 16:
         lc->value[0].u16 = header.load_const.packed_value;
         break;
      case 8:
         lc->value[0].u8 = header.load_const.packed_value;
         break;
      case 1:
         lc->value[0].b = header.load_const.packed_value;
         break;
      default:
         unreachable("invalid bit_size");
      }
      break;

   case load_const_full:
      switch (lc->def.bit_size) {
      case 64:
         blob_copy_bytes(ctx->blob, lc->value, sizeof(*lc->value) * lc->def.num_components);
         break;

      case 32:
         for (unsigned i = 0; i < lc->def.num_components; i++)
            lc->value[i].u32 = blob_read_uint32(ctx->blob);
         break;

      case 16:
         for (unsigned i = 0; i < lc->def.num_components; i++)
            lc->value[i].u16 = blob_read_uint16(ctx->blob);
         break;

      default:
         assert(lc->def.bit_size <= 8);
         for (unsigned i = 0; i < lc->def.num_components; i++)
            lc->value[i].u8 = blob_read_uint8(ctx->blob);
         break;
      }
      break;
   }

   read_add_object(ctx, &lc->def);
   return lc;
}

static void
write_ssa_undef(write_ctx *ctx, const nir_ssa_undef_instr *undef)
{
   assert(undef->def.num_components >= 1 && undef->def.num_components <= 16);

   union packed_instr header;
   header.u32 = 0;

   header.undef.instr_type = undef->instr.type;
   header.undef.last_component = undef->def.num_components - 1;
   header.undef.bit_size = encode_bit_size_3bits(undef->def.bit_size);

   blob_write_uint32(ctx->blob, header.u32);
   write_add_object(ctx, &undef->def);
}

static nir_ssa_undef_instr *
read_ssa_undef(read_ctx *ctx, union packed_instr header)
{
   nir_ssa_undef_instr *undef =
      nir_ssa_undef_instr_create(ctx->nir, header.undef.last_component + 1,
                                 decode_bit_size_3bits(header.undef.bit_size));

   read_add_object(ctx, &undef->def);
   return undef;
}

union packed_tex_data {
   uint32_t u32;
   struct {
      unsigned sampler_dim:4;
      unsigned dest_type:8;
      unsigned coord_components:3;
      unsigned is_array:1;
      unsigned is_shadow:1;
      unsigned is_new_style_shadow:1;
      unsigned component:2;
      unsigned texture_non_uniform:1;
      unsigned sampler_non_uniform:1;
      unsigned unused:8; /* Mark unused for valgrind. */
   } u;
};

static void
write_tex(write_ctx *ctx, const nir_tex_instr *tex)
{
   assert(tex->num_srcs < 16);
   assert(tex->op < 16);

   union packed_instr header;
   header.u32 = 0;

   header.tex.instr_type = tex->instr.type;
   header.tex.num_srcs = tex->num_srcs;
   header.tex.op = tex->op;

   write_dest(ctx, &tex->dest, header, tex->instr.type);

   blob_write_uint32(ctx->blob, tex->texture_index);
   blob_write_uint32(ctx->blob, tex->sampler_index);
   if (tex->op == nir_texop_tg4)
      blob_write_bytes(ctx->blob, tex->tg4_offsets, sizeof(tex->tg4_offsets));

   STATIC_ASSERT(sizeof(union packed_tex_data) == sizeof(uint32_t));
   union packed_tex_data packed = {
      .u.sampler_dim = tex->sampler_dim,
      .u.dest_type = tex->dest_type,
      .u.coord_components = tex->coord_components,
      .u.is_array = tex->is_array,
      .u.is_shadow = tex->is_shadow,
      .u.is_new_style_shadow = tex->is_new_style_shadow,
      .u.component = tex->component,
      .u.texture_non_uniform = tex->texture_non_uniform,
      .u.sampler_non_uniform = tex->sampler_non_uniform,
   };
   blob_write_uint32(ctx->blob, packed.u32);

   for (unsigned i = 0; i < tex->num_srcs; i++) {
      union packed_src src;
      src.u32 = 0;
      src.tex.src_type = tex->src[i].src_type;
      write_src_full(ctx, &tex->src[i].src, src);
   }
}

static nir_tex_instr *
read_tex(read_ctx *ctx, union packed_instr header)
{
   nir_tex_instr *tex = nir_tex_instr_create(ctx->nir, header.tex.num_srcs);

   read_dest(ctx, &tex->dest, &tex->instr, header);

   tex->op = header.tex.op;
   tex->texture_index = blob_read_uint32(ctx->blob);
   tex->sampler_index = blob_read_uint32(ctx->blob);
   if (tex->op == nir_texop_tg4)
      blob_copy_bytes(ctx->blob, tex->tg4_offsets, sizeof(tex->tg4_offsets));

   union packed_tex_data packed;
   packed.u32 = blob_read_uint32(ctx->blob);
   tex->sampler_dim = packed.u.sampler_dim;
   tex->dest_type = packed.u.dest_type;
   tex->coord_components = packed.u.coord_components;
   tex->is_array = packed.u.is_array;
   tex->is_shadow = packed.u.is_shadow;
   tex->is_new_style_shadow = packed.u.is_new_style_shadow;
   tex->component = packed.u.component;
   tex->texture_non_uniform = packed.u.texture_non_uniform;
   tex->sampler_non_uniform = packed.u.sampler_non_uniform;

   for (unsigned i = 0; i < tex->num_srcs; i++) {
      union packed_src src = read_src(ctx, &tex->src[i].src, &tex->instr);
      tex->src[i].src_type = src.tex.src_type;
   }

   return tex;
}

static void
write_phi(write_ctx *ctx, const nir_phi_instr *phi)
{
   union packed_instr header;
   header.u32 = 0;

   header.phi.instr_type = phi->instr.type;
   header.phi.num_srcs = exec_list_length(&phi->srcs);

   /* Phi nodes are special, since they may reference SSA definitions and
    * basic blocks that don't exist yet. We leave two empty uint32_t's here,
    * and then store enough information so that a later fixup pass can fill
    * them in correctly.
    */
   write_dest(ctx, &phi->dest, header, phi->instr.type);

   nir_foreach_phi_src(src, phi) {
      assert(src->src.is_ssa);
      size_t blob_offset = blob_reserve_uint32(ctx->blob);
      ASSERTED size_t blob_offset2 = blob_reserve_uint32(ctx->blob);
      assert(blob_offset + sizeof(uint32_t) == blob_offset2);
      write_phi_fixup fixup = {
         .blob_offset = blob_offset,
         .src = src->src.ssa,
         .block = src->pred,
      };
      util_dynarray_append(&ctx->phi_fixups, write_phi_fixup, fixup);
   }
}

static void
write_fixup_phis(write_ctx *ctx)
{
   util_dynarray_foreach(&ctx->phi_fixups, write_phi_fixup, fixup) {
      uint32_t *blob_ptr = (uint32_t *)(ctx->blob->data + fixup->blob_offset);
      blob_ptr[0] = write_lookup_object(ctx, fixup->src);
      blob_ptr[1] = write_lookup_object(ctx, fixup->block);
   }

   util_dynarray_clear(&ctx->phi_fixups);
}

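/* Each phi source thus occupies two reserved uint32 slots in the blob: the
 * SSA def ID and the predecessor block ID. write_function_impl() calls
 * write_fixup_phis() only after the whole CF list has been written, at
 * which point every forward-referenced def and block has an ID to patch in.
 */
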
1576 static nir_phi_instr *
1577 read_phi(read_ctx *ctx, nir_block *blk, union packed_instr header)
1578 {
1579 nir_phi_instr *phi = nir_phi_instr_create(ctx->nir);
1580
1581 read_dest(ctx, &phi->dest, &phi->instr, header);
1582
1583 /* For similar reasons as before, we just store the index directly into the
1584 * pointer, and let a later pass resolve the phi sources.
1585 *
1586 * In order to ensure that the copied sources (which are just the indices
1587 * from the blob for now) don't get inserted into the old shader's use-def
1588 * lists, we have to add the phi instruction *before* we set up its
1589 * sources.
1590 */
1591 nir_instr_insert_after_block(blk, &phi->instr);
1592
1593 for (unsigned i = 0; i < header.phi.num_srcs; i++) {
1594 nir_phi_src *src = ralloc(phi, nir_phi_src);
1595
1596 src->src.is_ssa = true;
1597 src->src.ssa = (nir_ssa_def *)(uintptr_t) blob_read_uint32(ctx->blob);
1598 src->pred = (nir_block *)(uintptr_t) blob_read_uint32(ctx->blob);
1599
1600 /* Since we're not letting nir_insert_instr handle use/def stuff for us,
1601 * we have to set the parent_instr manually. It doesn't really matter
1602 * when we do it, so we might as well do it here.
1603 */
1604 src->src.parent_instr = &phi->instr;
1605
1606 /* Stash it in the list of phi sources. We'll walk this list and fix up
1607 * sources at the very end of read_function_impl.
1608 */
1609 list_add(&src->src.use_link, &ctx->phi_srcs);
1610
1611 exec_list_push_tail(&phi->srcs, &src->node);
1612 }
1613
1614 return phi;
1615 }
1616
1617 static void
1618 read_fixup_phis(read_ctx *ctx)
1619 {
1620 list_for_each_entry_safe(nir_phi_src, src, &ctx->phi_srcs, src.use_link) {
1621 src->pred = read_lookup_object(ctx, (uintptr_t)src->pred);
1622 src->src.ssa = read_lookup_object(ctx, (uintptr_t)src->src.ssa);
1623
1624 /* Remove from this list */
1625 list_del(&src->src.use_link);
1626
1627 list_addtail(&src->src.use_link, &src->src.ssa->uses);
1628 }
1629 assert(list_is_empty(&ctx->phi_srcs));
1630 }
1631
1632 static void
1633 write_jump(write_ctx *ctx, const nir_jump_instr *jmp)
1634 {
1635 assert(jmp->type < 4);
1636
1637 union packed_instr header;
1638 header.u32 = 0;
1639
1640 header.jump.instr_type = jmp->instr.type;
1641 header.jump.type = jmp->type;
1642
1643 blob_write_uint32(ctx->blob, header.u32);
1644 }
1645
1646 static nir_jump_instr *
1647 read_jump(read_ctx *ctx, union packed_instr header)
1648 {
1649 nir_jump_instr *jmp = nir_jump_instr_create(ctx->nir, header.jump.type);
1650 return jmp;
1651 }
1652
1653 static void
1654 write_call(write_ctx *ctx, const nir_call_instr *call)
1655 {
1656 blob_write_uint32(ctx->blob, write_lookup_object(ctx, call->callee));
1657
1658 for (unsigned i = 0; i < call->num_params; i++)
1659 write_src(ctx, &call->params[i]);
1660 }
1661
1662 static nir_call_instr *
1663 read_call(read_ctx *ctx)
1664 {
1665 nir_function *callee = read_object(ctx);
1666 nir_call_instr *call = nir_call_instr_create(ctx->nir, callee);
1667
1668 for (unsigned i = 0; i < call->num_params; i++)
1669 read_src(ctx, &call->params[i], call);
1670
1671 return call;
1672 }
1673
1674 static void
1675 write_instr(write_ctx *ctx, const nir_instr *instr)
1676 {
1677 /* We have only 4 bits for the instruction type. */
1678 assert(instr->type < 16);
1679
1680 switch (instr->type) {
1681 case nir_instr_type_alu:
1682 write_alu(ctx, nir_instr_as_alu(instr));
1683 break;
1684 case nir_instr_type_deref:
1685 write_deref(ctx, nir_instr_as_deref(instr));
1686 break;
1687 case nir_instr_type_intrinsic:
1688 write_intrinsic(ctx, nir_instr_as_intrinsic(instr));
1689 break;
1690 case nir_instr_type_load_const:
1691 write_load_const(ctx, nir_instr_as_load_const(instr));
1692 break;
1693 case nir_instr_type_ssa_undef:
1694 write_ssa_undef(ctx, nir_instr_as_ssa_undef(instr));
1695 break;
1696 case nir_instr_type_tex:
1697 write_tex(ctx, nir_instr_as_tex(instr));
1698 break;
1699 case nir_instr_type_phi:
1700 write_phi(ctx, nir_instr_as_phi(instr));
1701 break;
1702 case nir_instr_type_jump:
1703 write_jump(ctx, nir_instr_as_jump(instr));
1704 break;
1705 case nir_instr_type_call:
1706 blob_write_uint32(ctx->blob, instr->type);
1707 write_call(ctx, nir_instr_as_call(instr));
1708 break;
1709 case nir_instr_type_parallel_copy:
1710 unreachable("Cannot write parallel copies");
1711 default:
1712 unreachable("bad instr type");
1713 }
1714 }
1715
1716 /* Return the number of instructions read. */
1717 static unsigned
1718 read_instr(read_ctx *ctx, nir_block *block)
1719 {
1720 STATIC_ASSERT(sizeof(union packed_instr) == 4);
1721 union packed_instr header;
1722 header.u32 = blob_read_uint32(ctx->blob);
1723 nir_instr *instr;
1724
1725 switch (header.any.instr_type) {
1726 case nir_instr_type_alu:
1727 for (unsigned i = 0; i <= header.alu.num_followup_alu_sharing_header; i++)
1728 nir_instr_insert_after_block(block, &read_alu(ctx, header)->instr);
1729 return header.alu.num_followup_alu_sharing_header + 1;
1730 case nir_instr_type_deref:
1731 instr = &read_deref(ctx, header)->instr;
1732 break;
1733 case nir_instr_type_intrinsic:
1734 instr = &read_intrinsic(ctx, header)->instr;
1735 break;
1736 case nir_instr_type_load_const:
1737 instr = &read_load_const(ctx, header)->instr;
1738 break;
1739 case nir_instr_type_ssa_undef:
1740 instr = &read_ssa_undef(ctx, header)->instr;
1741 break;
1742 case nir_instr_type_tex:
1743 instr = &read_tex(ctx, header)->instr;
1744 break;
1745 case nir_instr_type_phi:
1746 /* Phi instructions are a bit of a special case when reading because we
1747 * don't want inserting the instruction to automatically handle use/defs
1748 * for us. Instead, we need to wait until all the blocks/instructions
1749 * are read so that we can set their sources up.
1750 */
1751 read_phi(ctx, block, header);
1752 return 1;
1753 case nir_instr_type_jump:
1754 instr = &read_jump(ctx, header)->instr;
1755 break;
1756 case nir_instr_type_call:
1757 instr = &read_call(ctx)->instr;
1758 break;
1759 case nir_instr_type_parallel_copy:
1760 unreachable("Cannot read parallel copies");
1761 default:
1762 unreachable("bad instr type");
1763 }
1764
1765 nir_instr_insert_after_block(block, instr);
1766 return 1;
1767 }
1768
1769 static void
1770 write_block(write_ctx *ctx, const nir_block *block)
1771 {
1772 write_add_object(ctx, block);
1773 blob_write_uint32(ctx->blob, exec_list_length(&block->instr_list));
1774
1775 ctx->last_instr_type = ~0;
1776 ctx->last_alu_header_offset = 0;
1777
1778 nir_foreach_instr(instr, block) {
1779 write_instr(ctx, instr);
1780 ctx->last_instr_type = instr->type;
1781 }
1782 }
1783
1784 static void
1785 read_block(read_ctx *ctx, struct exec_list *cf_list)
1786 {
1787 /* Don't actually create a new block. Just use the one from the tail of
1788 * the list. NIR guarantees that the tail of the list is a block and that
1789 * no two blocks are side-by-side in the IR; It should be empty.
1790 */
1791 nir_block *block =
1792 exec_node_data(nir_block, exec_list_get_tail(cf_list), cf_node.node);
1793
1794 read_add_object(ctx, block);
1795 unsigned num_instrs = blob_read_uint32(ctx->blob);
   for (unsigned i = 0; i < num_instrs;) {
      i += read_instr(ctx, block);
   }
}

static void
write_cf_list(write_ctx *ctx, const struct exec_list *cf_list);

static void
read_cf_list(read_ctx *ctx, struct exec_list *cf_list);

static void
write_if(write_ctx *ctx, nir_if *nif)
{
   write_src(ctx, &nif->condition);

   write_cf_list(ctx, &nif->then_list);
   write_cf_list(ctx, &nif->else_list);
}

static void
read_if(read_ctx *ctx, struct exec_list *cf_list)
{
   nir_if *nif = nir_if_create(ctx->nir);

   read_src(ctx, &nif->condition, nif);

   nir_cf_node_insert_end(cf_list, &nif->cf_node);

   read_cf_list(ctx, &nif->then_list);
   read_cf_list(ctx, &nif->else_list);
}

static void
write_loop(write_ctx *ctx, nir_loop *loop)
{
   write_cf_list(ctx, &loop->body);
}

static void
read_loop(read_ctx *ctx, struct exec_list *cf_list)
{
   nir_loop *loop = nir_loop_create(ctx->nir);

   nir_cf_node_insert_end(cf_list, &loop->cf_node);

   read_cf_list(ctx, &loop->body);
}

static void
write_cf_node(write_ctx *ctx, nir_cf_node *cf)
{
   blob_write_uint32(ctx->blob, cf->type);

   switch (cf->type) {
   case nir_cf_node_block:
      write_block(ctx, nir_cf_node_as_block(cf));
      break;
   case nir_cf_node_if:
      write_if(ctx, nir_cf_node_as_if(cf));
      break;
   case nir_cf_node_loop:
      write_loop(ctx, nir_cf_node_as_loop(cf));
      break;
   default:
      unreachable("bad cf type");
   }
}

static void
read_cf_node(read_ctx *ctx, struct exec_list *list)
{
   nir_cf_node_type type = blob_read_uint32(ctx->blob);

   switch (type) {
   case nir_cf_node_block:
      read_block(ctx, list);
      break;
   case nir_cf_node_if:
      read_if(ctx, list);
      break;
   case nir_cf_node_loop:
      read_loop(ctx, list);
      break;
   default:
      unreachable("bad cf type");
   }
}

static void
write_cf_list(write_ctx *ctx, const struct exec_list *cf_list)
{
   blob_write_uint32(ctx->blob, exec_list_length(cf_list));
   foreach_list_typed(nir_cf_node, cf, node, cf_list) {
      write_cf_node(ctx, cf);
   }
}

static void
read_cf_list(read_ctx *ctx, struct exec_list *cf_list)
{
   uint32_t num_cf_nodes = blob_read_uint32(ctx->blob);
   for (unsigned i = 0; i < num_cf_nodes; i++)
      read_cf_node(ctx, cf_list);
}
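
/* The control-flow encoding is a pre-order walk over the CF tree.  As a
 * rough sketch (illustrative only, not an authoritative format spec), a
 * body such as
 *
 *    block0; if (c) { block1; } else { block2; } block3;
 *
 * serializes as
 *
 *    count=3, [block0],
 *             [if: src(c), then: count=1, [block1],
 *                          else: count=1, [block2]],
 *             [block3]
 */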

static void
write_function_impl(write_ctx *ctx, const nir_function_impl *fi)
{
   blob_write_uint8(ctx->blob, fi->structured);

   write_var_list(ctx, &fi->locals);
   write_reg_list(ctx, &fi->registers);
   blob_write_uint32(ctx->blob, fi->reg_alloc);

   write_cf_list(ctx, &fi->body);
   write_fixup_phis(ctx);
}

static nir_function_impl *
read_function_impl(read_ctx *ctx, nir_function *fxn)
{
   nir_function_impl *fi = nir_function_impl_create_bare(ctx->nir);
   fi->function = fxn;

   fi->structured = blob_read_uint8(ctx->blob);

   read_var_list(ctx, &fi->locals);
   read_reg_list(ctx, &fi->registers);
   fi->reg_alloc = blob_read_uint32(ctx->blob);

   read_cf_list(ctx, &fi->body);
   read_fixup_phis(ctx);

   fi->valid_metadata = 0;

   return fi;
}

static void
write_function(write_ctx *ctx, const nir_function *fxn)
{
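   /* Pack the function's metadata into a small flags word.  Bit 0 is
    * is_entrypoint, bit 1 says whether the function has a name, and bit 2
    * says whether it has an implementation.
    */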
   uint32_t flags = fxn->is_entrypoint;
   if (fxn->name)
      flags |= 0x2;
   if (fxn->impl)
      flags |= 0x4;
   blob_write_uint32(ctx->blob, flags);
   if (fxn->name)
      blob_write_string(ctx->blob, fxn->name);

   write_add_object(ctx, fxn);

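   /* Each parameter packs into a single uint32: bits 0-7 hold
    * num_components and bits 8-15 hold bit_size; the upper 16 bits are
    * currently unused.
    */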
   blob_write_uint32(ctx->blob, fxn->num_params);
   for (unsigned i = 0; i < fxn->num_params; i++) {
      uint32_t val =
         ((uint32_t)fxn->params[i].num_components) |
         ((uint32_t)fxn->params[i].bit_size) << 8;
      blob_write_uint32(ctx->blob, val);
   }

   /* At first glance, it looks like we should write the function_impl here.
    * However, call instructions need to be able to reference at least the
    * function, and calls get processed as we write the function_impls.
    * So we stop here and write the function_impls in a second pass.
    */
}

static void
read_function(read_ctx *ctx)
{
   uint32_t flags = blob_read_uint32(ctx->blob);
   bool has_name = flags & 0x2;
   char *name = has_name ? blob_read_string(ctx->blob) : NULL;

   nir_function *fxn = nir_function_create(ctx->nir, name);

   read_add_object(ctx, fxn);

   fxn->num_params = blob_read_uint32(ctx->blob);
   fxn->params = ralloc_array(fxn, nir_parameter, fxn->num_params);
   for (unsigned i = 0; i < fxn->num_params; i++) {
      uint32_t val = blob_read_uint32(ctx->blob);
      fxn->params[i].num_components = val & 0xff;
      fxn->params[i].bit_size = (val >> 8) & 0xff;
   }

   fxn->is_entrypoint = flags & 0x1;
   if (flags & 0x4)
      fxn->impl = NIR_SERIALIZE_FUNC_HAS_IMPL;
}

/**
 * Serialize NIR into a binary blob.
 *
 * \param strip  Don't serialize information only useful for debugging,
 *               such as variable names, making cache hits from similar
 *               shaders more likely.
 */
void
nir_serialize(struct blob *blob, const nir_shader *nir, bool strip)
{
   write_ctx ctx = {0};
   ctx.remap_table = _mesa_pointer_hash_table_create(NULL);
   ctx.blob = blob;
   ctx.nir = nir;
   ctx.strip = strip;
   util_dynarray_init(&ctx.phi_fixups, NULL);

   size_t idx_size_offset = blob_reserve_uint32(blob);

   struct shader_info info = nir->info;
   uint32_t strings = 0;
   if (!strip && info.name)
      strings |= 0x1;
   if (!strip && info.label)
      strings |= 0x2;
   blob_write_uint32(blob, strings);
   if (!strip && info.name)
      blob_write_string(blob, info.name);
   if (!strip && info.label)
      blob_write_string(blob, info.label);
   info.name = info.label = NULL;
   blob_write_bytes(blob, (uint8_t *) &info, sizeof(info));

   write_var_list(&ctx, &nir->variables);

   blob_write_uint32(blob, nir->num_inputs);
   blob_write_uint32(blob, nir->num_uniforms);
   blob_write_uint32(blob, nir->num_outputs);
   blob_write_uint32(blob, nir->shared_size);
   blob_write_uint32(blob, nir->scratch_size);

   blob_write_uint32(blob, exec_list_length(&nir->functions));
   nir_foreach_function(fxn, nir) {
      write_function(&ctx, fxn);
   }

   nir_foreach_function(fxn, nir) {
      if (fxn->impl)
         write_function_impl(&ctx, fxn->impl);
   }

   blob_write_uint32(blob, nir->constant_data_size);
   if (nir->constant_data_size > 0)
      blob_write_bytes(blob, nir->constant_data, nir->constant_data_size);

   *(uint32_t *)(blob->data + idx_size_offset) = ctx.next_idx;

   _mesa_hash_table_destroy(ctx.remap_table, NULL);
   util_dynarray_fini(&ctx.phi_fixups);
}
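
/* A minimal caller-side sketch (not part of this file): serializing a shader
 * into a fresh blob, e.g. for a shader cache, might look like:
 *
 *    struct blob blob;
 *    blob_init(&blob);
 *    nir_serialize(&blob, nir, true);
 *    // ... store blob.data / blob.size ...
 *    blob_finish(&blob);
 */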

nir_shader *
nir_deserialize(void *mem_ctx,
                const struct nir_shader_compiler_options *options,
                struct blob_reader *blob)
{
   read_ctx ctx = {0};
   ctx.blob = blob;
   list_inithead(&ctx.phi_srcs);
   ctx.idx_table_len = blob_read_uint32(blob);
   ctx.idx_table = calloc(ctx.idx_table_len, sizeof(uintptr_t));

   uint32_t strings = blob_read_uint32(blob);
   char *name = (strings & 0x1) ? blob_read_string(blob) : NULL;
   char *label = (strings & 0x2) ? blob_read_string(blob) : NULL;

   struct shader_info info;
   blob_copy_bytes(blob, (uint8_t *) &info, sizeof(info));

   ctx.nir = nir_shader_create(mem_ctx, info.stage, options, NULL);

   info.name = name ? ralloc_strdup(ctx.nir, name) : NULL;
   info.label = label ? ralloc_strdup(ctx.nir, label) : NULL;

   ctx.nir->info = info;

   read_var_list(&ctx, &ctx.nir->variables);

   ctx.nir->num_inputs = blob_read_uint32(blob);
   ctx.nir->num_uniforms = blob_read_uint32(blob);
   ctx.nir->num_outputs = blob_read_uint32(blob);
   ctx.nir->shared_size = blob_read_uint32(blob);
   ctx.nir->scratch_size = blob_read_uint32(blob);

   unsigned num_functions = blob_read_uint32(blob);
   for (unsigned i = 0; i < num_functions; i++)
      read_function(&ctx);

   nir_foreach_function(fxn, ctx.nir) {
      if (fxn->impl == NIR_SERIALIZE_FUNC_HAS_IMPL)
         fxn->impl = read_function_impl(&ctx, fxn);
   }

   ctx.nir->constant_data_size = blob_read_uint32(blob);
   if (ctx.nir->constant_data_size > 0) {
      ctx.nir->constant_data =
         ralloc_size(ctx.nir, ctx.nir->constant_data_size);
      blob_copy_bytes(blob, ctx.nir->constant_data,
                      ctx.nir->constant_data_size);
   }

   free(ctx.idx_table);

   return ctx.nir;
}
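
/* A minimal sketch of the matching read side, assuming the bytes came from a
 * prior nir_serialize() built with the same compiler options:
 *
 *    struct blob_reader reader;
 *    blob_reader_init(&reader, data, size);
 *    nir_shader *shader = nir_deserialize(NULL, options, &reader);
 */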

void
nir_shader_serialize_deserialize(nir_shader *shader)
{
   const struct nir_shader_compiler_options *options = shader->options;

   struct blob writer;
   blob_init(&writer);
   nir_serialize(&writer, shader, false);

   /* Delete all of the shader's ralloc children but leave the shader
    * itself alone.
    */
   void *dead_ctx = ralloc_context(NULL);
   ralloc_adopt(dead_ctx, shader);
   ralloc_free(dead_ctx);

   dead_ctx = ralloc_context(NULL);

   struct blob_reader reader;
   blob_reader_init(&reader, writer.data, writer.size);
   nir_shader *copy = nir_deserialize(dead_ctx, options, &reader);

   blob_finish(&writer);

   nir_shader_replace(shader, copy);
   ralloc_free(dead_ctx);
}
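
/* nir_shader_serialize_deserialize() round-trips a shader in place, e.g.:
 *
 *    nir_shader_serialize_deserialize(shader);
 *
 * Any information the serializer does not preserve is dropped from the
 * shader, so this is mainly useful for exercising the serializer in tests.
 */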