/*
 * Copyright © 2017 Connor Abbott
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir_serialize.h"
#include "nir_control_flow.h"
#include "util/u_dynarray.h"
#include "util/u_math.h"

#define NIR_SERIALIZE_FUNC_HAS_IMPL ((void *)(intptr_t)1)
#define MAX_OBJECT_IDS (1 << 20)

typedef struct {
   size_t blob_offset;
   nir_ssa_def *src;
   nir_block *block;
} write_phi_fixup;

typedef struct {
   const nir_shader *nir;

   struct blob *blob;

   /* maps pointer to index */
   struct hash_table *remap_table;

   /* the next index to assign to a NIR in-memory object */
   uint32_t next_idx;

   /* Array of write_phi_fixup structs representing phi sources that need to
    * be resolved in the second pass.
    */
   struct util_dynarray phi_fixups;

   /* The last serialized type. */
   const struct glsl_type *last_type;
   const struct glsl_type *last_interface_type;
   struct nir_variable_data last_var_data;

   /* For skipping equal ALU headers (typical after scalarization). */
   nir_instr_type last_instr_type;
   uintptr_t last_alu_header_offset;

   /* Don't write optional data such as variable names. */
   bool strip;
} write_ctx;

typedef struct {
   nir_shader *nir;

   struct blob_reader *blob;

   /* the next index to assign to a NIR in-memory object */
   uint32_t next_idx;

   /* The length of the index -> object table */
   uint32_t idx_table_len;

   /* map from index to deserialized pointer */
   void **idx_table;

   /* List of phi sources. */
   struct list_head phi_srcs;

   /* The last deserialized type. */
   const struct glsl_type *last_type;
   const struct glsl_type *last_interface_type;
   struct nir_variable_data last_var_data;
} read_ctx;

static void
write_add_object(write_ctx *ctx, const void *obj)
{
   uint32_t index = ctx->next_idx++;
   assert(index != MAX_OBJECT_IDS);
   _mesa_hash_table_insert(ctx->remap_table, obj, (void *)(uintptr_t) index);
}

static uint32_t
write_lookup_object(write_ctx *ctx, const void *obj)
{
   struct hash_entry *entry = _mesa_hash_table_search(ctx->remap_table, obj);
   assert(entry);
   return (uint32_t)(uintptr_t) entry->data;
}

static void
read_add_object(read_ctx *ctx, void *obj)
{
   assert(ctx->next_idx < ctx->idx_table_len);
   ctx->idx_table[ctx->next_idx++] = obj;
}

static void *
read_lookup_object(read_ctx *ctx, uint32_t idx)
{
   assert(idx < ctx->idx_table_len);
   return ctx->idx_table[idx];
}

static void *
read_object(read_ctx *ctx)
{
   return read_lookup_object(ctx, blob_read_uint32(ctx->blob));
}

static uint32_t
encode_bit_size_3bits(uint8_t bit_size)
{
   /* Encode values of 0, 1, 2, 4, 8, 16, 32, 64 in 3 bits. */
   assert(bit_size <= 64 && util_is_power_of_two_or_zero(bit_size));
   if (bit_size)
      return util_logbase2(bit_size) + 1;
   return 0;
}

static uint8_t
decode_bit_size_3bits(uint8_t bit_size)
{
   if (bit_size)
      return 1 << (bit_size - 1);
   return 0;
}

#define NUM_COMPONENTS_IS_SEPARATE_7 7

static uint8_t
encode_num_components_in_3bits(uint8_t num_components)
{
   if (num_components <= 4)
      return num_components;
   if (num_components == 8)
      return 5;
   if (num_components == 16)
      return 6;

   /* special value indicating that num_components is in the next uint32 */
   return NUM_COMPONENTS_IS_SEPARATE_7;
}

static uint8_t
decode_num_components_in_3bits(uint8_t value)
{
   if (value <= 4)
      return value;
   if (value == 5)
      return 8;
   if (value == 6)
      return 16;

   unreachable("invalid num_components encoding");
   return 0;
}

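/* For illustration (not part of the format itself): the 3-bit bit-size code
 * is log2(bit_size) + 1, so 0, 1, 2, 4, 8, 16, 32, 64 map to 0..7, e.g.
 * encode_bit_size_3bits(32) == 6 and decode_bit_size_3bits(6) == 32.
 * Component counts use the identity mapping for 0..4 plus the special codes
 * 5 -> 8 and 6 -> 16, with 7 meaning "stored in a separate uint32".
 */
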
static void
write_constant(write_ctx *ctx, const nir_constant *c)
{
   blob_write_bytes(ctx->blob, c->values, sizeof(c->values));
   blob_write_uint32(ctx->blob, c->num_elements);
   for (unsigned i = 0; i < c->num_elements; i++)
      write_constant(ctx, c->elements[i]);
}

static nir_constant *
read_constant(read_ctx *ctx, nir_variable *nvar)
{
   nir_constant *c = ralloc(nvar, nir_constant);

   blob_copy_bytes(ctx->blob, (uint8_t *)c->values, sizeof(c->values));
   c->num_elements = blob_read_uint32(ctx->blob);
   c->elements = ralloc_array(nvar, nir_constant *, c->num_elements);
   for (unsigned i = 0; i < c->num_elements; i++)
      c->elements[i] = read_constant(ctx, nvar);

   return c;
}

enum var_data_encoding {
   var_encode_full,
   var_encode_shader_temp,
   var_encode_function_temp,
   var_encode_location_diff,
};

union packed_var {
   uint32_t u32;
   struct {
      unsigned has_name:1;
      unsigned has_constant_initializer:1;
      unsigned has_pointer_initializer:1;
      unsigned has_interface_type:1;
      unsigned num_state_slots:7;
      unsigned data_encoding:2;
      unsigned type_same_as_last:1;
      unsigned interface_type_same_as_last:1;
      unsigned _pad:1;
      unsigned num_members:16;
   } u;
};

union packed_var_data_diff {
   uint32_t u32;
   struct {
      int location:13;
      int location_frac:3;
      int driver_location:16;
   } u;
};

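/* A sketch of the diff encoding (illustrative values): if the previous
 * variable had location 4 / driver_location 16 and the current one has
 * location 5 / driver_location 20 with otherwise identical data, the whole
 * nir_variable_data is encoded as one packed_var_data_diff word with
 * location = +1, location_frac = 0, driver_location = +4 instead of the
 * full sizeof(struct nir_variable_data) bytes.
 */
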
static void
write_variable(write_ctx *ctx, const nir_variable *var)
{
   write_add_object(ctx, var);

   assert(var->num_state_slots < (1 << 7));

   STATIC_ASSERT(sizeof(union packed_var) == 4);
   union packed_var flags;
   flags.u32 = 0;

   flags.u.has_name = !ctx->strip && var->name;
   flags.u.has_constant_initializer = !!(var->constant_initializer);
   flags.u.has_pointer_initializer = !!(var->pointer_initializer);
   flags.u.has_interface_type = !!(var->interface_type);
   flags.u.type_same_as_last = var->type == ctx->last_type;
   flags.u.interface_type_same_as_last =
      var->interface_type && var->interface_type == ctx->last_interface_type;
   flags.u.num_state_slots = var->num_state_slots;
   flags.u.num_members = var->num_members;

   struct nir_variable_data data = var->data;

   /* When stripping, we expect the location to no longer be needed, which
    * is typically the case after shaders are linked.
    */
   if (ctx->strip &&
       data.mode != nir_var_shader_in &&
       data.mode != nir_var_shader_out)
      data.location = 0;

   /* Temporary variables don't serialize var->data. */
   if (data.mode == nir_var_shader_temp)
      flags.u.data_encoding = var_encode_shader_temp;
   else if (data.mode == nir_var_function_temp)
      flags.u.data_encoding = var_encode_function_temp;
   else {
      struct nir_variable_data tmp = data;

      tmp.location = ctx->last_var_data.location;
      tmp.location_frac = ctx->last_var_data.location_frac;
      tmp.driver_location = ctx->last_var_data.driver_location;

      /* See if we can encode only the difference in locations from the last
       * variable.
       */
      if (memcmp(&ctx->last_var_data, &tmp, sizeof(tmp)) == 0 &&
          abs((int)data.location -
              (int)ctx->last_var_data.location) < (1 << 12) &&
          abs((int)data.driver_location -
              (int)ctx->last_var_data.driver_location) < (1 << 15))
         flags.u.data_encoding = var_encode_location_diff;
      else
         flags.u.data_encoding = var_encode_full;
   }

   blob_write_uint32(ctx->blob, flags.u32);

   if (!flags.u.type_same_as_last) {
      encode_type_to_blob(ctx->blob, var->type);
      ctx->last_type = var->type;
   }

   if (var->interface_type && !flags.u.interface_type_same_as_last) {
      encode_type_to_blob(ctx->blob, var->interface_type);
      ctx->last_interface_type = var->interface_type;
   }

   if (flags.u.has_name)
      blob_write_string(ctx->blob, var->name);

   if (flags.u.data_encoding == var_encode_full ||
       flags.u.data_encoding == var_encode_location_diff) {
      if (flags.u.data_encoding == var_encode_full) {
         blob_write_bytes(ctx->blob, &data, sizeof(data));
      } else {
         /* Serialize only the difference in locations from the last variable.
          */
         union packed_var_data_diff diff;

         diff.u.location = data.location - ctx->last_var_data.location;
         diff.u.location_frac = data.location_frac -
                                ctx->last_var_data.location_frac;
         diff.u.driver_location = data.driver_location -
                                  ctx->last_var_data.driver_location;

         blob_write_uint32(ctx->blob, diff.u32);
      }

      ctx->last_var_data = data;
   }

   for (unsigned i = 0; i < var->num_state_slots; i++) {
      blob_write_bytes(ctx->blob, &var->state_slots[i],
                       sizeof(var->state_slots[i]));
   }
   if (var->constant_initializer)
      write_constant(ctx, var->constant_initializer);
   if (var->pointer_initializer)
      write_lookup_object(ctx, var->pointer_initializer);
   if (var->num_members > 0) {
      blob_write_bytes(ctx->blob, (uint8_t *) var->members,
                       var->num_members * sizeof(*var->members));
   }
}

static nir_variable *
read_variable(read_ctx *ctx)
{
   nir_variable *var = rzalloc(ctx->nir, nir_variable);
   read_add_object(ctx, var);

   union packed_var flags;
   flags.u32 = blob_read_uint32(ctx->blob);

   if (flags.u.type_same_as_last) {
      var->type = ctx->last_type;
   } else {
      var->type = decode_type_from_blob(ctx->blob);
      ctx->last_type = var->type;
   }

   if (flags.u.has_interface_type) {
      if (flags.u.interface_type_same_as_last) {
         var->interface_type = ctx->last_interface_type;
      } else {
         var->interface_type = decode_type_from_blob(ctx->blob);
         ctx->last_interface_type = var->interface_type;
      }
   }

   if (flags.u.has_name) {
      const char *name = blob_read_string(ctx->blob);
      var->name = ralloc_strdup(var, name);
   } else {
      var->name = NULL;
   }

   if (flags.u.data_encoding == var_encode_shader_temp)
      var->data.mode = nir_var_shader_temp;
   else if (flags.u.data_encoding == var_encode_function_temp)
      var->data.mode = nir_var_function_temp;
   else if (flags.u.data_encoding == var_encode_full) {
      blob_copy_bytes(ctx->blob, (uint8_t *) &var->data, sizeof(var->data));
      ctx->last_var_data = var->data;
   } else { /* var_encode_location_diff */
      union packed_var_data_diff diff;
      diff.u32 = blob_read_uint32(ctx->blob);

      var->data = ctx->last_var_data;
      var->data.location += diff.u.location;
      var->data.location_frac += diff.u.location_frac;
      var->data.driver_location += diff.u.driver_location;

      ctx->last_var_data = var->data;
   }

   var->num_state_slots = flags.u.num_state_slots;
   if (var->num_state_slots != 0) {
      var->state_slots = ralloc_array(var, nir_state_slot,
                                      var->num_state_slots);
      for (unsigned i = 0; i < var->num_state_slots; i++) {
         blob_copy_bytes(ctx->blob, &var->state_slots[i],
                         sizeof(var->state_slots[i]));
      }
   }
   if (flags.u.has_constant_initializer)
      var->constant_initializer = read_constant(ctx, var);
   else
      var->constant_initializer = NULL;

   if (flags.u.has_pointer_initializer)
      var->pointer_initializer = read_object(ctx);
   else
      var->pointer_initializer = NULL;

   var->num_members = flags.u.num_members;
   if (var->num_members > 0) {
      var->members = ralloc_array(var, struct nir_variable_data,
                                  var->num_members);
      blob_copy_bytes(ctx->blob, (uint8_t *) var->members,
                      var->num_members * sizeof(*var->members));
   }

   return var;
}

static void
write_var_list(write_ctx *ctx, const struct exec_list *src)
{
   blob_write_uint32(ctx->blob, exec_list_length(src));
   foreach_list_typed(nir_variable, var, node, src) {
      write_variable(ctx, var);
   }
}

static void
read_var_list(read_ctx *ctx, struct exec_list *dst)
{
   exec_list_make_empty(dst);
   unsigned num_vars = blob_read_uint32(ctx->blob);
   for (unsigned i = 0; i < num_vars; i++) {
      nir_variable *var = read_variable(ctx);
      exec_list_push_tail(dst, &var->node);
   }
}

static void
write_register(write_ctx *ctx, const nir_register *reg)
{
   write_add_object(ctx, reg);
   blob_write_uint32(ctx->blob, reg->num_components);
   blob_write_uint32(ctx->blob, reg->bit_size);
   blob_write_uint32(ctx->blob, reg->num_array_elems);
   blob_write_uint32(ctx->blob, reg->index);
   blob_write_uint32(ctx->blob, !ctx->strip && reg->name);
   if (!ctx->strip && reg->name)
      blob_write_string(ctx->blob, reg->name);
}

static nir_register *
read_register(read_ctx *ctx)
{
   nir_register *reg = ralloc(ctx->nir, nir_register);
   read_add_object(ctx, reg);
   reg->num_components = blob_read_uint32(ctx->blob);
   reg->bit_size = blob_read_uint32(ctx->blob);
   reg->num_array_elems = blob_read_uint32(ctx->blob);
   reg->index = blob_read_uint32(ctx->blob);
   bool has_name = blob_read_uint32(ctx->blob);
   if (has_name) {
      const char *name = blob_read_string(ctx->blob);
      reg->name = ralloc_strdup(reg, name);
   } else {
      reg->name = NULL;
   }

   list_inithead(&reg->uses);
   list_inithead(&reg->defs);
   list_inithead(&reg->if_uses);

   return reg;
}

static void
write_reg_list(write_ctx *ctx, const struct exec_list *src)
{
   blob_write_uint32(ctx->blob, exec_list_length(src));
   foreach_list_typed(nir_register, reg, node, src)
      write_register(ctx, reg);
}

static void
read_reg_list(read_ctx *ctx, struct exec_list *dst)
{
   exec_list_make_empty(dst);
   unsigned num_regs = blob_read_uint32(ctx->blob);
   for (unsigned i = 0; i < num_regs; i++) {
      nir_register *reg = read_register(ctx);
      exec_list_push_tail(dst, &reg->node);
   }
}

union packed_src {
   uint32_t u32;
   struct {
      unsigned is_ssa:1;   /* <-- Header */
      unsigned is_indirect:1;
      unsigned object_idx:20;
      unsigned _footer:10; /* <-- Footer */
   } any;
   struct {
      unsigned _header:22; /* <-- Header */
      unsigned negate:1;   /* <-- Footer */
      unsigned abs:1;
      unsigned swizzle_x:2;
      unsigned swizzle_y:2;
      unsigned swizzle_z:2;
      unsigned swizzle_w:2;
   } alu;
   struct {
      unsigned _header:22; /* <-- Header */
      unsigned src_type:5; /* <-- Footer */
      unsigned _pad:5;
   } tex;
};

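/* Layout sketch (for orientation only): every packed_src shares a 22-bit
 * header (is_ssa, is_indirect, object_idx) and interprets the remaining
 * 10 footer bits per user. An ALU source uses them for negate/abs plus a
 * 2-bit swizzle per channel; a texture source uses 5 of them for src_type.
 */
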
static void
write_src_full(write_ctx *ctx, const nir_src *src, union packed_src header)
{
   /* Since sources are very frequent, we try to save some space when storing
    * them. In particular, the header stores whether the source is SSA and,
    * for registers, whether there is an indirect index, in its low two bits.
    * The remaining 20 header bits hold the object index, which is guaranteed
    * to fit: otherwise our address space would've been exhausted allocating
    * the remap table!
    */
   header.any.is_ssa = src->is_ssa;
   if (src->is_ssa) {
      header.any.object_idx = write_lookup_object(ctx, src->ssa);
      blob_write_uint32(ctx->blob, header.u32);
   } else {
      header.any.object_idx = write_lookup_object(ctx, src->reg.reg);
      header.any.is_indirect = !!src->reg.indirect;
      blob_write_uint32(ctx->blob, header.u32);
      blob_write_uint32(ctx->blob, src->reg.base_offset);
      if (src->reg.indirect) {
         union packed_src header = {0};
         write_src_full(ctx, src->reg.indirect, header);
      }
   }
}

static void
write_src(write_ctx *ctx, const nir_src *src)
{
   union packed_src header = {0};
   write_src_full(ctx, src, header);
}

static union packed_src
read_src(read_ctx *ctx, nir_src *src, void *mem_ctx)
{
   STATIC_ASSERT(sizeof(union packed_src) == 4);
   union packed_src header;
   header.u32 = blob_read_uint32(ctx->blob);

   src->is_ssa = header.any.is_ssa;
   if (src->is_ssa) {
      src->ssa = read_lookup_object(ctx, header.any.object_idx);
   } else {
      src->reg.reg = read_lookup_object(ctx, header.any.object_idx);
      src->reg.base_offset = blob_read_uint32(ctx->blob);
      if (header.any.is_indirect) {
         src->reg.indirect = ralloc(mem_ctx, nir_src);
         read_src(ctx, src->reg.indirect, mem_ctx);
      } else {
         src->reg.indirect = NULL;
      }
   }
   return header;
}

union packed_dest {
   uint8_t u8;
   struct {
      uint8_t is_ssa:1;
      uint8_t has_name:1;
      uint8_t num_components:3;
      uint8_t bit_size:3;
   } ssa;
   struct {
      uint8_t is_ssa:1;
      uint8_t is_indirect:1;
      uint8_t _pad:6;
   } reg;
};

enum intrinsic_const_indices_encoding {
   /* Use the 9 bits of packed_const_indices to store 1-9 indices.
    * 1 9-bit index, or 2 4-bit indices, or 3 3-bit indices, or
    * 4 2-bit indices, or 5-9 1-bit indices.
    *
    * The common case for load_ubo is 0, 0, 0, which is trivially represented.
    * The common cases for load_interpolated_input also fit here, e.g.: 7, 3
    */
   const_indices_9bit_all_combined,

   const_indices_8bit,  /* 8 bits per element */
   const_indices_16bit, /* 16 bits per element */
   const_indices_32bit, /* 32 bits per element */
};

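/* Worked example (illustrative): load_interpolated_input with const indices
 * 7 and 3 has max_bits = 3 and num_indices = 2, so 3 * 2 <= 9 and both fit
 * in packed_const_indices as 7 | (3 << 4), each index getting 9 / 2 = 4 bits.
 */
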
enum load_const_packing {
   /* Constants are not packed and are stored in following dwords. */
   load_const_full,

   /* packed_value contains high 19 bits, low bits are 0,
    * good for floating-point decimals
    */
   load_const_scalar_hi_19bits,

   /* packed_value contains low 19 bits, high bits are sign-extended */
   load_const_scalar_lo_19bits_sext,
};

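/* Worked example (illustrative): the 32-bit float 1.0f is 0x3f800000, whose
 * low 13 bits are all zero, so it packs as load_const_scalar_hi_19bits with
 * packed_value = 0x3f800000 >> 13 = 0x1fc00. A small integer like 25 packs
 * as load_const_scalar_lo_19bits_sext because it survives a 19-bit sign
 * extension. Either way the whole load_const fits in one header dword.
 */
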
union packed_instr {
   uint32_t u32;
   struct {
      unsigned instr_type:4; /* always present */
      unsigned _pad:20;
      unsigned dest:8;       /* always last */
   } any;
   struct {
      unsigned instr_type:4;
      unsigned exact:1;
      unsigned no_signed_wrap:1;
      unsigned no_unsigned_wrap:1;
      unsigned saturate:1;
      /* Reg: writemask; SSA: swizzles for 2 srcs */
      unsigned writemask_or_two_swizzles:4;
      unsigned op:9;
      unsigned packed_src_ssa_16bit:1;
      /* Scalarized ALUs always have the same header. */
      unsigned num_followup_alu_sharing_header:2;
      unsigned dest:8;
   } alu;
   struct {
      unsigned instr_type:4;
      unsigned deref_type:3;
      unsigned cast_type_same_as_last:1;
      unsigned mode:10;                /* deref_var redefines this */
      unsigned packed_src_ssa_16bit:1; /* deref_var redefines this */
      unsigned _pad:5;                 /* deref_var redefines this */
      unsigned dest:8;
   } deref;
   struct {
      unsigned instr_type:4;
      unsigned deref_type:3;
      unsigned _pad:1;
      unsigned object_idx:16; /* if 0, the object ID is a separate uint32 */
      unsigned dest:8;
   } deref_var;
   struct {
      unsigned instr_type:4;
      unsigned intrinsic:9;
      unsigned const_indices_encoding:2;
      unsigned packed_const_indices:9;
      unsigned dest:8;
   } intrinsic;
   struct {
      unsigned instr_type:4;
      unsigned last_component:4;
      unsigned bit_size:3;
      unsigned packing:2;       /* enum load_const_packing */
      unsigned packed_value:19; /* meaning determined by packing */
   } load_const;
   struct {
      unsigned instr_type:4;
      unsigned last_component:4;
      unsigned bit_size:3;
      unsigned _pad:21;
   } undef;
   struct {
      unsigned instr_type:4;
      unsigned num_srcs:4;
      unsigned op:4;
      unsigned dest:8;
      unsigned _pad:12;
   } tex;
   struct {
      unsigned instr_type:4;
      unsigned num_srcs:20;
      unsigned dest:8;
   } phi;
   struct {
      unsigned instr_type:4;
      unsigned type:2;
      unsigned _pad:26;
   } jump;
};

/* Write the instruction header: the low 24 bits ("lo24") carry the
 * instruction-specific fields and the top 8 bits carry the packed dest.
 */
static void
write_dest(write_ctx *ctx, const nir_dest *dst, union packed_instr header,
           nir_instr_type instr_type)
{
   STATIC_ASSERT(sizeof(union packed_dest) == 1);
   union packed_dest dest;
   dest.u8 = 0;

   dest.ssa.is_ssa = dst->is_ssa;
   if (dst->is_ssa) {
      dest.ssa.has_name = !ctx->strip && dst->ssa.name;
      dest.ssa.num_components =
         encode_num_components_in_3bits(dst->ssa.num_components);
      dest.ssa.bit_size = encode_bit_size_3bits(dst->ssa.bit_size);
   } else {
      dest.reg.is_indirect = !!(dst->reg.indirect);
   }
   header.any.dest = dest.u8;

   /* Check if the current ALU instruction has the same header as the
    * previous instruction that is also ALU. If so, we don't have to write
    * the current header. This is a typical occurrence after scalarization.
    */
   if (instr_type == nir_instr_type_alu) {
      bool equal_header = false;

      if (ctx->last_instr_type == nir_instr_type_alu) {
         assert(ctx->last_alu_header_offset);
         union packed_instr *last_header =
            (union packed_instr *)(ctx->blob->data +
                                   ctx->last_alu_header_offset);

         /* Clear the field that counts ALUs with equal headers. */
         union packed_instr clean_header;
         clean_header.u32 = last_header->u32;
         clean_header.alu.num_followup_alu_sharing_header = 0;

         /* There can be at most 4 consecutive ALU instructions
          * sharing the same header.
          */
         if (last_header->alu.num_followup_alu_sharing_header < 3 &&
             header.u32 == clean_header.u32) {
            last_header->alu.num_followup_alu_sharing_header++;
            equal_header = true;
         }
      }

      if (!equal_header) {
         ctx->last_alu_header_offset = ctx->blob->size;
         blob_write_uint32(ctx->blob, header.u32);
      }
   } else {
      blob_write_uint32(ctx->blob, header.u32);
   }

   if (dest.ssa.is_ssa &&
       dest.ssa.num_components == NUM_COMPONENTS_IS_SEPARATE_7)
      blob_write_uint32(ctx->blob, dst->ssa.num_components);

   if (dst->is_ssa) {
      write_add_object(ctx, &dst->ssa);
      if (dest.ssa.has_name)
         blob_write_string(ctx->blob, dst->ssa.name);
   } else {
      blob_write_uint32(ctx->blob, write_lookup_object(ctx, dst->reg.reg));
      blob_write_uint32(ctx->blob, dst->reg.base_offset);
      if (dst->reg.indirect)
         write_src(ctx, dst->reg.indirect);
   }
}

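/* Header-sharing sketch (illustrative): after scalarization, a vec4 fadd
 * becomes four scalar fadds with identical flags, op, and dest encoding.
 * Only the first header dword is written; its
 * num_followup_alu_sharing_header field is bumped to 3, and read_instr
 * later re-reads the same header for the three follow-up ALUs.
 */
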
static void
read_dest(read_ctx *ctx, nir_dest *dst, nir_instr *instr,
          union packed_instr header)
{
   union packed_dest dest;
   dest.u8 = header.any.dest;

   if (dest.ssa.is_ssa) {
      unsigned bit_size = decode_bit_size_3bits(dest.ssa.bit_size);
      unsigned num_components;
      if (dest.ssa.num_components == NUM_COMPONENTS_IS_SEPARATE_7)
         num_components = blob_read_uint32(ctx->blob);
      else
         num_components = decode_num_components_in_3bits(dest.ssa.num_components);
      char *name = dest.ssa.has_name ? blob_read_string(ctx->blob) : NULL;
      nir_ssa_dest_init(instr, dst, num_components, bit_size, name);
      read_add_object(ctx, &dst->ssa);
   } else {
      dst->reg.reg = read_object(ctx);
      dst->reg.base_offset = blob_read_uint32(ctx->blob);
      if (dest.reg.is_indirect) {
         dst->reg.indirect = ralloc(instr, nir_src);
         read_src(ctx, dst->reg.indirect, instr);
      }
   }
}

static bool
are_object_ids_16bit(write_ctx *ctx)
{
   /* Check the highest object ID, because they are monotonic. */
   return ctx->next_idx < (1 << 16);
}

static bool
is_alu_src_ssa_16bit(write_ctx *ctx, const nir_alu_instr *alu)
{
   unsigned num_srcs = nir_op_infos[alu->op].num_inputs;

   for (unsigned i = 0; i < num_srcs; i++) {
      if (!alu->src[i].src.is_ssa || alu->src[i].abs || alu->src[i].negate)
         return false;

      unsigned src_components = nir_ssa_alu_instr_src_components(alu, i);

      for (unsigned chan = 0; chan < src_components; chan++) {
         /* The swizzles for src0.x and src1.x are stored
          * in writemask_or_two_swizzles for SSA ALUs.
          */
         if (alu->dest.dest.is_ssa && i < 2 && chan == 0 &&
             alu->src[i].swizzle[chan] < 4)
            continue;

         if (alu->src[i].swizzle[chan] != chan)
            return false;
      }
   }

   return are_object_ids_16bit(ctx);
}

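/* Packing sketch (illustrative): when this returns true, write_alu emits
 * each source as just a uint16 object index instead of a full packed_src
 * dword, so a scalar "fmul ssa_5.x, ssa_9.x" needs 4 bytes of sources, with
 * the two .x swizzles folded into writemask_or_two_swizzles in the header.
 */
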
static void
write_alu(write_ctx *ctx, const nir_alu_instr *alu)
{
   unsigned num_srcs = nir_op_infos[alu->op].num_inputs;
   unsigned dst_components = nir_dest_num_components(alu->dest.dest);

   /* 9 bits for nir_op */
   STATIC_ASSERT(nir_num_opcodes <= 512);
   union packed_instr header;
   header.u32 = 0;

   header.alu.instr_type = alu->instr.type;
   header.alu.exact = alu->exact;
   header.alu.no_signed_wrap = alu->no_signed_wrap;
   header.alu.no_unsigned_wrap = alu->no_unsigned_wrap;
   header.alu.saturate = alu->dest.saturate;
   header.alu.op = alu->op;
   header.alu.packed_src_ssa_16bit = is_alu_src_ssa_16bit(ctx, alu);

   if (header.alu.packed_src_ssa_16bit &&
       alu->dest.dest.is_ssa) {
      /* For packed srcs of SSA ALUs, this field stores the swizzles. */
      header.alu.writemask_or_two_swizzles = alu->src[0].swizzle[0];
      if (num_srcs > 1)
         header.alu.writemask_or_two_swizzles |= alu->src[1].swizzle[0] << 2;
   } else if (!alu->dest.dest.is_ssa && dst_components <= 4) {
      /* For vec4 registers, this field is a writemask. */
      header.alu.writemask_or_two_swizzles = alu->dest.write_mask;
   }

   write_dest(ctx, &alu->dest.dest, header, alu->instr.type);

   if (!alu->dest.dest.is_ssa && dst_components > 4)
      blob_write_uint32(ctx->blob, alu->dest.write_mask);

   if (header.alu.packed_src_ssa_16bit) {
      for (unsigned i = 0; i < num_srcs; i++) {
         assert(alu->src[i].src.is_ssa);
         unsigned idx = write_lookup_object(ctx, alu->src[i].src.ssa);
         assert(idx < (1 << 16));
         blob_write_uint16(ctx->blob, idx);
      }
   } else {
      for (unsigned i = 0; i < num_srcs; i++) {
         unsigned src_channels = nir_ssa_alu_instr_src_components(alu, i);
         unsigned src_components = nir_src_num_components(alu->src[i].src);
         union packed_src src;
         bool packed = src_components <= 4 && src_channels <= 4;
         src.u32 = 0;

         src.alu.negate = alu->src[i].negate;
         src.alu.abs = alu->src[i].abs;

         if (packed) {
            src.alu.swizzle_x = alu->src[i].swizzle[0];
            src.alu.swizzle_y = alu->src[i].swizzle[1];
            src.alu.swizzle_z = alu->src[i].swizzle[2];
            src.alu.swizzle_w = alu->src[i].swizzle[3];
         }

         write_src_full(ctx, &alu->src[i].src, src);

         /* Store swizzles for vec8 and vec16. */
         if (!packed) {
            for (unsigned o = 0; o < src_channels; o += 8) {
               unsigned value = 0;

               for (unsigned j = 0; j < 8 && o + j < src_channels; j++) {
                  value |= (uint32_t)alu->src[i].swizzle[o + j] <<
                           (4 * j); /* 4 bits per swizzle */
               }

               blob_write_uint32(ctx->blob, value);
            }
         }
      }
   }
}

static nir_alu_instr *
read_alu(read_ctx *ctx, union packed_instr header)
{
   unsigned num_srcs = nir_op_infos[header.alu.op].num_inputs;
   nir_alu_instr *alu = nir_alu_instr_create(ctx->nir, header.alu.op);

   alu->exact = header.alu.exact;
   alu->no_signed_wrap = header.alu.no_signed_wrap;
   alu->no_unsigned_wrap = header.alu.no_unsigned_wrap;
   alu->dest.saturate = header.alu.saturate;

   read_dest(ctx, &alu->dest.dest, &alu->instr, header);

   unsigned dst_components = nir_dest_num_components(alu->dest.dest);

   if (alu->dest.dest.is_ssa) {
      alu->dest.write_mask = u_bit_consecutive(0, dst_components);
   } else if (dst_components <= 4) {
      alu->dest.write_mask = header.alu.writemask_or_two_swizzles;
   } else {
      alu->dest.write_mask = blob_read_uint32(ctx->blob);
   }

   if (header.alu.packed_src_ssa_16bit) {
      for (unsigned i = 0; i < num_srcs; i++) {
         nir_alu_src *src = &alu->src[i];
         src->src.is_ssa = true;
         src->src.ssa = read_lookup_object(ctx, blob_read_uint16(ctx->blob));

         memset(&src->swizzle, 0, sizeof(src->swizzle));

         unsigned src_components = nir_ssa_alu_instr_src_components(alu, i);

         for (unsigned chan = 0; chan < src_components; chan++)
            src->swizzle[chan] = chan;
      }
   } else {
      for (unsigned i = 0; i < num_srcs; i++) {
         union packed_src src = read_src(ctx, &alu->src[i].src, &alu->instr);
         unsigned src_channels = nir_ssa_alu_instr_src_components(alu, i);
         unsigned src_components = nir_src_num_components(alu->src[i].src);
         bool packed = src_components <= 4 && src_channels <= 4;

         alu->src[i].negate = src.alu.negate;
         alu->src[i].abs = src.alu.abs;

         memset(&alu->src[i].swizzle, 0, sizeof(alu->src[i].swizzle));

         if (packed) {
            alu->src[i].swizzle[0] = src.alu.swizzle_x;
            alu->src[i].swizzle[1] = src.alu.swizzle_y;
            alu->src[i].swizzle[2] = src.alu.swizzle_z;
            alu->src[i].swizzle[3] = src.alu.swizzle_w;
         } else {
            /* Load swizzles for vec8 and vec16. */
            for (unsigned o = 0; o < src_channels; o += 8) {
               unsigned value = blob_read_uint32(ctx->blob);

               for (unsigned j = 0; j < 8 && o + j < src_channels; j++) {
                  alu->src[i].swizzle[o + j] =
                     (value >> (4 * j)) & 0xf; /* 4 bits per swizzle */
               }
            }
         }
      }
   }

   if (header.alu.packed_src_ssa_16bit &&
       alu->dest.dest.is_ssa) {
      alu->src[0].swizzle[0] = header.alu.writemask_or_two_swizzles & 0x3;
      if (num_srcs > 1)
         alu->src[1].swizzle[0] = header.alu.writemask_or_two_swizzles >> 2;
   }

   return alu;
}

static void
write_deref(write_ctx *ctx, const nir_deref_instr *deref)
{
   assert(deref->deref_type < 8);
   assert(deref->mode < (1 << 10));

   union packed_instr header;
   header.u32 = 0;

   header.deref.instr_type = deref->instr.type;
   header.deref.deref_type = deref->deref_type;

   if (deref->deref_type == nir_deref_type_cast) {
      header.deref.mode = deref->mode;
      header.deref.cast_type_same_as_last = deref->type == ctx->last_type;
   }

   unsigned var_idx = 0;
   if (deref->deref_type == nir_deref_type_var) {
      var_idx = write_lookup_object(ctx, deref->var);
      if (var_idx && var_idx < (1 << 16))
         header.deref_var.object_idx = var_idx;
   }

   if (deref->deref_type == nir_deref_type_array ||
       deref->deref_type == nir_deref_type_ptr_as_array) {
      header.deref.packed_src_ssa_16bit =
         deref->parent.is_ssa && deref->arr.index.is_ssa &&
         are_object_ids_16bit(ctx);
   }

   write_dest(ctx, &deref->dest, header, deref->instr.type);

   switch (deref->deref_type) {
   case nir_deref_type_var:
      if (!header.deref_var.object_idx)
         blob_write_uint32(ctx->blob, var_idx);
      break;

   case nir_deref_type_struct:
      write_src(ctx, &deref->parent);
      blob_write_uint32(ctx->blob, deref->strct.index);
      break;

   case nir_deref_type_array:
   case nir_deref_type_ptr_as_array:
      if (header.deref.packed_src_ssa_16bit) {
         blob_write_uint16(ctx->blob,
                           write_lookup_object(ctx, deref->parent.ssa));
         blob_write_uint16(ctx->blob,
                           write_lookup_object(ctx, deref->arr.index.ssa));
      } else {
         write_src(ctx, &deref->parent);
         write_src(ctx, &deref->arr.index);
      }
      break;

   case nir_deref_type_cast:
      write_src(ctx, &deref->parent);
      blob_write_uint32(ctx->blob, deref->cast.ptr_stride);
      if (!header.deref.cast_type_same_as_last) {
         encode_type_to_blob(ctx->blob, deref->type);
         ctx->last_type = deref->type;
      }
      break;

   case nir_deref_type_array_wildcard:
      write_src(ctx, &deref->parent);
      break;

   default:
      unreachable("Invalid deref type");
   }
}

static nir_deref_instr *
read_deref(read_ctx *ctx, union packed_instr header)
{
   nir_deref_type deref_type = header.deref.deref_type;
   nir_deref_instr *deref = nir_deref_instr_create(ctx->nir, deref_type);

   read_dest(ctx, &deref->dest, &deref->instr, header);

   nir_deref_instr *parent;

   switch (deref->deref_type) {
   case nir_deref_type_var:
      if (header.deref_var.object_idx)
         deref->var = read_lookup_object(ctx, header.deref_var.object_idx);
      else
         deref->var = read_object(ctx);

      deref->type = deref->var->type;
      break;

   case nir_deref_type_struct:
      read_src(ctx, &deref->parent, &deref->instr);
      parent = nir_src_as_deref(deref->parent);
      deref->strct.index = blob_read_uint32(ctx->blob);
      deref->type = glsl_get_struct_field(parent->type, deref->strct.index);
      break;

   case nir_deref_type_array:
   case nir_deref_type_ptr_as_array:
      if (header.deref.packed_src_ssa_16bit) {
         deref->parent.is_ssa = true;
         deref->parent.ssa = read_lookup_object(ctx, blob_read_uint16(ctx->blob));
         deref->arr.index.is_ssa = true;
         deref->arr.index.ssa = read_lookup_object(ctx, blob_read_uint16(ctx->blob));
      } else {
         read_src(ctx, &deref->parent, &deref->instr);
         read_src(ctx, &deref->arr.index, &deref->instr);
      }

      parent = nir_src_as_deref(deref->parent);
      if (deref->deref_type == nir_deref_type_array)
         deref->type = glsl_get_array_element(parent->type);
      else
         deref->type = parent->type;
      break;

   case nir_deref_type_cast:
      read_src(ctx, &deref->parent, &deref->instr);
      deref->cast.ptr_stride = blob_read_uint32(ctx->blob);
      if (header.deref.cast_type_same_as_last) {
         deref->type = ctx->last_type;
      } else {
         deref->type = decode_type_from_blob(ctx->blob);
         ctx->last_type = deref->type;
      }
      break;

   case nir_deref_type_array_wildcard:
      read_src(ctx, &deref->parent, &deref->instr);
      parent = nir_src_as_deref(deref->parent);
      deref->type = glsl_get_array_element(parent->type);
      break;

   default:
      unreachable("Invalid deref type");
   }

   if (deref_type == nir_deref_type_var) {
      deref->mode = deref->var->data.mode;
   } else if (deref->deref_type == nir_deref_type_cast) {
      deref->mode = header.deref.mode;
   } else {
      assert(deref->parent.is_ssa);
      deref->mode = nir_instr_as_deref(deref->parent.ssa->parent_instr)->mode;
   }

   return deref;
}

static void
write_intrinsic(write_ctx *ctx, const nir_intrinsic_instr *intrin)
{
   /* 9 bits for nir_intrinsic_op */
   STATIC_ASSERT(nir_num_intrinsics <= 512);
   unsigned num_srcs = nir_intrinsic_infos[intrin->intrinsic].num_srcs;
   unsigned num_indices = nir_intrinsic_infos[intrin->intrinsic].num_indices;
   assert(intrin->intrinsic < 512);

   union packed_instr header;
   header.u32 = 0;

   header.intrinsic.instr_type = intrin->instr.type;
   header.intrinsic.intrinsic = intrin->intrinsic;

   /* Analyze constant indices to decide how to encode them. */
   if (num_indices) {
      unsigned max_bits = 0;
      for (unsigned i = 0; i < num_indices; i++) {
         unsigned max = util_last_bit(intrin->const_index[i]);
         max_bits = MAX2(max_bits, max);
      }

      if (max_bits * num_indices <= 9) {
         header.intrinsic.const_indices_encoding = const_indices_9bit_all_combined;

         /* Pack all const indices into 9 bits. */
         unsigned bit_size = 9 / num_indices;
         for (unsigned i = 0; i < num_indices; i++) {
            header.intrinsic.packed_const_indices |=
               intrin->const_index[i] << (i * bit_size);
         }
      } else if (max_bits <= 8)
         header.intrinsic.const_indices_encoding = const_indices_8bit;
      else if (max_bits <= 16)
         header.intrinsic.const_indices_encoding = const_indices_16bit;
      else
         header.intrinsic.const_indices_encoding = const_indices_32bit;
   }

   if (nir_intrinsic_infos[intrin->intrinsic].has_dest)
      write_dest(ctx, &intrin->dest, header, intrin->instr.type);
   else
      blob_write_uint32(ctx->blob, header.u32);

   for (unsigned i = 0; i < num_srcs; i++)
      write_src(ctx, &intrin->src[i]);

   if (num_indices) {
      switch (header.intrinsic.const_indices_encoding) {
      case const_indices_8bit:
         for (unsigned i = 0; i < num_indices; i++)
            blob_write_uint8(ctx->blob, intrin->const_index[i]);
         break;
      case const_indices_16bit:
         for (unsigned i = 0; i < num_indices; i++)
            blob_write_uint16(ctx->blob, intrin->const_index[i]);
         break;
      case const_indices_32bit:
         for (unsigned i = 0; i < num_indices; i++)
            blob_write_uint32(ctx->blob, intrin->const_index[i]);
         break;
      }
   }
}

static nir_intrinsic_instr *
read_intrinsic(read_ctx *ctx, union packed_instr header)
{
   nir_intrinsic_op op = header.intrinsic.intrinsic;
   nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(ctx->nir, op);

   unsigned num_srcs = nir_intrinsic_infos[op].num_srcs;
   unsigned num_indices = nir_intrinsic_infos[op].num_indices;

   if (nir_intrinsic_infos[op].has_dest)
      read_dest(ctx, &intrin->dest, &intrin->instr, header);

   for (unsigned i = 0; i < num_srcs; i++)
      read_src(ctx, &intrin->src[i], &intrin->instr);

   /* Vectorized intrinsics have num_components equal to that of the dest or
    * of whichever src has 0 components in the info. Find it.
    */
   if (nir_intrinsic_infos[op].has_dest &&
       nir_intrinsic_infos[op].dest_components == 0) {
      intrin->num_components = nir_dest_num_components(intrin->dest);
   } else {
      for (unsigned i = 0; i < num_srcs; i++) {
         if (nir_intrinsic_infos[op].src_components[i] == 0) {
            intrin->num_components = nir_src_num_components(intrin->src[i]);
            break;
         }
      }
   }

   if (num_indices) {
      switch (header.intrinsic.const_indices_encoding) {
      case const_indices_9bit_all_combined: {
         unsigned bit_size = 9 / num_indices;
         unsigned bit_mask = u_bit_consecutive(0, bit_size);
         for (unsigned i = 0; i < num_indices; i++) {
            intrin->const_index[i] =
               (header.intrinsic.packed_const_indices >> (i * bit_size)) &
               bit_mask;
         }
         break;
      }
      case const_indices_8bit:
         for (unsigned i = 0; i < num_indices; i++)
            intrin->const_index[i] = blob_read_uint8(ctx->blob);
         break;
      case const_indices_16bit:
         for (unsigned i = 0; i < num_indices; i++)
            intrin->const_index[i] = blob_read_uint16(ctx->blob);
         break;
      case const_indices_32bit:
         for (unsigned i = 0; i < num_indices; i++)
            intrin->const_index[i] = blob_read_uint32(ctx->blob);
         break;
      }
   }

   return intrin;
}

static void
write_load_const(write_ctx *ctx, const nir_load_const_instr *lc)
{
   assert(lc->def.num_components >= 1 && lc->def.num_components <= 16);
   union packed_instr header;
   header.u32 = 0;

   header.load_const.instr_type = lc->instr.type;
   header.load_const.last_component = lc->def.num_components - 1;
   header.load_const.bit_size = encode_bit_size_3bits(lc->def.bit_size);
   header.load_const.packing = load_const_full;

   /* Try to pack 1-component constants into the 19 free bits in the header. */
   if (lc->def.num_components == 1) {
      switch (lc->def.bit_size) {
      case 64:
         if ((lc->value[0].u64 & 0x1fffffffffffull) == 0) {
            /* packed_value contains high 19 bits, low bits are 0 */
            header.load_const.packing = load_const_scalar_hi_19bits;
            header.load_const.packed_value = lc->value[0].u64 >> 45;
         } else if (((lc->value[0].i64 << 45) >> 45) == lc->value[0].i64) {
            /* packed_value contains low 19 bits, high bits are sign-extended */
            header.load_const.packing = load_const_scalar_lo_19bits_sext;
            header.load_const.packed_value = lc->value[0].u64;
         }
         break;

      case 32:
         if ((lc->value[0].u32 & 0x1fff) == 0) {
            header.load_const.packing = load_const_scalar_hi_19bits;
            header.load_const.packed_value = lc->value[0].u32 >> 13;
         } else if (((lc->value[0].i32 << 13) >> 13) == lc->value[0].i32) {
            header.load_const.packing = load_const_scalar_lo_19bits_sext;
            header.load_const.packed_value = lc->value[0].u32;
         }
         break;

      case 16:
         header.load_const.packing = load_const_scalar_lo_19bits_sext;
         header.load_const.packed_value = lc->value[0].u16;
         break;
      case 8:
         header.load_const.packing = load_const_scalar_lo_19bits_sext;
         header.load_const.packed_value = lc->value[0].u8;
         break;
      case 1:
         header.load_const.packing = load_const_scalar_lo_19bits_sext;
         header.load_const.packed_value = lc->value[0].b;
         break;
      default:
         unreachable("invalid bit_size");
      }
   }

   blob_write_uint32(ctx->blob, header.u32);

   if (header.load_const.packing == load_const_full) {
      switch (lc->def.bit_size) {
      case 64:
         blob_write_bytes(ctx->blob, lc->value,
                          sizeof(*lc->value) * lc->def.num_components);
         break;

      case 32:
         for (unsigned i = 0; i < lc->def.num_components; i++)
            blob_write_uint32(ctx->blob, lc->value[i].u32);
         break;

      case 16:
         for (unsigned i = 0; i < lc->def.num_components; i++)
            blob_write_uint16(ctx->blob, lc->value[i].u16);
         break;

      default:
         assert(lc->def.bit_size <= 8);
         for (unsigned i = 0; i < lc->def.num_components; i++)
            blob_write_uint8(ctx->blob, lc->value[i].u8);
         break;
      }
   }

   write_add_object(ctx, &lc->def);
}

static nir_load_const_instr *
read_load_const(read_ctx *ctx, union packed_instr header)
{
   nir_load_const_instr *lc =
      nir_load_const_instr_create(ctx->nir, header.load_const.last_component + 1,
                                  decode_bit_size_3bits(header.load_const.bit_size));

   switch (header.load_const.packing) {
   case load_const_scalar_hi_19bits:
      switch (lc->def.bit_size) {
      case 64:
         lc->value[0].u64 = (uint64_t)header.load_const.packed_value << 45;
         break;
      case 32:
         lc->value[0].u32 = (uint64_t)header.load_const.packed_value << 13;
         break;
      default:
         unreachable("invalid bit_size");
      }
      break;

   case load_const_scalar_lo_19bits_sext:
      switch (lc->def.bit_size) {
      case 64:
         lc->value[0].i64 = ((int64_t)header.load_const.packed_value << 45) >> 45;
         break;
      case 32:
         lc->value[0].i32 = ((int32_t)header.load_const.packed_value << 13) >> 13;
         break;
      case 16:
         lc->value[0].u16 = header.load_const.packed_value;
         break;
      case 8:
         lc->value[0].u8 = header.load_const.packed_value;
         break;
      case 1:
         lc->value[0].b = header.load_const.packed_value;
         break;
      default:
         unreachable("invalid bit_size");
      }
      break;

   case load_const_full:
      switch (lc->def.bit_size) {
      case 64:
         blob_copy_bytes(ctx->blob, lc->value, sizeof(*lc->value) * lc->def.num_components);
         break;

      case 32:
         for (unsigned i = 0; i < lc->def.num_components; i++)
            lc->value[i].u32 = blob_read_uint32(ctx->blob);
         break;

      case 16:
         for (unsigned i = 0; i < lc->def.num_components; i++)
            lc->value[i].u16 = blob_read_uint16(ctx->blob);
         break;

      default:
         assert(lc->def.bit_size <= 8);
         for (unsigned i = 0; i < lc->def.num_components; i++)
            lc->value[i].u8 = blob_read_uint8(ctx->blob);
         break;
      }
      break;
   }

   read_add_object(ctx, &lc->def);
   return lc;
}

static void
write_ssa_undef(write_ctx *ctx, const nir_ssa_undef_instr *undef)
{
   assert(undef->def.num_components >= 1 && undef->def.num_components <= 16);

   union packed_instr header;
   header.u32 = 0;

   header.undef.instr_type = undef->instr.type;
   header.undef.last_component = undef->def.num_components - 1;
   header.undef.bit_size = encode_bit_size_3bits(undef->def.bit_size);

   blob_write_uint32(ctx->blob, header.u32);
   write_add_object(ctx, &undef->def);
}

static nir_ssa_undef_instr *
read_ssa_undef(read_ctx *ctx, union packed_instr header)
{
   nir_ssa_undef_instr *undef =
      nir_ssa_undef_instr_create(ctx->nir, header.undef.last_component + 1,
                                 decode_bit_size_3bits(header.undef.bit_size));

   read_add_object(ctx, &undef->def);
   return undef;
}

union packed_tex_data {
   uint32_t u32;
   struct {
      enum glsl_sampler_dim sampler_dim:4;
      unsigned dest_type:8;
      unsigned coord_components:3;
      unsigned is_array:1;
      unsigned is_shadow:1;
      unsigned is_new_style_shadow:1;
      unsigned component:2;
      unsigned texture_non_uniform:1;
      unsigned sampler_non_uniform:1;
      unsigned unused:8; /* Mark unused for valgrind. */
   } u;
};

static void
write_tex(write_ctx *ctx, const nir_tex_instr *tex)
{
   assert(tex->num_srcs < 16);
   assert(tex->op < 16);

   union packed_instr header;
   header.u32 = 0;

   header.tex.instr_type = tex->instr.type;
   header.tex.num_srcs = tex->num_srcs;
   header.tex.op = tex->op;

   write_dest(ctx, &tex->dest, header, tex->instr.type);

   blob_write_uint32(ctx->blob, tex->texture_index);
   blob_write_uint32(ctx->blob, tex->sampler_index);
   if (tex->op == nir_texop_tg4)
      blob_write_bytes(ctx->blob, tex->tg4_offsets, sizeof(tex->tg4_offsets));

   STATIC_ASSERT(sizeof(union packed_tex_data) == sizeof(uint32_t));
   union packed_tex_data packed = {
      .u.sampler_dim = tex->sampler_dim,
      .u.dest_type = tex->dest_type,
      .u.coord_components = tex->coord_components,
      .u.is_array = tex->is_array,
      .u.is_shadow = tex->is_shadow,
      .u.is_new_style_shadow = tex->is_new_style_shadow,
      .u.component = tex->component,
      .u.texture_non_uniform = tex->texture_non_uniform,
      .u.sampler_non_uniform = tex->sampler_non_uniform,
   };
   blob_write_uint32(ctx->blob, packed.u32);

   for (unsigned i = 0; i < tex->num_srcs; i++) {
      union packed_src src;
      src.u32 = 0;
      src.tex.src_type = tex->src[i].src_type;
      write_src_full(ctx, &tex->src[i].src, src);
   }
}

static nir_tex_instr *
read_tex(read_ctx *ctx, union packed_instr header)
{
   nir_tex_instr *tex = nir_tex_instr_create(ctx->nir, header.tex.num_srcs);

   read_dest(ctx, &tex->dest, &tex->instr, header);

   tex->op = header.tex.op;
   tex->texture_index = blob_read_uint32(ctx->blob);
   tex->sampler_index = blob_read_uint32(ctx->blob);
   if (tex->op == nir_texop_tg4)
      blob_copy_bytes(ctx->blob, tex->tg4_offsets, sizeof(tex->tg4_offsets));

   union packed_tex_data packed;
   packed.u32 = blob_read_uint32(ctx->blob);
   tex->sampler_dim = packed.u.sampler_dim;
   tex->dest_type = packed.u.dest_type;
   tex->coord_components = packed.u.coord_components;
   tex->is_array = packed.u.is_array;
   tex->is_shadow = packed.u.is_shadow;
   tex->is_new_style_shadow = packed.u.is_new_style_shadow;
   tex->component = packed.u.component;
   tex->texture_non_uniform = packed.u.texture_non_uniform;
   tex->sampler_non_uniform = packed.u.sampler_non_uniform;

   for (unsigned i = 0; i < tex->num_srcs; i++) {
      union packed_src src = read_src(ctx, &tex->src[i].src, &tex->instr);
      tex->src[i].src_type = src.tex.src_type;
   }

   return tex;
}

static void
write_phi(write_ctx *ctx, const nir_phi_instr *phi)
{
   union packed_instr header;
   header.u32 = 0;

   header.phi.instr_type = phi->instr.type;
   header.phi.num_srcs = exec_list_length(&phi->srcs);

   /* Phi nodes are special, since they may reference SSA definitions and
    * basic blocks that don't exist yet. We leave two empty uint32_t's here,
    * and then store enough information so that a later fixup pass can fill
    * them in correctly.
    */
   write_dest(ctx, &phi->dest, header, phi->instr.type);

   nir_foreach_phi_src(src, phi) {
      assert(src->src.is_ssa);
      size_t blob_offset = blob_reserve_uint32(ctx->blob);
      ASSERTED size_t blob_offset2 = blob_reserve_uint32(ctx->blob);
      assert(blob_offset + sizeof(uint32_t) == blob_offset2);
      write_phi_fixup fixup = {
         .blob_offset = blob_offset,
         .src = src->src.ssa,
         .block = src->pred,
      };
      util_dynarray_append(&ctx->phi_fixups, write_phi_fixup, fixup);
   }
}

static void
write_fixup_phis(write_ctx *ctx)
{
   util_dynarray_foreach(&ctx->phi_fixups, write_phi_fixup, fixup) {
      uint32_t *blob_ptr = (uint32_t *)(ctx->blob->data + fixup->blob_offset);
      blob_ptr[0] = write_lookup_object(ctx, fixup->src);
      blob_ptr[1] = write_lookup_object(ctx, fixup->block);
   }

   util_dynarray_clear(&ctx->phi_fixups);
}

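/* Two-pass sketch (illustrative): for a loop counter phi whose back-edge
 * source is defined later in the blob, write_phi reserves two dwords and
 * records their offset; once the whole impl has been written and every
 * object has an index, write_fixup_phis patches the reserved dwords with
 * the SSA-def and predecessor-block indices.
 */
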
static nir_phi_instr *
read_phi(read_ctx *ctx, nir_block *blk, union packed_instr header)
{
   nir_phi_instr *phi = nir_phi_instr_create(ctx->nir);

   read_dest(ctx, &phi->dest, &phi->instr, header);

   /* For similar reasons as before, we just store the index directly into the
    * pointer, and let a later pass resolve the phi sources.
    *
    * In order to ensure that the copied sources (which are just the indices
    * from the blob for now) don't get inserted into the old shader's use-def
    * lists, we have to add the phi instruction *before* we set up its
    * sources.
    */
   nir_instr_insert_after_block(blk, &phi->instr);

   for (unsigned i = 0; i < header.phi.num_srcs; i++) {
      nir_phi_src *src = ralloc(phi, nir_phi_src);

      src->src.is_ssa = true;
      src->src.ssa = (nir_ssa_def *)(uintptr_t) blob_read_uint32(ctx->blob);
      src->pred = (nir_block *)(uintptr_t) blob_read_uint32(ctx->blob);

      /* Since we're not letting nir_insert_instr handle use/def stuff for us,
       * we have to set the parent_instr manually. It doesn't really matter
       * when we do it, so we might as well do it here.
       */
      src->src.parent_instr = &phi->instr;

      /* Stash it in the list of phi sources. We'll walk this list and fix up
       * sources at the very end of read_function_impl.
       */
      list_add(&src->src.use_link, &ctx->phi_srcs);

      exec_list_push_tail(&phi->srcs, &src->node);
   }

   return phi;
}

static void
read_fixup_phis(read_ctx *ctx)
{
   list_for_each_entry_safe(nir_phi_src, src, &ctx->phi_srcs, src.use_link) {
      src->pred = read_lookup_object(ctx, (uintptr_t)src->pred);
      src->src.ssa = read_lookup_object(ctx, (uintptr_t)src->src.ssa);

      /* Remove from this list */
      list_del(&src->src.use_link);

      list_addtail(&src->src.use_link, &src->src.ssa->uses);
   }
   assert(list_is_empty(&ctx->phi_srcs));
}

static void
write_jump(write_ctx *ctx, const nir_jump_instr *jmp)
{
   assert(jmp->type < 4);

   union packed_instr header;
   header.u32 = 0;

   header.jump.instr_type = jmp->instr.type;
   header.jump.type = jmp->type;

   blob_write_uint32(ctx->blob, header.u32);
}

static nir_jump_instr *
read_jump(read_ctx *ctx, union packed_instr header)
{
   nir_jump_instr *jmp = nir_jump_instr_create(ctx->nir, header.jump.type);
   return jmp;
}

static void
write_call(write_ctx *ctx, const nir_call_instr *call)
{
   blob_write_uint32(ctx->blob, write_lookup_object(ctx, call->callee));

   for (unsigned i = 0; i < call->num_params; i++)
      write_src(ctx, &call->params[i]);
}

static nir_call_instr *
read_call(read_ctx *ctx)
{
   nir_function *callee = read_object(ctx);
   nir_call_instr *call = nir_call_instr_create(ctx->nir, callee);

   for (unsigned i = 0; i < call->num_params; i++)
      read_src(ctx, &call->params[i], call);

   return call;
}

static void
write_instr(write_ctx *ctx, const nir_instr *instr)
{
   /* We have only 4 bits for the instruction type. */
   assert(instr->type < 16);

   switch (instr->type) {
   case nir_instr_type_alu:
      write_alu(ctx, nir_instr_as_alu(instr));
      break;
   case nir_instr_type_deref:
      write_deref(ctx, nir_instr_as_deref(instr));
      break;
   case nir_instr_type_intrinsic:
      write_intrinsic(ctx, nir_instr_as_intrinsic(instr));
      break;
   case nir_instr_type_load_const:
      write_load_const(ctx, nir_instr_as_load_const(instr));
      break;
   case nir_instr_type_ssa_undef:
      write_ssa_undef(ctx, nir_instr_as_ssa_undef(instr));
      break;
   case nir_instr_type_tex:
      write_tex(ctx, nir_instr_as_tex(instr));
      break;
   case nir_instr_type_phi:
      write_phi(ctx, nir_instr_as_phi(instr));
      break;
   case nir_instr_type_jump:
      write_jump(ctx, nir_instr_as_jump(instr));
      break;
   case nir_instr_type_call:
      blob_write_uint32(ctx->blob, instr->type);
      write_call(ctx, nir_instr_as_call(instr));
      break;
   case nir_instr_type_parallel_copy:
      unreachable("Cannot write parallel copies");
   default:
      unreachable("bad instr type");
   }
}

/* Return the number of instructions read. */
static unsigned
read_instr(read_ctx *ctx, nir_block *block)
{
   STATIC_ASSERT(sizeof(union packed_instr) == 4);
   union packed_instr header;
   header.u32 = blob_read_uint32(ctx->blob);
   nir_instr *instr;

   switch (header.any.instr_type) {
   case nir_instr_type_alu:
      for (unsigned i = 0; i <= header.alu.num_followup_alu_sharing_header; i++)
         nir_instr_insert_after_block(block, &read_alu(ctx, header)->instr);
      return header.alu.num_followup_alu_sharing_header + 1;
   case nir_instr_type_deref:
      instr = &read_deref(ctx, header)->instr;
      break;
   case nir_instr_type_intrinsic:
      instr = &read_intrinsic(ctx, header)->instr;
      break;
   case nir_instr_type_load_const:
      instr = &read_load_const(ctx, header)->instr;
      break;
   case nir_instr_type_ssa_undef:
      instr = &read_ssa_undef(ctx, header)->instr;
      break;
   case nir_instr_type_tex:
      instr = &read_tex(ctx, header)->instr;
      break;
   case nir_instr_type_phi:
      /* Phi instructions are a bit of a special case when reading because
       * we don't want the insertion to automatically handle use/defs for
       * us. Instead, we need to wait until all the blocks/instructions are
       * read so that we can set their sources up.
       */
      read_phi(ctx, block, header);
      return 1;
   case nir_instr_type_jump:
      instr = &read_jump(ctx, header)->instr;
      break;
   case nir_instr_type_call:
      instr = &read_call(ctx)->instr;
      break;
   case nir_instr_type_parallel_copy:
      unreachable("Cannot read parallel copies");
   default:
      unreachable("bad instr type");
   }

   nir_instr_insert_after_block(block, instr);
   return 1;
}

static void
write_block(write_ctx *ctx, const nir_block *block)
{
   write_add_object(ctx, block);
   blob_write_uint32(ctx->blob, exec_list_length(&block->instr_list));

   ctx->last_instr_type = ~0;
   ctx->last_alu_header_offset = 0;

   nir_foreach_instr(instr, block) {
      write_instr(ctx, instr);
      ctx->last_instr_type = instr->type;
   }
}

static void
read_block(read_ctx *ctx, struct exec_list *cf_list)
{
   /* Don't actually create a new block. Just use the one from the tail of
    * the list. NIR guarantees that the tail of the list is a block and that
    * no two blocks are side-by-side in the IR, so it should be empty.
    */
   nir_block *block =
      exec_node_data(nir_block, exec_list_get_tail(cf_list), cf_node.node);

   read_add_object(ctx, block);
   unsigned num_instrs = blob_read_uint32(ctx->blob);
   for (unsigned i = 0; i < num_instrs;) {
      i += read_instr(ctx, block);
   }
}

static void
write_cf_list(write_ctx *ctx, const struct exec_list *cf_list);

static void
read_cf_list(read_ctx *ctx, struct exec_list *cf_list);

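/* An if is written as its condition source followed by the then and else
 * CF lists, in that order.  The reader inserts the new nir_if before
 * recursing so that the nested nodes land inside it.
 */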
static void
write_if(write_ctx *ctx, nir_if *nif)
{
   write_src(ctx, &nif->condition);

   write_cf_list(ctx, &nif->then_list);
   write_cf_list(ctx, &nif->else_list);
}

static void
read_if(read_ctx *ctx, struct exec_list *cf_list)
{
   nir_if *nif = nir_if_create(ctx->nir);

   read_src(ctx, &nif->condition, nif);

   nir_cf_node_insert_end(cf_list, &nif->cf_node);

   read_cf_list(ctx, &nif->then_list);
   read_cf_list(ctx, &nif->else_list);
}

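/* A loop carries no data of its own, so serializing one is just
 * serializing its body.
 */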
static void
write_loop(write_ctx *ctx, nir_loop *loop)
{
   write_cf_list(ctx, &loop->body);
}

static void
read_loop(read_ctx *ctx, struct exec_list *cf_list)
{
   nir_loop *loop = nir_loop_create(ctx->nir);

   nir_cf_node_insert_end(cf_list, &loop->cf_node);

   read_cf_list(ctx, &loop->body);
}

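/* Each CF node is encoded as a uint32 type tag (nir_cf_node_type) followed
 * by the payload for that node type.
 */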
static void
write_cf_node(write_ctx *ctx, nir_cf_node *cf)
{
   blob_write_uint32(ctx->blob, cf->type);

   switch (cf->type) {
   case nir_cf_node_block:
      write_block(ctx, nir_cf_node_as_block(cf));
      break;
   case nir_cf_node_if:
      write_if(ctx, nir_cf_node_as_if(cf));
      break;
   case nir_cf_node_loop:
      write_loop(ctx, nir_cf_node_as_loop(cf));
      break;
   default:
      unreachable("bad cf type");
   }
}

static void
read_cf_node(read_ctx *ctx, struct exec_list *list)
{
   nir_cf_node_type type = blob_read_uint32(ctx->blob);

   switch (type) {
   case nir_cf_node_block:
      read_block(ctx, list);
      break;
   case nir_cf_node_if:
      read_if(ctx, list);
      break;
   case nir_cf_node_loop:
      read_loop(ctx, list);
      break;
   default:
      unreachable("bad cf type");
   }
}

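/* A CF list is length-prefixed: a uint32 node count followed by each node
 * in order.
 */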
static void
write_cf_list(write_ctx *ctx, const struct exec_list *cf_list)
{
   blob_write_uint32(ctx->blob, exec_list_length(cf_list));
   foreach_list_typed(nir_cf_node, cf, node, cf_list) {
      write_cf_node(ctx, cf);
   }
}

static void
read_cf_list(read_ctx *ctx, struct exec_list *cf_list)
{
   uint32_t num_cf_nodes = blob_read_uint32(ctx->blob);
   for (unsigned i = 0; i < num_cf_nodes; i++)
      read_cf_node(ctx, cf_list);
}

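/* A function_impl is written as its local variables, its registers and
 * register count, and then its body as a CF list.  Phi sources are written
 * last, in a fixup pass, since they may forward-reference blocks and
 * instructions that have not been assigned indices yet.
 */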
static void
write_function_impl(write_ctx *ctx, const nir_function_impl *fi)
{
   write_var_list(ctx, &fi->locals);
   write_reg_list(ctx, &fi->registers);
   blob_write_uint32(ctx->blob, fi->reg_alloc);

   write_cf_list(ctx, &fi->body);
   write_fixup_phis(ctx);
}

static nir_function_impl *
read_function_impl(read_ctx *ctx, nir_function *fxn)
{
   nir_function_impl *fi = nir_function_impl_create_bare(ctx->nir);
   fi->function = fxn;

   read_var_list(ctx, &fi->locals);
   read_reg_list(ctx, &fi->registers);
   fi->reg_alloc = blob_read_uint32(ctx->blob);

   read_cf_list(ctx, &fi->body);
   read_fixup_phis(ctx);

   fi->valid_metadata = 0;

   return fi;
}

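/* A function starts with a flags word: bit 0 is is_entrypoint, bit 1 says
 * whether a name string follows, and bit 2 whether the function has an
 * impl.  Each parameter is then packed into one uint32, with
 * num_components in bits 0-7 and bit_size in bits 8-15.
 */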
static void
write_function(write_ctx *ctx, const nir_function *fxn)
{
   uint32_t flags = fxn->is_entrypoint;
   if (fxn->name)
      flags |= 0x2;
   if (fxn->impl)
      flags |= 0x4;
   blob_write_uint32(ctx->blob, flags);
   if (fxn->name)
      blob_write_string(ctx->blob, fxn->name);

   write_add_object(ctx, fxn);

   blob_write_uint32(ctx->blob, fxn->num_params);
   for (unsigned i = 0; i < fxn->num_params; i++) {
      uint32_t val =
         ((uint32_t)fxn->params[i].num_components) |
         ((uint32_t)fxn->params[i].bit_size) << 8;
      blob_write_uint32(ctx->blob, val);
   }

   /* At first glance, it looks like we should write the function_impl
    * here.  However, call instructions need to be able to reference the
    * functions they call, so every function must exist before any impl is
    * processed.  We stop here and write function_impls as a second pass.
    */
}

static void
read_function(read_ctx *ctx)
{
   uint32_t flags = blob_read_uint32(ctx->blob);
   bool has_name = flags & 0x2;
   char *name = has_name ? blob_read_string(ctx->blob) : NULL;

   nir_function *fxn = nir_function_create(ctx->nir, name);

   read_add_object(ctx, fxn);

   fxn->num_params = blob_read_uint32(ctx->blob);
   fxn->params = ralloc_array(fxn, nir_parameter, fxn->num_params);
   for (unsigned i = 0; i < fxn->num_params; i++) {
      uint32_t val = blob_read_uint32(ctx->blob);
      fxn->params[i].num_components = val & 0xff;
      fxn->params[i].bit_size = (val >> 8) & 0xff;
   }

   fxn->is_entrypoint = flags & 0x1;
   if (flags & 0x4)
      fxn->impl = NIR_SERIALIZE_FUNC_HAS_IMPL;
}

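/* A typical round trip through the serializer looks like the sketch below
 * (a minimal example; "shader" and "options" are assumed to come from the
 * caller, e.g. a driver's shader cache):
 *
 *    struct blob blob;
 *    blob_init(&blob);
 *    nir_serialize(&blob, shader, true);
 *
 *    struct blob_reader reader;
 *    blob_reader_init(&reader, blob.data, blob.size);
 *    nir_shader *clone = nir_deserialize(NULL, options, &reader);
 *    blob_finish(&blob);
 */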
/**
 * Serialize NIR into a binary blob.
 *
 * \param strip  Don't serialize information only useful for debugging,
 *               such as variable names, making cache hits from similar
 *               shaders more likely.
 */
void
nir_serialize(struct blob *blob, const nir_shader *nir, bool strip)
{
   write_ctx ctx = {0};
   ctx.remap_table = _mesa_pointer_hash_table_create(NULL);
   ctx.blob = blob;
   ctx.nir = nir;
   ctx.strip = strip;
   util_dynarray_init(&ctx.phi_fixups, NULL);

   size_t idx_size_offset = blob_reserve_uint32(blob);

   struct shader_info info = nir->info;
   uint32_t strings = 0;
   if (!strip && info.name)
      strings |= 0x1;
   if (!strip && info.label)
      strings |= 0x2;
   blob_write_uint32(blob, strings);
   if (!strip && info.name)
      blob_write_string(blob, info.name);
   if (!strip && info.label)
      blob_write_string(blob, info.label);
   info.name = info.label = NULL;
   blob_write_bytes(blob, (uint8_t *) &info, sizeof(info));

   write_var_list(&ctx, &nir->uniforms);
   write_var_list(&ctx, &nir->inputs);
   write_var_list(&ctx, &nir->outputs);
   write_var_list(&ctx, &nir->shared);
   write_var_list(&ctx, &nir->globals);
   write_var_list(&ctx, &nir->system_values);

   blob_write_uint32(blob, nir->num_inputs);
   blob_write_uint32(blob, nir->num_uniforms);
   blob_write_uint32(blob, nir->num_outputs);
   blob_write_uint32(blob, nir->num_shared);
   blob_write_uint32(blob, nir->scratch_size);

   blob_write_uint32(blob, exec_list_length(&nir->functions));
   nir_foreach_function(fxn, nir) {
      write_function(&ctx, fxn);
   }

   nir_foreach_function(fxn, nir) {
      if (fxn->impl)
         write_function_impl(&ctx, fxn->impl);
   }

   blob_write_uint32(blob, nir->constant_data_size);
   if (nir->constant_data_size > 0)
      blob_write_bytes(blob, nir->constant_data, nir->constant_data_size);

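   /* Back-patch the object-table size we reserved space for at the start
    * of the blob, now that every index has been assigned.
    */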
   *(uint32_t *)(blob->data + idx_size_offset) = ctx.next_idx;

   _mesa_hash_table_destroy(ctx.remap_table, NULL);
   util_dynarray_fini(&ctx.phi_fixups);
}

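/* Deserialize a shader from a blob previously produced by nir_serialize().
 * The new nir_shader is ralloc'ed under mem_ctx.  The compiler options are
 * not serialized, so the caller supplies them.
 */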
nir_shader *
nir_deserialize(void *mem_ctx,
                const struct nir_shader_compiler_options *options,
                struct blob_reader *blob)
{
   read_ctx ctx = {0};
   ctx.blob = blob;
   list_inithead(&ctx.phi_srcs);
   ctx.idx_table_len = blob_read_uint32(blob);
   ctx.idx_table = calloc(ctx.idx_table_len, sizeof(uintptr_t));

   uint32_t strings = blob_read_uint32(blob);
   char *name = (strings & 0x1) ? blob_read_string(blob) : NULL;
   char *label = (strings & 0x2) ? blob_read_string(blob) : NULL;

   struct shader_info info;
   blob_copy_bytes(blob, (uint8_t *) &info, sizeof(info));

   ctx.nir = nir_shader_create(mem_ctx, info.stage, options, NULL);

   info.name = name ? ralloc_strdup(ctx.nir, name) : NULL;
   info.label = label ? ralloc_strdup(ctx.nir, label) : NULL;

   ctx.nir->info = info;

   read_var_list(&ctx, &ctx.nir->uniforms);
   read_var_list(&ctx, &ctx.nir->inputs);
   read_var_list(&ctx, &ctx.nir->outputs);
   read_var_list(&ctx, &ctx.nir->shared);
   read_var_list(&ctx, &ctx.nir->globals);
   read_var_list(&ctx, &ctx.nir->system_values);

   ctx.nir->num_inputs = blob_read_uint32(blob);
   ctx.nir->num_uniforms = blob_read_uint32(blob);
   ctx.nir->num_outputs = blob_read_uint32(blob);
   ctx.nir->num_shared = blob_read_uint32(blob);
   ctx.nir->scratch_size = blob_read_uint32(blob);

   unsigned num_functions = blob_read_uint32(blob);
   for (unsigned i = 0; i < num_functions; i++)
      read_function(&ctx);

   nir_foreach_function(fxn, ctx.nir) {
      if (fxn->impl == NIR_SERIALIZE_FUNC_HAS_IMPL)
         fxn->impl = read_function_impl(&ctx, fxn);
   }

   ctx.nir->constant_data_size = blob_read_uint32(blob);
   if (ctx.nir->constant_data_size > 0) {
      ctx.nir->constant_data =
         ralloc_size(ctx.nir, ctx.nir->constant_data_size);
      blob_copy_bytes(blob, ctx.nir->constant_data,
                      ctx.nir->constant_data_size);
   }

   free(ctx.idx_table);

   return ctx.nir;
}

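/* Round-trip a shader through the serializer and replace the original with
 * the deserialized copy in place.  Anything the serializer does not
 * preserve is dropped from the shader in the process.
 */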
void
nir_shader_serialize_deserialize(nir_shader *shader)
{
   const struct nir_shader_compiler_options *options = shader->options;

   struct blob writer;
   blob_init(&writer);
   nir_serialize(&writer, shader, false);

   /* Delete all of the shader's ralloc children but leave the shader
    * itself alone since the caller still holds a pointer to it.
    */
   void *dead_ctx = ralloc_context(NULL);
   ralloc_adopt(dead_ctx, shader);
   ralloc_free(dead_ctx);

   dead_ctx = ralloc_context(NULL);

   struct blob_reader reader;
   blob_reader_init(&reader, writer.data, writer.size);
   nir_shader *copy = nir_deserialize(dead_ctx, options, &reader);

   blob_finish(&writer);

   nir_shader_replace(shader, copy);
   ralloc_free(dead_ctx);
}