Added few more stubs so that control reaches to DestroyDevice().
[mesa.git] / src / compiler / nir / nir_serialize.c
1 /*
2 * Copyright © 2017 Connor Abbott
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "nir_serialize.h"
25 #include "nir_control_flow.h"
26 #include "util/u_dynarray.h"
27 #include "util/u_math.h"
28
29 #define NIR_SERIALIZE_FUNC_HAS_IMPL ((void *)(intptr_t)1)
30 #define MAX_OBJECT_IDS (1 << 20)
31
32 typedef struct {
33 size_t blob_offset;
34 nir_ssa_def *src;
35 nir_block *block;
36 } write_phi_fixup;
37
38 typedef struct {
39 const nir_shader *nir;
40
41 struct blob *blob;
42
43 /* maps pointer to index */
44 struct hash_table *remap_table;
45
46 /* the next index to assign to a NIR in-memory object */
47 uint32_t next_idx;
48
49 /* Array of write_phi_fixup structs representing phi sources that need to
50 * be resolved in the second pass.
51 */
52 struct util_dynarray phi_fixups;
53
54 /* The last serialized type. */
55 const struct glsl_type *last_type;
56 const struct glsl_type *last_interface_type;
57 struct nir_variable_data last_var_data;
58
59 /* For skipping equal ALU headers (typical after scalarization). */
60 nir_instr_type last_instr_type;
61 uintptr_t last_alu_header_offset;
62
63 /* Don't write optional data such as variable names. */
64 bool strip;
65 } write_ctx;
66
67 typedef struct {
68 nir_shader *nir;
69
70 struct blob_reader *blob;
71
72 /* the next index to assign to a NIR in-memory object */
73 uint32_t next_idx;
74
75 /* The length of the index -> object table */
76 uint32_t idx_table_len;
77
78 /* map from index to deserialized pointer */
79 void **idx_table;
80
81 /* List of phi sources. */
82 struct list_head phi_srcs;
83
84 /* The last deserialized type. */
85 const struct glsl_type *last_type;
86 const struct glsl_type *last_interface_type;
87 struct nir_variable_data last_var_data;
88 } read_ctx;
89
90 static void
91 write_add_object(write_ctx *ctx, const void *obj)
92 {
93 uint32_t index = ctx->next_idx++;
94 assert(index != MAX_OBJECT_IDS);
95 _mesa_hash_table_insert(ctx->remap_table, obj, (void *)(uintptr_t) index);
96 }
97
98 static uint32_t
99 write_lookup_object(write_ctx *ctx, const void *obj)
100 {
101 struct hash_entry *entry = _mesa_hash_table_search(ctx->remap_table, obj);
102 assert(entry);
103 return (uint32_t)(uintptr_t) entry->data;
104 }
105
106 static void
107 read_add_object(read_ctx *ctx, void *obj)
108 {
109 assert(ctx->next_idx < ctx->idx_table_len);
110 ctx->idx_table[ctx->next_idx++] = obj;
111 }
112
113 static void *
114 read_lookup_object(read_ctx *ctx, uint32_t idx)
115 {
116 assert(idx < ctx->idx_table_len);
117 return ctx->idx_table[idx];
118 }
119
120 static void *
121 read_object(read_ctx *ctx)
122 {
123 return read_lookup_object(ctx, blob_read_uint32(ctx->blob));
124 }
125
126 static uint32_t
127 encode_bit_size_3bits(uint8_t bit_size)
128 {
129 /* Encode values of 0, 1, 2, 4, 8, 16, 32, 64 in 3 bits. */
130 assert(bit_size <= 64 && util_is_power_of_two_or_zero(bit_size));
131 if (bit_size)
132 return util_logbase2(bit_size) + 1;
133 return 0;
134 }
135
136 static uint8_t
137 decode_bit_size_3bits(uint8_t bit_size)
138 {
139 if (bit_size)
140 return 1 << (bit_size - 1);
141 return 0;
142 }
143
144 #define NUM_COMPONENTS_IS_SEPARATE_7 7
145
146 static uint8_t
147 encode_num_components_in_3bits(uint8_t num_components)
148 {
149 if (num_components <= 4)
150 return num_components;
151 if (num_components == 8)
152 return 5;
153 if (num_components == 16)
154 return 6;
155
156 /* special value indicating that num_components is in the next uint32 */
157 return NUM_COMPONENTS_IS_SEPARATE_7;
158 }
159
160 static uint8_t
161 decode_num_components_in_3bits(uint8_t value)
162 {
163 if (value <= 4)
164 return value;
165 if (value == 5)
166 return 8;
167 if (value == 6)
168 return 16;
169
170 unreachable("invalid num_components encoding");
171 return 0;
172 }
173
174 static void
175 write_constant(write_ctx *ctx, const nir_constant *c)
176 {
177 blob_write_bytes(ctx->blob, c->values, sizeof(c->values));
178 blob_write_uint32(ctx->blob, c->num_elements);
179 for (unsigned i = 0; i < c->num_elements; i++)
180 write_constant(ctx, c->elements[i]);
181 }
182
183 static nir_constant *
184 read_constant(read_ctx *ctx, nir_variable *nvar)
185 {
186 nir_constant *c = ralloc(nvar, nir_constant);
187
188 blob_copy_bytes(ctx->blob, (uint8_t *)c->values, sizeof(c->values));
189 c->num_elements = blob_read_uint32(ctx->blob);
190 c->elements = ralloc_array(nvar, nir_constant *, c->num_elements);
191 for (unsigned i = 0; i < c->num_elements; i++)
192 c->elements[i] = read_constant(ctx, nvar);
193
194 return c;
195 }
196
197 enum var_data_encoding {
198 var_encode_full,
199 var_encode_shader_temp,
200 var_encode_function_temp,
201 var_encode_location_diff,
202 };
203
204 union packed_var {
205 uint32_t u32;
206 struct {
207 unsigned has_name:1;
208 unsigned has_constant_initializer:1;
209 unsigned has_pointer_initializer:1;
210 unsigned has_interface_type:1;
211 unsigned num_state_slots:7;
212 unsigned data_encoding:2;
213 unsigned type_same_as_last:1;
214 unsigned interface_type_same_as_last:1;
215 unsigned _pad:1;
216 unsigned num_members:16;
217 } u;
218 };
219
220 union packed_var_data_diff {
221 uint32_t u32;
222 struct {
223 int location:13;
224 int location_frac:3;
225 int driver_location:16;
226 } u;
227 };
228
229 static void
230 write_variable(write_ctx *ctx, const nir_variable *var)
231 {
232 write_add_object(ctx, var);
233
234 assert(var->num_state_slots < (1 << 7));
235
236 STATIC_ASSERT(sizeof(union packed_var) == 4);
237 union packed_var flags;
238 flags.u32 = 0;
239
240 flags.u.has_name = !ctx->strip && var->name;
241 flags.u.has_constant_initializer = !!(var->constant_initializer);
242 flags.u.has_pointer_initializer = !!(var->pointer_initializer);
243 flags.u.has_interface_type = !!(var->interface_type);
244 flags.u.type_same_as_last = var->type == ctx->last_type;
245 flags.u.interface_type_same_as_last =
246 var->interface_type && var->interface_type == ctx->last_interface_type;
247 flags.u.num_state_slots = var->num_state_slots;
248 flags.u.num_members = var->num_members;
249
250 struct nir_variable_data data = var->data;
251
252 /* When stripping, we expect that the location is no longer needed,
253 * which is typically after shaders are linked.
254 */
255 if (ctx->strip &&
256 data.mode != nir_var_system_value &&
257 data.mode != nir_var_shader_in &&
258 data.mode != nir_var_shader_out)
259 data.location = 0;
260
261 /* Temporary variables don't serialize var->data. */
262 if (data.mode == nir_var_shader_temp)
263 flags.u.data_encoding = var_encode_shader_temp;
264 else if (data.mode == nir_var_function_temp)
265 flags.u.data_encoding = var_encode_function_temp;
266 else {
267 struct nir_variable_data tmp = data;
268
269 tmp.location = ctx->last_var_data.location;
270 tmp.location_frac = ctx->last_var_data.location_frac;
271 tmp.driver_location = ctx->last_var_data.driver_location;
272
273 /* See if we can encode only the difference in locations from the last
274 * variable.
275 */
276 if (memcmp(&ctx->last_var_data, &tmp, sizeof(tmp)) == 0 &&
277 abs((int)data.location -
278 (int)ctx->last_var_data.location) < (1 << 12) &&
279 abs((int)data.driver_location -
280 (int)ctx->last_var_data.driver_location) < (1 << 15))
281 flags.u.data_encoding = var_encode_location_diff;
282 else
283 flags.u.data_encoding = var_encode_full;
284 }
285
286 blob_write_uint32(ctx->blob, flags.u32);
287
288 if (!flags.u.type_same_as_last) {
289 encode_type_to_blob(ctx->blob, var->type);
290 ctx->last_type = var->type;
291 }
292
293 if (var->interface_type && !flags.u.interface_type_same_as_last) {
294 encode_type_to_blob(ctx->blob, var->interface_type);
295 ctx->last_interface_type = var->interface_type;
296 }
297
298 if (flags.u.has_name)
299 blob_write_string(ctx->blob, var->name);
300
301 if (flags.u.data_encoding == var_encode_full ||
302 flags.u.data_encoding == var_encode_location_diff) {
303 if (flags.u.data_encoding == var_encode_full) {
304 blob_write_bytes(ctx->blob, &data, sizeof(data));
305 } else {
306 /* Serialize only the difference in locations from the last variable.
307 */
308 union packed_var_data_diff diff;
309
310 diff.u.location = data.location - ctx->last_var_data.location;
311 diff.u.location_frac = data.location_frac -
312 ctx->last_var_data.location_frac;
313 diff.u.driver_location = data.driver_location -
314 ctx->last_var_data.driver_location;
315
316 blob_write_uint32(ctx->blob, diff.u32);
317 }
318
319 ctx->last_var_data = data;
320 }
321
322 for (unsigned i = 0; i < var->num_state_slots; i++) {
323 blob_write_bytes(ctx->blob, &var->state_slots[i],
324 sizeof(var->state_slots[i]));
325 }
326 if (var->constant_initializer)
327 write_constant(ctx, var->constant_initializer);
328 if (var->pointer_initializer)
329 write_lookup_object(ctx, var->pointer_initializer);
330 if (var->num_members > 0) {
331 blob_write_bytes(ctx->blob, (uint8_t *) var->members,
332 var->num_members * sizeof(*var->members));
333 }
334 }
335
336 static nir_variable *
337 read_variable(read_ctx *ctx)
338 {
339 nir_variable *var = rzalloc(ctx->nir, nir_variable);
340 read_add_object(ctx, var);
341
342 union packed_var flags;
343 flags.u32 = blob_read_uint32(ctx->blob);
344
345 if (flags.u.type_same_as_last) {
346 var->type = ctx->last_type;
347 } else {
348 var->type = decode_type_from_blob(ctx->blob);
349 ctx->last_type = var->type;
350 }
351
352 if (flags.u.has_interface_type) {
353 if (flags.u.interface_type_same_as_last) {
354 var->interface_type = ctx->last_interface_type;
355 } else {
356 var->interface_type = decode_type_from_blob(ctx->blob);
357 ctx->last_interface_type = var->interface_type;
358 }
359 }
360
361 if (flags.u.has_name) {
362 const char *name = blob_read_string(ctx->blob);
363 var->name = ralloc_strdup(var, name);
364 } else {
365 var->name = NULL;
366 }
367
368 if (flags.u.data_encoding == var_encode_shader_temp)
369 var->data.mode = nir_var_shader_temp;
370 else if (flags.u.data_encoding == var_encode_function_temp)
371 var->data.mode = nir_var_function_temp;
372 else if (flags.u.data_encoding == var_encode_full) {
373 blob_copy_bytes(ctx->blob, (uint8_t *) &var->data, sizeof(var->data));
374 ctx->last_var_data = var->data;
375 } else { /* var_encode_location_diff */
376 union packed_var_data_diff diff;
377 diff.u32 = blob_read_uint32(ctx->blob);
378
379 var->data = ctx->last_var_data;
380 var->data.location += diff.u.location;
381 var->data.location_frac += diff.u.location_frac;
382 var->data.driver_location += diff.u.driver_location;
383
384 ctx->last_var_data = var->data;
385 }
386
387 var->num_state_slots = flags.u.num_state_slots;
388 if (var->num_state_slots != 0) {
389 var->state_slots = ralloc_array(var, nir_state_slot,
390 var->num_state_slots);
391 for (unsigned i = 0; i < var->num_state_slots; i++) {
392 blob_copy_bytes(ctx->blob, &var->state_slots[i],
393 sizeof(var->state_slots[i]));
394 }
395 }
396 if (flags.u.has_constant_initializer)
397 var->constant_initializer = read_constant(ctx, var);
398 else
399 var->constant_initializer = NULL;
400
401 if (flags.u.has_pointer_initializer)
402 var->pointer_initializer = read_object(ctx);
403 else
404 var->pointer_initializer = NULL;
405
406 var->num_members = flags.u.num_members;
407 if (var->num_members > 0) {
408 var->members = ralloc_array(var, struct nir_variable_data,
409 var->num_members);
410 blob_copy_bytes(ctx->blob, (uint8_t *) var->members,
411 var->num_members * sizeof(*var->members));
412 }
413
414 return var;
415 }
416
417 static void
418 write_var_list(write_ctx *ctx, const struct exec_list *src)
419 {
420 blob_write_uint32(ctx->blob, exec_list_length(src));
421 foreach_list_typed(nir_variable, var, node, src) {
422 write_variable(ctx, var);
423 }
424 }
425
426 static void
427 read_var_list(read_ctx *ctx, struct exec_list *dst)
428 {
429 exec_list_make_empty(dst);
430 unsigned num_vars = blob_read_uint32(ctx->blob);
431 for (unsigned i = 0; i < num_vars; i++) {
432 nir_variable *var = read_variable(ctx);
433 exec_list_push_tail(dst, &var->node);
434 }
435 }
436
437 static void
438 write_register(write_ctx *ctx, const nir_register *reg)
439 {
440 write_add_object(ctx, reg);
441 blob_write_uint32(ctx->blob, reg->num_components);
442 blob_write_uint32(ctx->blob, reg->bit_size);
443 blob_write_uint32(ctx->blob, reg->num_array_elems);
444 blob_write_uint32(ctx->blob, reg->index);
445 blob_write_uint32(ctx->blob, !ctx->strip && reg->name);
446 if (!ctx->strip && reg->name)
447 blob_write_string(ctx->blob, reg->name);
448 }
449
450 static nir_register *
451 read_register(read_ctx *ctx)
452 {
453 nir_register *reg = ralloc(ctx->nir, nir_register);
454 read_add_object(ctx, reg);
455 reg->num_components = blob_read_uint32(ctx->blob);
456 reg->bit_size = blob_read_uint32(ctx->blob);
457 reg->num_array_elems = blob_read_uint32(ctx->blob);
458 reg->index = blob_read_uint32(ctx->blob);
459 bool has_name = blob_read_uint32(ctx->blob);
460 if (has_name) {
461 const char *name = blob_read_string(ctx->blob);
462 reg->name = ralloc_strdup(reg, name);
463 } else {
464 reg->name = NULL;
465 }
466
467 list_inithead(&reg->uses);
468 list_inithead(&reg->defs);
469 list_inithead(&reg->if_uses);
470
471 return reg;
472 }
473
474 static void
475 write_reg_list(write_ctx *ctx, const struct exec_list *src)
476 {
477 blob_write_uint32(ctx->blob, exec_list_length(src));
478 foreach_list_typed(nir_register, reg, node, src)
479 write_register(ctx, reg);
480 }
481
482 static void
483 read_reg_list(read_ctx *ctx, struct exec_list *dst)
484 {
485 exec_list_make_empty(dst);
486 unsigned num_regs = blob_read_uint32(ctx->blob);
487 for (unsigned i = 0; i < num_regs; i++) {
488 nir_register *reg = read_register(ctx);
489 exec_list_push_tail(dst, &reg->node);
490 }
491 }
492
493 union packed_src {
494 uint32_t u32;
495 struct {
496 unsigned is_ssa:1; /* <-- Header */
497 unsigned is_indirect:1;
498 unsigned object_idx:20;
499 unsigned _footer:10; /* <-- Footer */
500 } any;
501 struct {
502 unsigned _header:22; /* <-- Header */
503 unsigned negate:1; /* <-- Footer */
504 unsigned abs:1;
505 unsigned swizzle_x:2;
506 unsigned swizzle_y:2;
507 unsigned swizzle_z:2;
508 unsigned swizzle_w:2;
509 } alu;
510 struct {
511 unsigned _header:22; /* <-- Header */
512 unsigned src_type:5; /* <-- Footer */
513 unsigned _pad:5;
514 } tex;
515 };
516
517 static void
518 write_src_full(write_ctx *ctx, const nir_src *src, union packed_src header)
519 {
520 /* Since sources are very frequent, we try to save some space when storing
521 * them. In particular, we store whether the source is a register and
522 * whether the register has an indirect index in the low two bits. We can
523 * assume that the high two bits of the index are zero, since otherwise our
524 * address space would've been exhausted allocating the remap table!
525 */
526 header.any.is_ssa = src->is_ssa;
527 if (src->is_ssa) {
528 header.any.object_idx = write_lookup_object(ctx, src->ssa);
529 blob_write_uint32(ctx->blob, header.u32);
530 } else {
531 header.any.object_idx = write_lookup_object(ctx, src->reg.reg);
532 header.any.is_indirect = !!src->reg.indirect;
533 blob_write_uint32(ctx->blob, header.u32);
534 blob_write_uint32(ctx->blob, src->reg.base_offset);
535 if (src->reg.indirect) {
536 union packed_src header = {0};
537 write_src_full(ctx, src->reg.indirect, header);
538 }
539 }
540 }
541
542 static void
543 write_src(write_ctx *ctx, const nir_src *src)
544 {
545 union packed_src header = {0};
546 write_src_full(ctx, src, header);
547 }
548
549 static union packed_src
550 read_src(read_ctx *ctx, nir_src *src, void *mem_ctx)
551 {
552 STATIC_ASSERT(sizeof(union packed_src) == 4);
553 union packed_src header;
554 header.u32 = blob_read_uint32(ctx->blob);
555
556 src->is_ssa = header.any.is_ssa;
557 if (src->is_ssa) {
558 src->ssa = read_lookup_object(ctx, header.any.object_idx);
559 } else {
560 src->reg.reg = read_lookup_object(ctx, header.any.object_idx);
561 src->reg.base_offset = blob_read_uint32(ctx->blob);
562 if (header.any.is_indirect) {
563 src->reg.indirect = ralloc(mem_ctx, nir_src);
564 read_src(ctx, src->reg.indirect, mem_ctx);
565 } else {
566 src->reg.indirect = NULL;
567 }
568 }
569 return header;
570 }
571
572 union packed_dest {
573 uint8_t u8;
574 struct {
575 uint8_t is_ssa:1;
576 uint8_t has_name:1;
577 uint8_t num_components:3;
578 uint8_t bit_size:3;
579 } ssa;
580 struct {
581 uint8_t is_ssa:1;
582 uint8_t is_indirect:1;
583 uint8_t _pad:6;
584 } reg;
585 };
586
587 enum intrinsic_const_indices_encoding {
588 /* Use the 9 bits of packed_const_indices to store 1-9 indices.
589 * 1 9-bit index, or 2 4-bit indices, or 3 3-bit indices, or
590 * 4 2-bit indices, or 5-9 1-bit indices.
591 *
592 * The common case for load_ubo is 0, 0, 0, which is trivially represented.
593 * The common cases for load_interpolated_input also fit here, e.g.: 7, 3
594 */
595 const_indices_9bit_all_combined,
596
597 const_indices_8bit, /* 8 bits per element */
598 const_indices_16bit, /* 16 bits per element */
599 const_indices_32bit, /* 32 bits per element */
600 };
601
602 enum load_const_packing {
603 /* Constants are not packed and are stored in following dwords. */
604 load_const_full,
605
606 /* packed_value contains high 19 bits, low bits are 0,
607 * good for floating-point decimals
608 */
609 load_const_scalar_hi_19bits,
610
611 /* packed_value contains low 19 bits, high bits are sign-extended */
612 load_const_scalar_lo_19bits_sext,
613 };
614
615 union packed_instr {
616 uint32_t u32;
617 struct {
618 unsigned instr_type:4; /* always present */
619 unsigned _pad:20;
620 unsigned dest:8; /* always last */
621 } any;
622 struct {
623 unsigned instr_type:4;
624 unsigned exact:1;
625 unsigned no_signed_wrap:1;
626 unsigned no_unsigned_wrap:1;
627 unsigned saturate:1;
628 /* Reg: writemask; SSA: swizzles for 2 srcs */
629 unsigned writemask_or_two_swizzles:4;
630 unsigned op:9;
631 unsigned packed_src_ssa_16bit:1;
632 /* Scalarized ALUs always have the same header. */
633 unsigned num_followup_alu_sharing_header:2;
634 unsigned dest:8;
635 } alu;
636 struct {
637 unsigned instr_type:4;
638 unsigned deref_type:3;
639 unsigned cast_type_same_as_last:1;
640 unsigned mode:12; /* deref_var redefines this */
641 unsigned packed_src_ssa_16bit:1; /* deref_var redefines this */
642 unsigned _pad:3; /* deref_var redefines this */
643 unsigned dest:8;
644 } deref;
645 struct {
646 unsigned instr_type:4;
647 unsigned deref_type:3;
648 unsigned _pad:1;
649 unsigned object_idx:16; /* if 0, the object ID is a separate uint32 */
650 unsigned dest:8;
651 } deref_var;
652 struct {
653 unsigned instr_type:4;
654 unsigned intrinsic:9;
655 unsigned const_indices_encoding:2;
656 unsigned packed_const_indices:9;
657 unsigned dest:8;
658 } intrinsic;
659 struct {
660 unsigned instr_type:4;
661 unsigned last_component:4;
662 unsigned bit_size:3;
663 unsigned packing:2; /* enum load_const_packing */
664 unsigned packed_value:19; /* meaning determined by packing */
665 } load_const;
666 struct {
667 unsigned instr_type:4;
668 unsigned last_component:4;
669 unsigned bit_size:3;
670 unsigned _pad:21;
671 } undef;
672 struct {
673 unsigned instr_type:4;
674 unsigned num_srcs:4;
675 unsigned op:4;
676 unsigned dest:8;
677 unsigned _pad:12;
678 } tex;
679 struct {
680 unsigned instr_type:4;
681 unsigned num_srcs:20;
682 unsigned dest:8;
683 } phi;
684 struct {
685 unsigned instr_type:4;
686 unsigned type:2;
687 unsigned _pad:26;
688 } jump;
689 };
690
691 /* Write "lo24" as low 24 bits in the first uint32. */
692 static void
693 write_dest(write_ctx *ctx, const nir_dest *dst, union packed_instr header,
694 nir_instr_type instr_type)
695 {
696 STATIC_ASSERT(sizeof(union packed_dest) == 1);
697 union packed_dest dest;
698 dest.u8 = 0;
699
700 dest.ssa.is_ssa = dst->is_ssa;
701 if (dst->is_ssa) {
702 dest.ssa.has_name = !ctx->strip && dst->ssa.name;
703 dest.ssa.num_components =
704 encode_num_components_in_3bits(dst->ssa.num_components);
705 dest.ssa.bit_size = encode_bit_size_3bits(dst->ssa.bit_size);
706 } else {
707 dest.reg.is_indirect = !!(dst->reg.indirect);
708 }
709 header.any.dest = dest.u8;
710
711 /* Check if the current ALU instruction has the same header as the previous
712 * instruction that is also ALU. If it is, we don't have to write
713 * the current header. This is a typical occurence after scalarization.
714 */
715 if (instr_type == nir_instr_type_alu) {
716 bool equal_header = false;
717
718 if (ctx->last_instr_type == nir_instr_type_alu) {
719 assert(ctx->last_alu_header_offset);
720 union packed_instr *last_header =
721 (union packed_instr *)(ctx->blob->data +
722 ctx->last_alu_header_offset);
723
724 /* Clear the field that counts ALUs with equal headers. */
725 union packed_instr clean_header;
726 clean_header.u32 = last_header->u32;
727 clean_header.alu.num_followup_alu_sharing_header = 0;
728
729 /* There can be at most 4 consecutive ALU instructions
730 * sharing the same header.
731 */
732 if (last_header->alu.num_followup_alu_sharing_header < 3 &&
733 header.u32 == clean_header.u32) {
734 last_header->alu.num_followup_alu_sharing_header++;
735 equal_header = true;
736 }
737 }
738
739 if (!equal_header) {
740 ctx->last_alu_header_offset = ctx->blob->size;
741 blob_write_uint32(ctx->blob, header.u32);
742 }
743 } else {
744 blob_write_uint32(ctx->blob, header.u32);
745 }
746
747 if (dest.ssa.is_ssa &&
748 dest.ssa.num_components == NUM_COMPONENTS_IS_SEPARATE_7)
749 blob_write_uint32(ctx->blob, dst->ssa.num_components);
750
751 if (dst->is_ssa) {
752 write_add_object(ctx, &dst->ssa);
753 if (dest.ssa.has_name)
754 blob_write_string(ctx->blob, dst->ssa.name);
755 } else {
756 blob_write_uint32(ctx->blob, write_lookup_object(ctx, dst->reg.reg));
757 blob_write_uint32(ctx->blob, dst->reg.base_offset);
758 if (dst->reg.indirect)
759 write_src(ctx, dst->reg.indirect);
760 }
761 }
762
763 static void
764 read_dest(read_ctx *ctx, nir_dest *dst, nir_instr *instr,
765 union packed_instr header)
766 {
767 union packed_dest dest;
768 dest.u8 = header.any.dest;
769
770 if (dest.ssa.is_ssa) {
771 unsigned bit_size = decode_bit_size_3bits(dest.ssa.bit_size);
772 unsigned num_components;
773 if (dest.ssa.num_components == NUM_COMPONENTS_IS_SEPARATE_7)
774 num_components = blob_read_uint32(ctx->blob);
775 else
776 num_components = decode_num_components_in_3bits(dest.ssa.num_components);
777 char *name = dest.ssa.has_name ? blob_read_string(ctx->blob) : NULL;
778 nir_ssa_dest_init(instr, dst, num_components, bit_size, name);
779 read_add_object(ctx, &dst->ssa);
780 } else {
781 dst->reg.reg = read_object(ctx);
782 dst->reg.base_offset = blob_read_uint32(ctx->blob);
783 if (dest.reg.is_indirect) {
784 dst->reg.indirect = ralloc(instr, nir_src);
785 read_src(ctx, dst->reg.indirect, instr);
786 }
787 }
788 }
789
790 static bool
791 are_object_ids_16bit(write_ctx *ctx)
792 {
793 /* Check the highest object ID, because they are monotonic. */
794 return ctx->next_idx < (1 << 16);
795 }
796
797 static bool
798 is_alu_src_ssa_16bit(write_ctx *ctx, const nir_alu_instr *alu)
799 {
800 unsigned num_srcs = nir_op_infos[alu->op].num_inputs;
801
802 for (unsigned i = 0; i < num_srcs; i++) {
803 if (!alu->src[i].src.is_ssa || alu->src[i].abs || alu->src[i].negate)
804 return false;
805
806 unsigned src_components = nir_ssa_alu_instr_src_components(alu, i);
807
808 for (unsigned chan = 0; chan < src_components; chan++) {
809 /* The swizzles for src0.x and src1.x are stored
810 * in writemask_or_two_swizzles for SSA ALUs.
811 */
812 if (alu->dest.dest.is_ssa && i < 2 && chan == 0 &&
813 alu->src[i].swizzle[chan] < 4)
814 continue;
815
816 if (alu->src[i].swizzle[chan] != chan)
817 return false;
818 }
819 }
820
821 return are_object_ids_16bit(ctx);
822 }
823
824 static void
825 write_alu(write_ctx *ctx, const nir_alu_instr *alu)
826 {
827 unsigned num_srcs = nir_op_infos[alu->op].num_inputs;
828 unsigned dst_components = nir_dest_num_components(alu->dest.dest);
829
830 /* 9 bits for nir_op */
831 STATIC_ASSERT(nir_num_opcodes <= 512);
832 union packed_instr header;
833 header.u32 = 0;
834
835 header.alu.instr_type = alu->instr.type;
836 header.alu.exact = alu->exact;
837 header.alu.no_signed_wrap = alu->no_signed_wrap;
838 header.alu.no_unsigned_wrap = alu->no_unsigned_wrap;
839 header.alu.saturate = alu->dest.saturate;
840 header.alu.op = alu->op;
841 header.alu.packed_src_ssa_16bit = is_alu_src_ssa_16bit(ctx, alu);
842
843 if (header.alu.packed_src_ssa_16bit &&
844 alu->dest.dest.is_ssa) {
845 /* For packed srcs of SSA ALUs, this field stores the swizzles. */
846 header.alu.writemask_or_two_swizzles = alu->src[0].swizzle[0];
847 if (num_srcs > 1)
848 header.alu.writemask_or_two_swizzles |= alu->src[1].swizzle[0] << 2;
849 } else if (!alu->dest.dest.is_ssa && dst_components <= 4) {
850 /* For vec4 registers, this field is a writemask. */
851 header.alu.writemask_or_two_swizzles = alu->dest.write_mask;
852 }
853
854 write_dest(ctx, &alu->dest.dest, header, alu->instr.type);
855
856 if (!alu->dest.dest.is_ssa && dst_components > 4)
857 blob_write_uint32(ctx->blob, alu->dest.write_mask);
858
859 if (header.alu.packed_src_ssa_16bit) {
860 for (unsigned i = 0; i < num_srcs; i++) {
861 assert(alu->src[i].src.is_ssa);
862 unsigned idx = write_lookup_object(ctx, alu->src[i].src.ssa);
863 assert(idx < (1 << 16));
864 blob_write_uint16(ctx->blob, idx);
865 }
866 } else {
867 for (unsigned i = 0; i < num_srcs; i++) {
868 unsigned src_channels = nir_ssa_alu_instr_src_components(alu, i);
869 unsigned src_components = nir_src_num_components(alu->src[i].src);
870 union packed_src src;
871 bool packed = src_components <= 4 && src_channels <= 4;
872 src.u32 = 0;
873
874 src.alu.negate = alu->src[i].negate;
875 src.alu.abs = alu->src[i].abs;
876
877 if (packed) {
878 src.alu.swizzle_x = alu->src[i].swizzle[0];
879 src.alu.swizzle_y = alu->src[i].swizzle[1];
880 src.alu.swizzle_z = alu->src[i].swizzle[2];
881 src.alu.swizzle_w = alu->src[i].swizzle[3];
882 }
883
884 write_src_full(ctx, &alu->src[i].src, src);
885
886 /* Store swizzles for vec8 and vec16. */
887 if (!packed) {
888 for (unsigned o = 0; o < src_channels; o += 8) {
889 unsigned value = 0;
890
891 for (unsigned j = 0; j < 8 && o + j < src_channels; j++) {
892 value |= (uint32_t)alu->src[i].swizzle[o + j] <<
893 (4 * j); /* 4 bits per swizzle */
894 }
895
896 blob_write_uint32(ctx->blob, value);
897 }
898 }
899 }
900 }
901 }
902
903 static nir_alu_instr *
904 read_alu(read_ctx *ctx, union packed_instr header)
905 {
906 unsigned num_srcs = nir_op_infos[header.alu.op].num_inputs;
907 nir_alu_instr *alu = nir_alu_instr_create(ctx->nir, header.alu.op);
908
909 alu->exact = header.alu.exact;
910 alu->no_signed_wrap = header.alu.no_signed_wrap;
911 alu->no_unsigned_wrap = header.alu.no_unsigned_wrap;
912 alu->dest.saturate = header.alu.saturate;
913
914 read_dest(ctx, &alu->dest.dest, &alu->instr, header);
915
916 unsigned dst_components = nir_dest_num_components(alu->dest.dest);
917
918 if (alu->dest.dest.is_ssa) {
919 alu->dest.write_mask = u_bit_consecutive(0, dst_components);
920 } else if (dst_components <= 4) {
921 alu->dest.write_mask = header.alu.writemask_or_two_swizzles;
922 } else {
923 alu->dest.write_mask = blob_read_uint32(ctx->blob);
924 }
925
926 if (header.alu.packed_src_ssa_16bit) {
927 for (unsigned i = 0; i < num_srcs; i++) {
928 nir_alu_src *src = &alu->src[i];
929 src->src.is_ssa = true;
930 src->src.ssa = read_lookup_object(ctx, blob_read_uint16(ctx->blob));
931
932 memset(&src->swizzle, 0, sizeof(src->swizzle));
933
934 unsigned src_components = nir_ssa_alu_instr_src_components(alu, i);
935
936 for (unsigned chan = 0; chan < src_components; chan++)
937 src->swizzle[chan] = chan;
938 }
939 } else {
940 for (unsigned i = 0; i < num_srcs; i++) {
941 union packed_src src = read_src(ctx, &alu->src[i].src, &alu->instr);
942 unsigned src_channels = nir_ssa_alu_instr_src_components(alu, i);
943 unsigned src_components = nir_src_num_components(alu->src[i].src);
944 bool packed = src_components <= 4 && src_channels <= 4;
945
946 alu->src[i].negate = src.alu.negate;
947 alu->src[i].abs = src.alu.abs;
948
949 memset(&alu->src[i].swizzle, 0, sizeof(alu->src[i].swizzle));
950
951 if (packed) {
952 alu->src[i].swizzle[0] = src.alu.swizzle_x;
953 alu->src[i].swizzle[1] = src.alu.swizzle_y;
954 alu->src[i].swizzle[2] = src.alu.swizzle_z;
955 alu->src[i].swizzle[3] = src.alu.swizzle_w;
956 } else {
957 /* Load swizzles for vec8 and vec16. */
958 for (unsigned o = 0; o < src_channels; o += 8) {
959 unsigned value = blob_read_uint32(ctx->blob);
960
961 for (unsigned j = 0; j < 8 && o + j < src_channels; j++) {
962 alu->src[i].swizzle[o + j] =
963 (value >> (4 * j)) & 0xf; /* 4 bits per swizzle */
964 }
965 }
966 }
967 }
968 }
969
970 if (header.alu.packed_src_ssa_16bit &&
971 alu->dest.dest.is_ssa) {
972 alu->src[0].swizzle[0] = header.alu.writemask_or_two_swizzles & 0x3;
973 if (num_srcs > 1)
974 alu->src[1].swizzle[0] = header.alu.writemask_or_two_swizzles >> 2;
975 }
976
977 return alu;
978 }
979
980 static void
981 write_deref(write_ctx *ctx, const nir_deref_instr *deref)
982 {
983 assert(deref->deref_type < 8);
984 assert(deref->mode < (1 << 12));
985
986 union packed_instr header;
987 header.u32 = 0;
988
989 header.deref.instr_type = deref->instr.type;
990 header.deref.deref_type = deref->deref_type;
991
992 if (deref->deref_type == nir_deref_type_cast) {
993 header.deref.mode = deref->mode;
994 header.deref.cast_type_same_as_last = deref->type == ctx->last_type;
995 }
996
997 unsigned var_idx = 0;
998 if (deref->deref_type == nir_deref_type_var) {
999 var_idx = write_lookup_object(ctx, deref->var);
1000 if (var_idx && var_idx < (1 << 16))
1001 header.deref_var.object_idx = var_idx;
1002 }
1003
1004 if (deref->deref_type == nir_deref_type_array ||
1005 deref->deref_type == nir_deref_type_ptr_as_array) {
1006 header.deref.packed_src_ssa_16bit =
1007 deref->parent.is_ssa && deref->arr.index.is_ssa &&
1008 are_object_ids_16bit(ctx);
1009 }
1010
1011 write_dest(ctx, &deref->dest, header, deref->instr.type);
1012
1013 switch (deref->deref_type) {
1014 case nir_deref_type_var:
1015 if (!header.deref_var.object_idx)
1016 blob_write_uint32(ctx->blob, var_idx);
1017 break;
1018
1019 case nir_deref_type_struct:
1020 write_src(ctx, &deref->parent);
1021 blob_write_uint32(ctx->blob, deref->strct.index);
1022 break;
1023
1024 case nir_deref_type_array:
1025 case nir_deref_type_ptr_as_array:
1026 if (header.deref.packed_src_ssa_16bit) {
1027 blob_write_uint16(ctx->blob,
1028 write_lookup_object(ctx, deref->parent.ssa));
1029 blob_write_uint16(ctx->blob,
1030 write_lookup_object(ctx, deref->arr.index.ssa));
1031 } else {
1032 write_src(ctx, &deref->parent);
1033 write_src(ctx, &deref->arr.index);
1034 }
1035 break;
1036
1037 case nir_deref_type_cast:
1038 write_src(ctx, &deref->parent);
1039 blob_write_uint32(ctx->blob, deref->cast.ptr_stride);
1040 blob_write_uint32(ctx->blob, deref->cast.align_mul);
1041 blob_write_uint32(ctx->blob, deref->cast.align_offset);
1042 if (!header.deref.cast_type_same_as_last) {
1043 encode_type_to_blob(ctx->blob, deref->type);
1044 ctx->last_type = deref->type;
1045 }
1046 break;
1047
1048 case nir_deref_type_array_wildcard:
1049 write_src(ctx, &deref->parent);
1050 break;
1051
1052 default:
1053 unreachable("Invalid deref type");
1054 }
1055 }
1056
1057 static nir_deref_instr *
1058 read_deref(read_ctx *ctx, union packed_instr header)
1059 {
1060 nir_deref_type deref_type = header.deref.deref_type;
1061 nir_deref_instr *deref = nir_deref_instr_create(ctx->nir, deref_type);
1062
1063 read_dest(ctx, &deref->dest, &deref->instr, header);
1064
1065 nir_deref_instr *parent;
1066
1067 switch (deref->deref_type) {
1068 case nir_deref_type_var:
1069 if (header.deref_var.object_idx)
1070 deref->var = read_lookup_object(ctx, header.deref_var.object_idx);
1071 else
1072 deref->var = read_object(ctx);
1073
1074 deref->type = deref->var->type;
1075 break;
1076
1077 case nir_deref_type_struct:
1078 read_src(ctx, &deref->parent, &deref->instr);
1079 parent = nir_src_as_deref(deref->parent);
1080 deref->strct.index = blob_read_uint32(ctx->blob);
1081 deref->type = glsl_get_struct_field(parent->type, deref->strct.index);
1082 break;
1083
1084 case nir_deref_type_array:
1085 case nir_deref_type_ptr_as_array:
1086 if (header.deref.packed_src_ssa_16bit) {
1087 deref->parent.is_ssa = true;
1088 deref->parent.ssa = read_lookup_object(ctx, blob_read_uint16(ctx->blob));
1089 deref->arr.index.is_ssa = true;
1090 deref->arr.index.ssa = read_lookup_object(ctx, blob_read_uint16(ctx->blob));
1091 } else {
1092 read_src(ctx, &deref->parent, &deref->instr);
1093 read_src(ctx, &deref->arr.index, &deref->instr);
1094 }
1095
1096 parent = nir_src_as_deref(deref->parent);
1097 if (deref->deref_type == nir_deref_type_array)
1098 deref->type = glsl_get_array_element(parent->type);
1099 else
1100 deref->type = parent->type;
1101 break;
1102
1103 case nir_deref_type_cast:
1104 read_src(ctx, &deref->parent, &deref->instr);
1105 deref->cast.ptr_stride = blob_read_uint32(ctx->blob);
1106 deref->cast.align_mul = blob_read_uint32(ctx->blob);
1107 deref->cast.align_offset = blob_read_uint32(ctx->blob);
1108 if (header.deref.cast_type_same_as_last) {
1109 deref->type = ctx->last_type;
1110 } else {
1111 deref->type = decode_type_from_blob(ctx->blob);
1112 ctx->last_type = deref->type;
1113 }
1114 break;
1115
1116 case nir_deref_type_array_wildcard:
1117 read_src(ctx, &deref->parent, &deref->instr);
1118 parent = nir_src_as_deref(deref->parent);
1119 deref->type = glsl_get_array_element(parent->type);
1120 break;
1121
1122 default:
1123 unreachable("Invalid deref type");
1124 }
1125
1126 if (deref_type == nir_deref_type_var) {
1127 deref->mode = deref->var->data.mode;
1128 } else if (deref->deref_type == nir_deref_type_cast) {
1129 deref->mode = header.deref.mode;
1130 } else {
1131 assert(deref->parent.is_ssa);
1132 deref->mode = nir_instr_as_deref(deref->parent.ssa->parent_instr)->mode;
1133 }
1134
1135 return deref;
1136 }
1137
1138 static void
1139 write_intrinsic(write_ctx *ctx, const nir_intrinsic_instr *intrin)
1140 {
1141 /* 9 bits for nir_intrinsic_op */
1142 STATIC_ASSERT(nir_num_intrinsics <= 512);
1143 unsigned num_srcs = nir_intrinsic_infos[intrin->intrinsic].num_srcs;
1144 unsigned num_indices = nir_intrinsic_infos[intrin->intrinsic].num_indices;
1145 assert(intrin->intrinsic < 512);
1146
1147 union packed_instr header;
1148 header.u32 = 0;
1149
1150 header.intrinsic.instr_type = intrin->instr.type;
1151 header.intrinsic.intrinsic = intrin->intrinsic;
1152
1153 /* Analyze constant indices to decide how to encode them. */
1154 if (num_indices) {
1155 unsigned max_bits = 0;
1156 for (unsigned i = 0; i < num_indices; i++) {
1157 unsigned max = util_last_bit(intrin->const_index[i]);
1158 max_bits = MAX2(max_bits, max);
1159 }
1160
1161 if (max_bits * num_indices <= 9) {
1162 header.intrinsic.const_indices_encoding = const_indices_9bit_all_combined;
1163
1164 /* Pack all const indices into 6 bits. */
1165 unsigned bit_size = 9 / num_indices;
1166 for (unsigned i = 0; i < num_indices; i++) {
1167 header.intrinsic.packed_const_indices |=
1168 intrin->const_index[i] << (i * bit_size);
1169 }
1170 } else if (max_bits <= 8)
1171 header.intrinsic.const_indices_encoding = const_indices_8bit;
1172 else if (max_bits <= 16)
1173 header.intrinsic.const_indices_encoding = const_indices_16bit;
1174 else
1175 header.intrinsic.const_indices_encoding = const_indices_32bit;
1176 }
1177
1178 if (nir_intrinsic_infos[intrin->intrinsic].has_dest)
1179 write_dest(ctx, &intrin->dest, header, intrin->instr.type);
1180 else
1181 blob_write_uint32(ctx->blob, header.u32);
1182
1183 for (unsigned i = 0; i < num_srcs; i++)
1184 write_src(ctx, &intrin->src[i]);
1185
1186 if (num_indices) {
1187 switch (header.intrinsic.const_indices_encoding) {
1188 case const_indices_8bit:
1189 for (unsigned i = 0; i < num_indices; i++)
1190 blob_write_uint8(ctx->blob, intrin->const_index[i]);
1191 break;
1192 case const_indices_16bit:
1193 for (unsigned i = 0; i < num_indices; i++)
1194 blob_write_uint16(ctx->blob, intrin->const_index[i]);
1195 break;
1196 case const_indices_32bit:
1197 for (unsigned i = 0; i < num_indices; i++)
1198 blob_write_uint32(ctx->blob, intrin->const_index[i]);
1199 break;
1200 }
1201 }
1202 }
1203
1204 static nir_intrinsic_instr *
1205 read_intrinsic(read_ctx *ctx, union packed_instr header)
1206 {
1207 nir_intrinsic_op op = header.intrinsic.intrinsic;
1208 nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(ctx->nir, op);
1209
1210 unsigned num_srcs = nir_intrinsic_infos[op].num_srcs;
1211 unsigned num_indices = nir_intrinsic_infos[op].num_indices;
1212
1213 if (nir_intrinsic_infos[op].has_dest)
1214 read_dest(ctx, &intrin->dest, &intrin->instr, header);
1215
1216 for (unsigned i = 0; i < num_srcs; i++)
1217 read_src(ctx, &intrin->src[i], &intrin->instr);
1218
1219 /* Vectorized instrinsics have num_components same as dst or src that has
1220 * 0 components in the info. Find it.
1221 */
1222 if (nir_intrinsic_infos[op].has_dest &&
1223 nir_intrinsic_infos[op].dest_components == 0) {
1224 intrin->num_components = nir_dest_num_components(intrin->dest);
1225 } else {
1226 for (unsigned i = 0; i < num_srcs; i++) {
1227 if (nir_intrinsic_infos[op].src_components[i] == 0) {
1228 intrin->num_components = nir_src_num_components(intrin->src[i]);
1229 break;
1230 }
1231 }
1232 }
1233
1234 if (num_indices) {
1235 switch (header.intrinsic.const_indices_encoding) {
1236 case const_indices_9bit_all_combined: {
1237 unsigned bit_size = 9 / num_indices;
1238 unsigned bit_mask = u_bit_consecutive(0, bit_size);
1239 for (unsigned i = 0; i < num_indices; i++) {
1240 intrin->const_index[i] =
1241 (header.intrinsic.packed_const_indices >> (i * bit_size)) &
1242 bit_mask;
1243 }
1244 break;
1245 }
1246 case const_indices_8bit:
1247 for (unsigned i = 0; i < num_indices; i++)
1248 intrin->const_index[i] = blob_read_uint8(ctx->blob);
1249 break;
1250 case const_indices_16bit:
1251 for (unsigned i = 0; i < num_indices; i++)
1252 intrin->const_index[i] = blob_read_uint16(ctx->blob);
1253 break;
1254 case const_indices_32bit:
1255 for (unsigned i = 0; i < num_indices; i++)
1256 intrin->const_index[i] = blob_read_uint32(ctx->blob);
1257 break;
1258 }
1259 }
1260
1261 return intrin;
1262 }
1263
1264 static void
1265 write_load_const(write_ctx *ctx, const nir_load_const_instr *lc)
1266 {
1267 assert(lc->def.num_components >= 1 && lc->def.num_components <= 16);
1268 union packed_instr header;
1269 header.u32 = 0;
1270
1271 header.load_const.instr_type = lc->instr.type;
1272 header.load_const.last_component = lc->def.num_components - 1;
1273 header.load_const.bit_size = encode_bit_size_3bits(lc->def.bit_size);
1274 header.load_const.packing = load_const_full;
1275
1276 /* Try to pack 1-component constants into the 19 free bits in the header. */
1277 if (lc->def.num_components == 1) {
1278 switch (lc->def.bit_size) {
1279 case 64:
1280 if ((lc->value[0].u64 & 0x1fffffffffffull) == 0) {
1281 /* packed_value contains high 19 bits, low bits are 0 */
1282 header.load_const.packing = load_const_scalar_hi_19bits;
1283 header.load_const.packed_value = lc->value[0].u64 >> 45;
1284 } else if (((lc->value[0].i64 << 45) >> 45) == lc->value[0].i64) {
1285 /* packed_value contains low 19 bits, high bits are sign-extended */
1286 header.load_const.packing = load_const_scalar_lo_19bits_sext;
1287 header.load_const.packed_value = lc->value[0].u64;
1288 }
1289 break;
1290
1291 case 32:
1292 if ((lc->value[0].u32 & 0x1fff) == 0) {
1293 header.load_const.packing = load_const_scalar_hi_19bits;
1294 header.load_const.packed_value = lc->value[0].u32 >> 13;
1295 } else if (((lc->value[0].i32 << 13) >> 13) == lc->value[0].i32) {
1296 header.load_const.packing = load_const_scalar_lo_19bits_sext;
1297 header.load_const.packed_value = lc->value[0].u32;
1298 }
1299 break;
1300
1301 case 16:
1302 header.load_const.packing = load_const_scalar_lo_19bits_sext;
1303 header.load_const.packed_value = lc->value[0].u16;
1304 break;
1305 case 8:
1306 header.load_const.packing = load_const_scalar_lo_19bits_sext;
1307 header.load_const.packed_value = lc->value[0].u8;
1308 break;
1309 case 1:
1310 header.load_const.packing = load_const_scalar_lo_19bits_sext;
1311 header.load_const.packed_value = lc->value[0].b;
1312 break;
1313 default:
1314 unreachable("invalid bit_size");
1315 }
1316 }
1317
1318 blob_write_uint32(ctx->blob, header.u32);
1319
1320 if (header.load_const.packing == load_const_full) {
1321 switch (lc->def.bit_size) {
1322 case 64:
1323 blob_write_bytes(ctx->blob, lc->value,
1324 sizeof(*lc->value) * lc->def.num_components);
1325 break;
1326
1327 case 32:
1328 for (unsigned i = 0; i < lc->def.num_components; i++)
1329 blob_write_uint32(ctx->blob, lc->value[i].u32);
1330 break;
1331
1332 case 16:
1333 for (unsigned i = 0; i < lc->def.num_components; i++)
1334 blob_write_uint16(ctx->blob, lc->value[i].u16);
1335 break;
1336
1337 default:
1338 assert(lc->def.bit_size <= 8);
1339 for (unsigned i = 0; i < lc->def.num_components; i++)
1340 blob_write_uint8(ctx->blob, lc->value[i].u8);
1341 break;
1342 }
1343 }
1344
1345 write_add_object(ctx, &lc->def);
1346 }
1347
1348 static nir_load_const_instr *
1349 read_load_const(read_ctx *ctx, union packed_instr header)
1350 {
1351 nir_load_const_instr *lc =
1352 nir_load_const_instr_create(ctx->nir, header.load_const.last_component + 1,
1353 decode_bit_size_3bits(header.load_const.bit_size));
1354
1355 switch (header.load_const.packing) {
1356 case load_const_scalar_hi_19bits:
1357 switch (lc->def.bit_size) {
1358 case 64:
1359 lc->value[0].u64 = (uint64_t)header.load_const.packed_value << 45;
1360 break;
1361 case 32:
1362 lc->value[0].u32 = (uint64_t)header.load_const.packed_value << 13;
1363 break;
1364 default:
1365 unreachable("invalid bit_size");
1366 }
1367 break;
1368
1369 case load_const_scalar_lo_19bits_sext:
1370 switch (lc->def.bit_size) {
1371 case 64:
1372 lc->value[0].i64 = ((int64_t)header.load_const.packed_value << 45) >> 45;
1373 break;
1374 case 32:
1375 lc->value[0].i32 = ((int32_t)header.load_const.packed_value << 13) >> 13;
1376 break;
1377 case 16:
1378 lc->value[0].u16 = header.load_const.packed_value;
1379 break;
1380 case 8:
1381 lc->value[0].u8 = header.load_const.packed_value;
1382 break;
1383 case 1:
1384 lc->value[0].b = header.load_const.packed_value;
1385 break;
1386 default:
1387 unreachable("invalid bit_size");
1388 }
1389 break;
1390
1391 case load_const_full:
1392 switch (lc->def.bit_size) {
1393 case 64:
1394 blob_copy_bytes(ctx->blob, lc->value, sizeof(*lc->value) * lc->def.num_components);
1395 break;
1396
1397 case 32:
1398 for (unsigned i = 0; i < lc->def.num_components; i++)
1399 lc->value[i].u32 = blob_read_uint32(ctx->blob);
1400 break;
1401
1402 case 16:
1403 for (unsigned i = 0; i < lc->def.num_components; i++)
1404 lc->value[i].u16 = blob_read_uint16(ctx->blob);
1405 break;
1406
1407 default:
1408 assert(lc->def.bit_size <= 8);
1409 for (unsigned i = 0; i < lc->def.num_components; i++)
1410 lc->value[i].u8 = blob_read_uint8(ctx->blob);
1411 break;
1412 }
1413 break;
1414 }
1415
1416 read_add_object(ctx, &lc->def);
1417 return lc;
1418 }
1419
1420 static void
1421 write_ssa_undef(write_ctx *ctx, const nir_ssa_undef_instr *undef)
1422 {
1423 assert(undef->def.num_components >= 1 && undef->def.num_components <= 16);
1424
1425 union packed_instr header;
1426 header.u32 = 0;
1427
1428 header.undef.instr_type = undef->instr.type;
1429 header.undef.last_component = undef->def.num_components - 1;
1430 header.undef.bit_size = encode_bit_size_3bits(undef->def.bit_size);
1431
1432 blob_write_uint32(ctx->blob, header.u32);
1433 write_add_object(ctx, &undef->def);
1434 }
1435
1436 static nir_ssa_undef_instr *
1437 read_ssa_undef(read_ctx *ctx, union packed_instr header)
1438 {
1439 nir_ssa_undef_instr *undef =
1440 nir_ssa_undef_instr_create(ctx->nir, header.undef.last_component + 1,
1441 decode_bit_size_3bits(header.undef.bit_size));
1442
1443 read_add_object(ctx, &undef->def);
1444 return undef;
1445 }
1446
1447 union packed_tex_data {
1448 uint32_t u32;
1449 struct {
1450 unsigned sampler_dim:4;
1451 unsigned dest_type:8;
1452 unsigned coord_components:3;
1453 unsigned is_array:1;
1454 unsigned is_shadow:1;
1455 unsigned is_new_style_shadow:1;
1456 unsigned component:2;
1457 unsigned texture_non_uniform:1;
1458 unsigned sampler_non_uniform:1;
1459 unsigned unused:8; /* Mark unused for valgrind. */
1460 } u;
1461 };
1462
1463 static void
1464 write_tex(write_ctx *ctx, const nir_tex_instr *tex)
1465 {
1466 assert(tex->num_srcs < 16);
1467 assert(tex->op < 16);
1468
1469 union packed_instr header;
1470 header.u32 = 0;
1471
1472 header.tex.instr_type = tex->instr.type;
1473 header.tex.num_srcs = tex->num_srcs;
1474 header.tex.op = tex->op;
1475
1476 write_dest(ctx, &tex->dest, header, tex->instr.type);
1477
1478 blob_write_uint32(ctx->blob, tex->texture_index);
1479 blob_write_uint32(ctx->blob, tex->sampler_index);
1480 if (tex->op == nir_texop_tg4)
1481 blob_write_bytes(ctx->blob, tex->tg4_offsets, sizeof(tex->tg4_offsets));
1482
1483 STATIC_ASSERT(sizeof(union packed_tex_data) == sizeof(uint32_t));
1484 union packed_tex_data packed = {
1485 .u.sampler_dim = tex->sampler_dim,
1486 .u.dest_type = tex->dest_type,
1487 .u.coord_components = tex->coord_components,
1488 .u.is_array = tex->is_array,
1489 .u.is_shadow = tex->is_shadow,
1490 .u.is_new_style_shadow = tex->is_new_style_shadow,
1491 .u.component = tex->component,
1492 .u.texture_non_uniform = tex->texture_non_uniform,
1493 .u.sampler_non_uniform = tex->sampler_non_uniform,
1494 };
1495 blob_write_uint32(ctx->blob, packed.u32);
1496
1497 for (unsigned i = 0; i < tex->num_srcs; i++) {
1498 union packed_src src;
1499 src.u32 = 0;
1500 src.tex.src_type = tex->src[i].src_type;
1501 write_src_full(ctx, &tex->src[i].src, src);
1502 }
1503 }
1504
1505 static nir_tex_instr *
1506 read_tex(read_ctx *ctx, union packed_instr header)
1507 {
1508 nir_tex_instr *tex = nir_tex_instr_create(ctx->nir, header.tex.num_srcs);
1509
1510 read_dest(ctx, &tex->dest, &tex->instr, header);
1511
1512 tex->op = header.tex.op;
1513 tex->texture_index = blob_read_uint32(ctx->blob);
1514 tex->sampler_index = blob_read_uint32(ctx->blob);
1515 if (tex->op == nir_texop_tg4)
1516 blob_copy_bytes(ctx->blob, tex->tg4_offsets, sizeof(tex->tg4_offsets));
1517
1518 union packed_tex_data packed;
1519 packed.u32 = blob_read_uint32(ctx->blob);
1520 tex->sampler_dim = packed.u.sampler_dim;
1521 tex->dest_type = packed.u.dest_type;
1522 tex->coord_components = packed.u.coord_components;
1523 tex->is_array = packed.u.is_array;
1524 tex->is_shadow = packed.u.is_shadow;
1525 tex->is_new_style_shadow = packed.u.is_new_style_shadow;
1526 tex->component = packed.u.component;
1527 tex->texture_non_uniform = packed.u.texture_non_uniform;
1528 tex->sampler_non_uniform = packed.u.sampler_non_uniform;
1529
1530 for (unsigned i = 0; i < tex->num_srcs; i++) {
1531 union packed_src src = read_src(ctx, &tex->src[i].src, &tex->instr);
1532 tex->src[i].src_type = src.tex.src_type;
1533 }
1534
1535 return tex;
1536 }
1537
1538 static void
1539 write_phi(write_ctx *ctx, const nir_phi_instr *phi)
1540 {
1541 union packed_instr header;
1542 header.u32 = 0;
1543
1544 header.phi.instr_type = phi->instr.type;
1545 header.phi.num_srcs = exec_list_length(&phi->srcs);
1546
1547 /* Phi nodes are special, since they may reference SSA definitions and
1548 * basic blocks that don't exist yet. We leave two empty uint32_t's here,
1549 * and then store enough information so that a later fixup pass can fill
1550 * them in correctly.
1551 */
1552 write_dest(ctx, &phi->dest, header, phi->instr.type);
1553
1554 nir_foreach_phi_src(src, phi) {
1555 assert(src->src.is_ssa);
1556 size_t blob_offset = blob_reserve_uint32(ctx->blob);
1557 ASSERTED size_t blob_offset2 = blob_reserve_uint32(ctx->blob);
1558 assert(blob_offset + sizeof(uint32_t) == blob_offset2);
1559 write_phi_fixup fixup = {
1560 .blob_offset = blob_offset,
1561 .src = src->src.ssa,
1562 .block = src->pred,
1563 };
1564 util_dynarray_append(&ctx->phi_fixups, write_phi_fixup, fixup);
1565 }
1566 }
1567
1568 static void
1569 write_fixup_phis(write_ctx *ctx)
1570 {
1571 util_dynarray_foreach(&ctx->phi_fixups, write_phi_fixup, fixup) {
1572 uint32_t *blob_ptr = (uint32_t *)(ctx->blob->data + fixup->blob_offset);
1573 blob_ptr[0] = write_lookup_object(ctx, fixup->src);
1574 blob_ptr[1] = write_lookup_object(ctx, fixup->block);
1575 }
1576
1577 util_dynarray_clear(&ctx->phi_fixups);
1578 }
1579
1580 static nir_phi_instr *
1581 read_phi(read_ctx *ctx, nir_block *blk, union packed_instr header)
1582 {
1583 nir_phi_instr *phi = nir_phi_instr_create(ctx->nir);
1584
1585 read_dest(ctx, &phi->dest, &phi->instr, header);
1586
1587 /* For similar reasons as before, we just store the index directly into the
1588 * pointer, and let a later pass resolve the phi sources.
1589 *
1590 * In order to ensure that the copied sources (which are just the indices
1591 * from the blob for now) don't get inserted into the old shader's use-def
1592 * lists, we have to add the phi instruction *before* we set up its
1593 * sources.
1594 */
1595 nir_instr_insert_after_block(blk, &phi->instr);
1596
1597 for (unsigned i = 0; i < header.phi.num_srcs; i++) {
1598 nir_phi_src *src = ralloc(phi, nir_phi_src);
1599
1600 src->src.is_ssa = true;
1601 src->src.ssa = (nir_ssa_def *)(uintptr_t) blob_read_uint32(ctx->blob);
1602 src->pred = (nir_block *)(uintptr_t) blob_read_uint32(ctx->blob);
1603
1604 /* Since we're not letting nir_insert_instr handle use/def stuff for us,
1605 * we have to set the parent_instr manually. It doesn't really matter
1606 * when we do it, so we might as well do it here.
1607 */
1608 src->src.parent_instr = &phi->instr;
1609
1610 /* Stash it in the list of phi sources. We'll walk this list and fix up
1611 * sources at the very end of read_function_impl.
1612 */
1613 list_add(&src->src.use_link, &ctx->phi_srcs);
1614
1615 exec_list_push_tail(&phi->srcs, &src->node);
1616 }
1617
1618 return phi;
1619 }
1620
1621 static void
1622 read_fixup_phis(read_ctx *ctx)
1623 {
1624 list_for_each_entry_safe(nir_phi_src, src, &ctx->phi_srcs, src.use_link) {
1625 src->pred = read_lookup_object(ctx, (uintptr_t)src->pred);
1626 src->src.ssa = read_lookup_object(ctx, (uintptr_t)src->src.ssa);
1627
1628 /* Remove from this list */
1629 list_del(&src->src.use_link);
1630
1631 list_addtail(&src->src.use_link, &src->src.ssa->uses);
1632 }
1633 assert(list_is_empty(&ctx->phi_srcs));
1634 }
1635
1636 static void
1637 write_jump(write_ctx *ctx, const nir_jump_instr *jmp)
1638 {
1639 assert(jmp->type < 4);
1640
1641 union packed_instr header;
1642 header.u32 = 0;
1643
1644 header.jump.instr_type = jmp->instr.type;
1645 header.jump.type = jmp->type;
1646
1647 blob_write_uint32(ctx->blob, header.u32);
1648 }
1649
1650 static nir_jump_instr *
1651 read_jump(read_ctx *ctx, union packed_instr header)
1652 {
1653 nir_jump_instr *jmp = nir_jump_instr_create(ctx->nir, header.jump.type);
1654 return jmp;
1655 }
1656
1657 static void
1658 write_call(write_ctx *ctx, const nir_call_instr *call)
1659 {
1660 blob_write_uint32(ctx->blob, write_lookup_object(ctx, call->callee));
1661
1662 for (unsigned i = 0; i < call->num_params; i++)
1663 write_src(ctx, &call->params[i]);
1664 }
1665
1666 static nir_call_instr *
1667 read_call(read_ctx *ctx)
1668 {
1669 nir_function *callee = read_object(ctx);
1670 nir_call_instr *call = nir_call_instr_create(ctx->nir, callee);
1671
1672 for (unsigned i = 0; i < call->num_params; i++)
1673 read_src(ctx, &call->params[i], call);
1674
1675 return call;
1676 }
1677
1678 static void
1679 write_instr(write_ctx *ctx, const nir_instr *instr)
1680 {
1681 /* We have only 4 bits for the instruction type. */
1682 assert(instr->type < 16);
1683
1684 switch (instr->type) {
1685 case nir_instr_type_alu:
1686 write_alu(ctx, nir_instr_as_alu(instr));
1687 break;
1688 case nir_instr_type_deref:
1689 write_deref(ctx, nir_instr_as_deref(instr));
1690 break;
1691 case nir_instr_type_intrinsic:
1692 write_intrinsic(ctx, nir_instr_as_intrinsic(instr));
1693 break;
1694 case nir_instr_type_load_const:
1695 write_load_const(ctx, nir_instr_as_load_const(instr));
1696 break;
1697 case nir_instr_type_ssa_undef:
1698 write_ssa_undef(ctx, nir_instr_as_ssa_undef(instr));
1699 break;
1700 case nir_instr_type_tex:
1701 write_tex(ctx, nir_instr_as_tex(instr));
1702 break;
1703 case nir_instr_type_phi:
1704 write_phi(ctx, nir_instr_as_phi(instr));
1705 break;
1706 case nir_instr_type_jump:
1707 write_jump(ctx, nir_instr_as_jump(instr));
1708 break;
1709 case nir_instr_type_call:
1710 blob_write_uint32(ctx->blob, instr->type);
1711 write_call(ctx, nir_instr_as_call(instr));
1712 break;
1713 case nir_instr_type_parallel_copy:
1714 unreachable("Cannot write parallel copies");
1715 default:
1716 unreachable("bad instr type");
1717 }
1718 }
1719
1720 /* Return the number of instructions read. */
1721 static unsigned
1722 read_instr(read_ctx *ctx, nir_block *block)
1723 {
1724 STATIC_ASSERT(sizeof(union packed_instr) == 4);
1725 union packed_instr header;
1726 header.u32 = blob_read_uint32(ctx->blob);
1727 nir_instr *instr;
1728
1729 switch (header.any.instr_type) {
1730 case nir_instr_type_alu:
1731 for (unsigned i = 0; i <= header.alu.num_followup_alu_sharing_header; i++)
1732 nir_instr_insert_after_block(block, &read_alu(ctx, header)->instr);
1733 return header.alu.num_followup_alu_sharing_header + 1;
1734 case nir_instr_type_deref:
1735 instr = &read_deref(ctx, header)->instr;
1736 break;
1737 case nir_instr_type_intrinsic:
1738 instr = &read_intrinsic(ctx, header)->instr;
1739 break;
1740 case nir_instr_type_load_const:
1741 instr = &read_load_const(ctx, header)->instr;
1742 break;
1743 case nir_instr_type_ssa_undef:
1744 instr = &read_ssa_undef(ctx, header)->instr;
1745 break;
1746 case nir_instr_type_tex:
1747 instr = &read_tex(ctx, header)->instr;
1748 break;
1749 case nir_instr_type_phi:
1750 /* Phi instructions are a bit of a special case when reading because we
1751 * don't want inserting the instruction to automatically handle use/defs
1752 * for us. Instead, we need to wait until all the blocks/instructions
1753 * are read so that we can set their sources up.
1754 */
1755 read_phi(ctx, block, header);
1756 return 1;
1757 case nir_instr_type_jump:
1758 instr = &read_jump(ctx, header)->instr;
1759 break;
1760 case nir_instr_type_call:
1761 instr = &read_call(ctx)->instr;
1762 break;
1763 case nir_instr_type_parallel_copy:
1764 unreachable("Cannot read parallel copies");
1765 default:
1766 unreachable("bad instr type");
1767 }
1768
1769 nir_instr_insert_after_block(block, instr);
1770 return 1;
1771 }
1772
1773 static void
1774 write_block(write_ctx *ctx, const nir_block *block)
1775 {
1776 write_add_object(ctx, block);
1777 blob_write_uint32(ctx->blob, exec_list_length(&block->instr_list));
1778
1779 ctx->last_instr_type = ~0;
1780 ctx->last_alu_header_offset = 0;
1781
1782 nir_foreach_instr(instr, block) {
1783 write_instr(ctx, instr);
1784 ctx->last_instr_type = instr->type;
1785 }
1786 }
1787
1788 static void
1789 read_block(read_ctx *ctx, struct exec_list *cf_list)
1790 {
1791 /* Don't actually create a new block. Just use the one from the tail of
1792 * the list. NIR guarantees that the tail of the list is a block and that
1793 * no two blocks are side-by-side in the IR; It should be empty.
1794 */
1795 nir_block *block =
1796 exec_node_data(nir_block, exec_list_get_tail(cf_list), cf_node.node);
1797
1798 read_add_object(ctx, block);
1799 unsigned num_instrs = blob_read_uint32(ctx->blob);
1800 for (unsigned i = 0; i < num_instrs;) {
1801 i += read_instr(ctx, block);
1802 }
1803 }
1804
1805 static void
1806 write_cf_list(write_ctx *ctx, const struct exec_list *cf_list);
1807
1808 static void
1809 read_cf_list(read_ctx *ctx, struct exec_list *cf_list);
1810
1811 static void
1812 write_if(write_ctx *ctx, nir_if *nif)
1813 {
1814 write_src(ctx, &nif->condition);
1815
1816 write_cf_list(ctx, &nif->then_list);
1817 write_cf_list(ctx, &nif->else_list);
1818 }
1819
1820 static void
1821 read_if(read_ctx *ctx, struct exec_list *cf_list)
1822 {
1823 nir_if *nif = nir_if_create(ctx->nir);
1824
1825 read_src(ctx, &nif->condition, nif);
1826
1827 nir_cf_node_insert_end(cf_list, &nif->cf_node);
1828
1829 read_cf_list(ctx, &nif->then_list);
1830 read_cf_list(ctx, &nif->else_list);
1831 }
1832
1833 static void
1834 write_loop(write_ctx *ctx, nir_loop *loop)
1835 {
1836 write_cf_list(ctx, &loop->body);
1837 }
1838
1839 static void
1840 read_loop(read_ctx *ctx, struct exec_list *cf_list)
1841 {
1842 nir_loop *loop = nir_loop_create(ctx->nir);
1843
1844 nir_cf_node_insert_end(cf_list, &loop->cf_node);
1845
1846 read_cf_list(ctx, &loop->body);
1847 }
1848
1849 static void
1850 write_cf_node(write_ctx *ctx, nir_cf_node *cf)
1851 {
1852 blob_write_uint32(ctx->blob, cf->type);
1853
1854 switch (cf->type) {
1855 case nir_cf_node_block:
1856 write_block(ctx, nir_cf_node_as_block(cf));
1857 break;
1858 case nir_cf_node_if:
1859 write_if(ctx, nir_cf_node_as_if(cf));
1860 break;
1861 case nir_cf_node_loop:
1862 write_loop(ctx, nir_cf_node_as_loop(cf));
1863 break;
1864 default:
1865 unreachable("bad cf type");
1866 }
1867 }
1868
1869 static void
1870 read_cf_node(read_ctx *ctx, struct exec_list *list)
1871 {
1872 nir_cf_node_type type = blob_read_uint32(ctx->blob);
1873
1874 switch (type) {
1875 case nir_cf_node_block:
1876 read_block(ctx, list);
1877 break;
1878 case nir_cf_node_if:
1879 read_if(ctx, list);
1880 break;
1881 case nir_cf_node_loop:
1882 read_loop(ctx, list);
1883 break;
1884 default:
1885 unreachable("bad cf type");
1886 }
1887 }
1888
1889 static void
1890 write_cf_list(write_ctx *ctx, const struct exec_list *cf_list)
1891 {
1892 blob_write_uint32(ctx->blob, exec_list_length(cf_list));
1893 foreach_list_typed(nir_cf_node, cf, node, cf_list) {
1894 write_cf_node(ctx, cf);
1895 }
1896 }
1897
1898 static void
1899 read_cf_list(read_ctx *ctx, struct exec_list *cf_list)
1900 {
1901 uint32_t num_cf_nodes = blob_read_uint32(ctx->blob);
1902 for (unsigned i = 0; i < num_cf_nodes; i++)
1903 read_cf_node(ctx, cf_list);
1904 }
1905
1906 static void
1907 write_function_impl(write_ctx *ctx, const nir_function_impl *fi)
1908 {
1909 blob_write_uint8(ctx->blob, fi->structured);
1910
1911 write_var_list(ctx, &fi->locals);
1912 write_reg_list(ctx, &fi->registers);
1913 blob_write_uint32(ctx->blob, fi->reg_alloc);
1914
1915 write_cf_list(ctx, &fi->body);
1916 write_fixup_phis(ctx);
1917 }
1918
1919 static nir_function_impl *
1920 read_function_impl(read_ctx *ctx, nir_function *fxn)
1921 {
1922 nir_function_impl *fi = nir_function_impl_create_bare(ctx->nir);
1923 fi->function = fxn;
1924
1925 fi->structured = blob_read_uint8(ctx->blob);
1926
1927 read_var_list(ctx, &fi->locals);
1928 read_reg_list(ctx, &fi->registers);
1929 fi->reg_alloc = blob_read_uint32(ctx->blob);
1930
1931 read_cf_list(ctx, &fi->body);
1932 read_fixup_phis(ctx);
1933
1934 fi->valid_metadata = 0;
1935
1936 return fi;
1937 }
1938
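/* Function serialization starts with a small flags word:
 *   bit 0 - is_entrypoint
 *   bit 1 - the function has a name
 *   bit 2 - the function has an impl (written later, in a second pass)
 */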
1939 static void
1940 write_function(write_ctx *ctx, const nir_function *fxn)
1941 {
1942 uint32_t flags = fxn->is_entrypoint;
1943 if (fxn->name)
1944 flags |= 0x2;
1945 if (fxn->impl)
1946 flags |= 0x4;
1947 blob_write_uint32(ctx->blob, flags);
1948 if (fxn->name)
1949 blob_write_string(ctx->blob, fxn->name);
1950
1951 write_add_object(ctx, fxn);
1952
1953 blob_write_uint32(ctx->blob, fxn->num_params);
1954 for (unsigned i = 0; i < fxn->num_params; i++) {
1955 uint32_t val =
1956 ((uint32_t)fxn->params[i].num_components) |
1957 ((uint32_t)fxn->params[i].bit_size) << 8;
1958 blob_write_uint32(ctx->blob, val);
1959 }
1960
1961 /* At first glance, it looks like we should write the function_impl here.
1962 * However, call instructions need to be able to reference at least the
1963 * function, and calls are serialized while we write the function_impls.
1964 * We stop here and write the function_impls in a second pass instead.
1965 */
1966 }
1967
1968 static void
1969 read_function(read_ctx *ctx)
1970 {
1971 uint32_t flags = blob_read_uint32(ctx->blob);
1972 bool has_name = flags & 0x2;
1973 char *name = has_name ? blob_read_string(ctx->blob) : NULL;
1974
1975 nir_function *fxn = nir_function_create(ctx->nir, name);
1976
1977 read_add_object(ctx, fxn);
1978
1979 fxn->num_params = blob_read_uint32(ctx->blob);
1980 fxn->params = ralloc_array(fxn, nir_parameter, fxn->num_params);
1981 for (unsigned i = 0; i < fxn->num_params; i++) {
1982 uint32_t val = blob_read_uint32(ctx->blob);
1983 fxn->params[i].num_components = val & 0xff;
1984 fxn->params[i].bit_size = (val >> 8) & 0xff;
1985 }
1986
1987 fxn->is_entrypoint = flags & 0x1;
1988 if (flags & 0x4)
1989 fxn->impl = NIR_SERIALIZE_FUNC_HAS_IMPL;
1990 }
1991
1992 /**
1993 * Serialize NIR into a binary blob.
1994 *
1995 * \param strip Don't serialize information that is only useful for
1996 * debugging, such as variable names; this makes cache hits
1997 * from similar shaders more likely.
1998 */
1999 void
2000 nir_serialize(struct blob *blob, const nir_shader *nir, bool strip)
2001 {
2002 write_ctx ctx = {0};
2003 ctx.remap_table = _mesa_pointer_hash_table_create(NULL);
2004 ctx.blob = blob;
2005 ctx.nir = nir;
2006 ctx.strip = strip;
2007 util_dynarray_init(&ctx.phi_fixups, NULL);
2008
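/* Reserve a uint32 up front for the final object count (ctx.next_idx);
 * the reader uses it to size its index table. It is patched in once
 * everything has been written.
 */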
2009 size_t idx_size_offset = blob_reserve_uint32(blob);
2010
2011 struct shader_info info = nir->info;
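/* Bits in 'strings': bit 0 - the (unstripped) shader has a name,
 * bit 1 - it has a label. The strings themselves follow the flag word.
 */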
2012 uint32_t strings = 0;
2013 if (!strip && info.name)
2014 strings |= 0x1;
2015 if (!strip && info.label)
2016 strings |= 0x2;
2017 blob_write_uint32(blob, strings);
2018 if (!strip && info.name)
2019 blob_write_string(blob, info.name);
2020 if (!strip && info.label)
2021 blob_write_string(blob, info.label);
2022 info.name = info.label = NULL;
2023 blob_write_bytes(blob, (uint8_t *) &info, sizeof(info));
2024
2025 write_var_list(&ctx, &nir->variables);
2026
2027 blob_write_uint32(blob, nir->num_inputs);
2028 blob_write_uint32(blob, nir->num_uniforms);
2029 blob_write_uint32(blob, nir->num_outputs);
2030 blob_write_uint32(blob, nir->shared_size);
2031 blob_write_uint32(blob, nir->scratch_size);
2032
2033 blob_write_uint32(blob, exec_list_length(&nir->functions));
2034 nir_foreach_function(fxn, nir) {
2035 write_function(&ctx, fxn);
2036 }
2037
2038 nir_foreach_function(fxn, nir) {
2039 if (fxn->impl)
2040 write_function_impl(&ctx, fxn->impl);
2041 }
2042
2043 blob_write_uint32(blob, nir->constant_data_size);
2044 if (nir->constant_data_size > 0)
2045 blob_write_bytes(blob, nir->constant_data, nir->constant_data_size);
2046
2047 *(uint32_t *)(blob->data + idx_size_offset) = ctx.next_idx;
2048
2049 _mesa_hash_table_destroy(ctx.remap_table, NULL);
2050 util_dynarray_fini(&ctx.phi_fixups);
2051 }
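
/* A minimal usage sketch (hypothetical caller, not part of this file):
 * serialize a shader into a blob so the bytes can be stored, e.g. in a
 * driver's disk cache. cache_put() and key are assumptions; blob_init()
 * and blob_finish() are the util/blob helpers also used by
 * nir_shader_serialize_deserialize() below.
 *
 *    struct blob blob;
 *    blob_init(&blob);
 *    nir_serialize(&blob, nir, true);        // strip names for better reuse
 *    cache_put(key, blob.data, blob.size);   // hypothetical cache helper
 *    blob_finish(&blob);
 */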
2052
2053 nir_shader *
2054 nir_deserialize(void *mem_ctx,
2055 const struct nir_shader_compiler_options *options,
2056 struct blob_reader *blob)
2057 {
2058 read_ctx ctx = {0};
2059 ctx.blob = blob;
2060 list_inithead(&ctx.phi_srcs);
2061 ctx.idx_table_len = blob_read_uint32(blob);
2062 ctx.idx_table = calloc(ctx.idx_table_len, sizeof(uintptr_t));
2063
2064 uint32_t strings = blob_read_uint32(blob);
2065 char *name = (strings & 0x1) ? blob_read_string(blob) : NULL;
2066 char *label = (strings & 0x2) ? blob_read_string(blob) : NULL;
2067
2068 struct shader_info info;
2069 blob_copy_bytes(blob, (uint8_t *) &info, sizeof(info));
2070
2071 ctx.nir = nir_shader_create(mem_ctx, info.stage, options, NULL);
2072
2073 info.name = name ? ralloc_strdup(ctx.nir, name) : NULL;
2074 info.label = label ? ralloc_strdup(ctx.nir, label) : NULL;
2075
2076 ctx.nir->info = info;
2077
2078 read_var_list(&ctx, &ctx.nir->variables);
2079
2080 ctx.nir->num_inputs = blob_read_uint32(blob);
2081 ctx.nir->num_uniforms = blob_read_uint32(blob);
2082 ctx.nir->num_outputs = blob_read_uint32(blob);
2083 ctx.nir->shared_size = blob_read_uint32(blob);
2084 ctx.nir->scratch_size = blob_read_uint32(blob);
2085
2086 unsigned num_functions = blob_read_uint32(blob);
2087 for (unsigned i = 0; i < num_functions; i++)
2088 read_function(&ctx);
2089
2090 nir_foreach_function(fxn, ctx.nir) {
2091 if (fxn->impl == NIR_SERIALIZE_FUNC_HAS_IMPL)
2092 fxn->impl = read_function_impl(&ctx, fxn);
2093 }
2094
2095 ctx.nir->constant_data_size = blob_read_uint32(blob);
2096 if (ctx.nir->constant_data_size > 0) {
2097 ctx.nir->constant_data =
2098 ralloc_size(ctx.nir, ctx.nir->constant_data_size);
2099 blob_copy_bytes(blob, ctx.nir->constant_data,
2100 ctx.nir->constant_data_size);
2101 }
2102
2103 free(ctx.idx_table);
2104
2105 return ctx.nir;
2106 }
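
/* The matching read side, again only a sketch (data and size would come
 * from the hypothetical cache lookup above):
 *
 *    struct blob_reader reader;
 *    blob_reader_init(&reader, data, size);
 *    nir_shader *s = nir_deserialize(mem_ctx, options, &reader);
 */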
2107
2108 void
2109 nir_shader_serialize_deserialize(nir_shader *shader)
2110 {
2111 const struct nir_shader_compiler_options *options = shader->options;
2112
2113 struct blob writer;
2114 blob_init(&writer);
2115 nir_serialize(&writer, shader, false);
2116
2117 /* Delete all of the shader's ralloc children but leave the shader itself alone */
2118 void *dead_ctx = ralloc_context(NULL);
2119 ralloc_adopt(dead_ctx, shader);
2120 ralloc_free(dead_ctx);
2121
2122 dead_ctx = ralloc_context(NULL);
2123
2124 struct blob_reader reader;
2125 blob_reader_init(&reader, writer.data, writer.size);
2126 nir_shader *copy = nir_deserialize(dead_ctx, options, &reader);
2127
2128 blob_finish(&writer);
2129
2130 nir_shader_replace(shader, copy);
2131 ralloc_free(dead_ctx);
2132 }