nir: Add a new memory_barrier_tcs_patch intrinsic
[mesa.git] / src/compiler/nir/nir_serialize.c
1 /*
2 * Copyright © 2017 Connor Abbott
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "nir_serialize.h"
25 #include "nir_control_flow.h"
26 #include "util/u_dynarray.h"
27 #include "util/u_math.h"
28
29 #define NIR_SERIALIZE_FUNC_HAS_IMPL ((void *)(intptr_t)1)
30 #define MAX_OBJECT_IDS (1 << 20)
31
32 typedef struct {
33 size_t blob_offset;
34 nir_ssa_def *src;
35 nir_block *block;
36 } write_phi_fixup;
37
38 typedef struct {
39 const nir_shader *nir;
40
41 struct blob *blob;
42
43 /* maps pointer to index */
44 struct hash_table *remap_table;
45
46 /* the next index to assign to a NIR in-memory object */
47 uint32_t next_idx;
48
49 /* Array of write_phi_fixup structs representing phi sources that need to
50 * be resolved in the second pass.
51 */
52 struct util_dynarray phi_fixups;
53
54 /* The last serialized type. */
55 const struct glsl_type *last_type;
56 const struct glsl_type *last_interface_type;
57 struct nir_variable_data last_var_data;
58
59 /* For skipping equal ALU headers (typical after scalarization). */
60 nir_instr_type last_instr_type;
61 uintptr_t last_alu_header_offset;
62
63 /* Don't write optional data such as variable names. */
64 bool strip;
65 } write_ctx;
66
67 typedef struct {
68 nir_shader *nir;
69
70 struct blob_reader *blob;
71
72 /* the next index to assign to a NIR in-memory object */
73 uint32_t next_idx;
74
75 /* The length of the index -> object table */
76 uint32_t idx_table_len;
77
78 /* map from index to deserialized pointer */
79 void **idx_table;
80
81 /* List of phi sources. */
82 struct list_head phi_srcs;
83
84 /* The last deserialized type. */
85 const struct glsl_type *last_type;
86 const struct glsl_type *last_interface_type;
87 struct nir_variable_data last_var_data;
88 } read_ctx;
89
90 static void
91 write_add_object(write_ctx *ctx, const void *obj)
92 {
93 uint32_t index = ctx->next_idx++;
94 assert(index != MAX_OBJECT_IDS);
95 _mesa_hash_table_insert(ctx->remap_table, obj, (void *)(uintptr_t) index);
96 }
97
98 static uint32_t
99 write_lookup_object(write_ctx *ctx, const void *obj)
100 {
101 struct hash_entry *entry = _mesa_hash_table_search(ctx->remap_table, obj);
102 assert(entry);
103 return (uint32_t)(uintptr_t) entry->data;
104 }
105
106 static void
107 read_add_object(read_ctx *ctx, void *obj)
108 {
109 assert(ctx->next_idx < ctx->idx_table_len);
110 ctx->idx_table[ctx->next_idx++] = obj;
111 }
112
113 static void *
114 read_lookup_object(read_ctx *ctx, uint32_t idx)
115 {
116 assert(idx < ctx->idx_table_len);
117 return ctx->idx_table[idx];
118 }
119
120 static void *
121 read_object(read_ctx *ctx)
122 {
123 return read_lookup_object(ctx, blob_read_uint32(ctx->blob));
124 }
125
126 static uint32_t
127 encode_bit_size_3bits(uint8_t bit_size)
128 {
129 /* Encode values of 0, 1, 2, 4, 8, 16, 32, 64 in 3 bits. */
130 assert(bit_size <= 64 && util_is_power_of_two_or_zero(bit_size));
131 if (bit_size)
132 return util_logbase2(bit_size) + 1;
133 return 0;
134 }
135
136 static uint8_t
137 decode_bit_size_3bits(uint8_t bit_size)
138 {
139 if (bit_size)
140 return 1 << (bit_size - 1);
141 return 0;
142 }
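/* A quick sanity check of the encoding above: encode_bit_size_3bits(32)
 * returns util_logbase2(32) + 1 = 6, and decode_bit_size_3bits(6) returns
 * 1 << (6 - 1) = 32, so every size in {0, 1, 2, 4, 8, 16, 32, 64} round-trips
 * through a 3-bit code.
 */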
143
144 #define NUM_COMPONENTS_IS_SEPARATE_7 7
145
146 static uint8_t
147 encode_num_components_in_3bits(uint8_t num_components)
148 {
149 if (num_components <= 4)
150 return num_components;
151 if (num_components == 8)
152 return 5;
153 if (num_components == 16)
154 return 6;
155
156 /* special value indicating that num_components is in the next uint32 */
157 return NUM_COMPONENTS_IS_SEPARATE_7;
158 }
159
160 static uint8_t
161 decode_num_components_in_3bits(uint8_t value)
162 {
163 if (value <= 4)
164 return value;
165 if (value == 5)
166 return 8;
167 if (value == 6)
168 return 16;
169
170 unreachable("invalid num_components encoding");
171 return 0;
172 }
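/* Example: 1-4 components encode as themselves, 8 encodes as 5 and 16 as 6.
 * Any other count gets NUM_COMPONENTS_IS_SEPARATE_7, and write_dest/read_dest
 * then move the real component count through a separate uint32.
 */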
173
174 static void
175 write_constant(write_ctx *ctx, const nir_constant *c)
176 {
177 blob_write_bytes(ctx->blob, c->values, sizeof(c->values));
178 blob_write_uint32(ctx->blob, c->num_elements);
179 for (unsigned i = 0; i < c->num_elements; i++)
180 write_constant(ctx, c->elements[i]);
181 }
182
183 static nir_constant *
184 read_constant(read_ctx *ctx, nir_variable *nvar)
185 {
186 nir_constant *c = ralloc(nvar, nir_constant);
187
188 blob_copy_bytes(ctx->blob, (uint8_t *)c->values, sizeof(c->values));
189 c->num_elements = blob_read_uint32(ctx->blob);
190 c->elements = ralloc_array(nvar, nir_constant *, c->num_elements);
191 for (unsigned i = 0; i < c->num_elements; i++)
192 c->elements[i] = read_constant(ctx, nvar);
193
194 return c;
195 }
196
197 enum var_data_encoding {
198 var_encode_full,
199 var_encode_shader_temp,
200 var_encode_function_temp,
201 var_encode_location_diff,
202 };
203
204 union packed_var {
205 uint32_t u32;
206 struct {
207 unsigned has_name:1;
208 unsigned has_constant_initializer:1;
209 unsigned has_interface_type:1;
210 unsigned num_state_slots:7;
211 unsigned data_encoding:2;
212 unsigned type_same_as_last:1;
213 unsigned interface_type_same_as_last:1;
214 unsigned _pad:2;
215 unsigned num_members:16;
216 } u;
217 };
218
219 union packed_var_data_diff {
220 uint32_t u32;
221 struct {
222 int location:13;
223 int location_frac:3;
224 int driver_location:16;
225 } u;
226 };
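/* Example of the diff encoding: if a variable differs from the previously
 * serialized one only in data.location (say +1) and data.driver_location
 * (say +4), write_variable emits a single packed_var_data_diff uint32 with
 * location = 1, location_frac = 0 and driver_location = 4 instead of the
 * whole nir_variable_data struct.
 */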
227
228 static void
229 write_variable(write_ctx *ctx, const nir_variable *var)
230 {
231 write_add_object(ctx, var);
232
233 assert(var->num_state_slots < (1 << 7));
234 assert(var->num_members < (1 << 16));
235
236 STATIC_ASSERT(sizeof(union packed_var) == 4);
237 union packed_var flags;
238 flags.u32 = 0;
239
240 flags.u.has_name = !ctx->strip && var->name;
241 flags.u.has_constant_initializer = !!(var->constant_initializer);
242 flags.u.has_interface_type = !!(var->interface_type);
243 flags.u.type_same_as_last = var->type == ctx->last_type;
244 flags.u.interface_type_same_as_last =
245 var->interface_type && var->interface_type == ctx->last_interface_type;
246 flags.u.num_state_slots = var->num_state_slots;
247 flags.u.num_members = var->num_members;
248
249 struct nir_variable_data data = var->data;
250
251 /* When stripping, we expect that the location is no longer needed,
252 * which is typically after shaders are linked.
253 */
254 if (ctx->strip &&
255 data.mode != nir_var_shader_in &&
256 data.mode != nir_var_shader_out)
257 data.location = 0;
258
259 /* Temporary variables don't serialize var->data. */
260 if (data.mode == nir_var_shader_temp)
261 flags.u.data_encoding = var_encode_shader_temp;
262 else if (data.mode == nir_var_function_temp)
263 flags.u.data_encoding = var_encode_function_temp;
264 else {
265 struct nir_variable_data tmp = data;
266
267 tmp.location = ctx->last_var_data.location;
268 tmp.location_frac = ctx->last_var_data.location_frac;
269 tmp.driver_location = ctx->last_var_data.driver_location;
270
271 /* See if we can encode only the difference in locations from the last
272 * variable.
273 */
274 if (memcmp(&ctx->last_var_data, &tmp, sizeof(tmp)) == 0 &&
275 abs((int)data.location -
276 (int)ctx->last_var_data.location) < (1 << 12) &&
277 abs((int)data.driver_location -
278 (int)ctx->last_var_data.driver_location) < (1 << 15))
279 flags.u.data_encoding = var_encode_location_diff;
280 else
281 flags.u.data_encoding = var_encode_full;
282 }
283
284 blob_write_uint32(ctx->blob, flags.u32);
285
286 if (!flags.u.type_same_as_last) {
287 encode_type_to_blob(ctx->blob, var->type);
288 ctx->last_type = var->type;
289 }
290
291 if (var->interface_type && !flags.u.interface_type_same_as_last) {
292 encode_type_to_blob(ctx->blob, var->interface_type);
293 ctx->last_interface_type = var->interface_type;
294 }
295
296 if (flags.u.has_name)
297 blob_write_string(ctx->blob, var->name);
298
299 if (flags.u.data_encoding == var_encode_full ||
300 flags.u.data_encoding == var_encode_location_diff) {
301 if (flags.u.data_encoding == var_encode_full) {
302 blob_write_bytes(ctx->blob, &data, sizeof(data));
303 } else {
304 /* Serialize only the difference in locations from the last variable.
305 */
306 union packed_var_data_diff diff;
307
308 diff.u.location = data.location - ctx->last_var_data.location;
309 diff.u.location_frac = data.location_frac -
310 ctx->last_var_data.location_frac;
311 diff.u.driver_location = data.driver_location -
312 ctx->last_var_data.driver_location;
313
314 blob_write_uint32(ctx->blob, diff.u32);
315 }
316
317 ctx->last_var_data = data;
318 }
319
320 for (unsigned i = 0; i < var->num_state_slots; i++) {
321 blob_write_bytes(ctx->blob, &var->state_slots[i],
322 sizeof(var->state_slots[i]));
323 }
324 if (var->constant_initializer)
325 write_constant(ctx, var->constant_initializer);
326 if (var->num_members > 0) {
327 blob_write_bytes(ctx->blob, (uint8_t *) var->members,
328 var->num_members * sizeof(*var->members));
329 }
330 }
331
332 static nir_variable *
333 read_variable(read_ctx *ctx)
334 {
335 nir_variable *var = rzalloc(ctx->nir, nir_variable);
336 read_add_object(ctx, var);
337
338 union packed_var flags;
339 flags.u32 = blob_read_uint32(ctx->blob);
340
341 if (flags.u.type_same_as_last) {
342 var->type = ctx->last_type;
343 } else {
344 var->type = decode_type_from_blob(ctx->blob);
345 ctx->last_type = var->type;
346 }
347
348 if (flags.u.has_interface_type) {
349 if (flags.u.interface_type_same_as_last) {
350 var->interface_type = ctx->last_interface_type;
351 } else {
352 var->interface_type = decode_type_from_blob(ctx->blob);
353 ctx->last_interface_type = var->interface_type;
354 }
355 }
356
357 if (flags.u.has_name) {
358 const char *name = blob_read_string(ctx->blob);
359 var->name = ralloc_strdup(var, name);
360 } else {
361 var->name = NULL;
362 }
363
364 if (flags.u.data_encoding == var_encode_shader_temp)
365 var->data.mode = nir_var_shader_temp;
366 else if (flags.u.data_encoding == var_encode_function_temp)
367 var->data.mode = nir_var_function_temp;
368 else if (flags.u.data_encoding == var_encode_full) {
369 blob_copy_bytes(ctx->blob, (uint8_t *) &var->data, sizeof(var->data));
370 ctx->last_var_data = var->data;
371 } else { /* var_encode_location_diff */
372 union packed_var_data_diff diff;
373 diff.u32 = blob_read_uint32(ctx->blob);
374
375 var->data = ctx->last_var_data;
376 var->data.location += diff.u.location;
377 var->data.location_frac += diff.u.location_frac;
378 var->data.driver_location += diff.u.driver_location;
379
380 ctx->last_var_data = var->data;
381 }
382
383 var->num_state_slots = flags.u.num_state_slots;
384 if (var->num_state_slots != 0) {
385 var->state_slots = ralloc_array(var, nir_state_slot,
386 var->num_state_slots);
387 for (unsigned i = 0; i < var->num_state_slots; i++) {
388 blob_copy_bytes(ctx->blob, &var->state_slots[i],
389 sizeof(var->state_slots[i]));
390 }
391 }
392 if (flags.u.has_constant_initializer)
393 var->constant_initializer = read_constant(ctx, var);
394 else
395 var->constant_initializer = NULL;
396 var->num_members = flags.u.num_members;
397 if (var->num_members > 0) {
398 var->members = ralloc_array(var, struct nir_variable_data,
399 var->num_members);
400 blob_copy_bytes(ctx->blob, (uint8_t *) var->members,
401 var->num_members * sizeof(*var->members));
402 }
403
404 return var;
405 }
406
407 static void
408 write_var_list(write_ctx *ctx, const struct exec_list *src)
409 {
410 blob_write_uint32(ctx->blob, exec_list_length(src));
411 foreach_list_typed(nir_variable, var, node, src) {
412 write_variable(ctx, var);
413 }
414 }
415
416 static void
417 read_var_list(read_ctx *ctx, struct exec_list *dst)
418 {
419 exec_list_make_empty(dst);
420 unsigned num_vars = blob_read_uint32(ctx->blob);
421 for (unsigned i = 0; i < num_vars; i++) {
422 nir_variable *var = read_variable(ctx);
423 exec_list_push_tail(dst, &var->node);
424 }
425 }
426
427 static void
428 write_register(write_ctx *ctx, const nir_register *reg)
429 {
430 write_add_object(ctx, reg);
431 blob_write_uint32(ctx->blob, reg->num_components);
432 blob_write_uint32(ctx->blob, reg->bit_size);
433 blob_write_uint32(ctx->blob, reg->num_array_elems);
434 blob_write_uint32(ctx->blob, reg->index);
435 blob_write_uint32(ctx->blob, !ctx->strip && reg->name);
436 if (!ctx->strip && reg->name)
437 blob_write_string(ctx->blob, reg->name);
438 }
439
440 static nir_register *
441 read_register(read_ctx *ctx)
442 {
443 nir_register *reg = ralloc(ctx->nir, nir_register);
444 read_add_object(ctx, reg);
445 reg->num_components = blob_read_uint32(ctx->blob);
446 reg->bit_size = blob_read_uint32(ctx->blob);
447 reg->num_array_elems = blob_read_uint32(ctx->blob);
448 reg->index = blob_read_uint32(ctx->blob);
449 bool has_name = blob_read_uint32(ctx->blob);
450 if (has_name) {
451 const char *name = blob_read_string(ctx->blob);
452 reg->name = ralloc_strdup(reg, name);
453 } else {
454 reg->name = NULL;
455 }
456
457 list_inithead(&reg->uses);
458 list_inithead(&reg->defs);
459 list_inithead(&reg->if_uses);
460
461 return reg;
462 }
463
464 static void
465 write_reg_list(write_ctx *ctx, const struct exec_list *src)
466 {
467 blob_write_uint32(ctx->blob, exec_list_length(src));
468 foreach_list_typed(nir_register, reg, node, src)
469 write_register(ctx, reg);
470 }
471
472 static void
473 read_reg_list(read_ctx *ctx, struct exec_list *dst)
474 {
475 exec_list_make_empty(dst);
476 unsigned num_regs = blob_read_uint32(ctx->blob);
477 for (unsigned i = 0; i < num_regs; i++) {
478 nir_register *reg = read_register(ctx);
479 exec_list_push_tail(dst, &reg->node);
480 }
481 }
482
483 union packed_src {
484 uint32_t u32;
485 struct {
486 unsigned is_ssa:1; /* <-- Header */
487 unsigned is_indirect:1;
488 unsigned object_idx:20;
489 unsigned _footer:10; /* <-- Footer */
490 } any;
491 struct {
492 unsigned _header:22; /* <-- Header */
493 unsigned negate:1; /* <-- Footer */
494 unsigned abs:1;
495 unsigned swizzle_x:2;
496 unsigned swizzle_y:2;
497 unsigned swizzle_z:2;
498 unsigned swizzle_w:2;
499 } alu;
500 struct {
501 unsigned _header:22; /* <-- Header */
502 unsigned src_type:5; /* <-- Footer */
503 unsigned _pad:5;
504 } tex;
505 };
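/* Example: an SSA source costs a single uint32 here - is_ssa = 1, object_idx
 * holding the def's remap index, and the 10 footer bits pre-filled by the
 * caller (ALU negate/abs/swizzles or the tex src_type). A register source
 * additionally writes base_offset as a uint32 and, if is_indirect is set,
 * recursively writes the indirect source after it.
 */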
506
507 static void
508 write_src_full(write_ctx *ctx, const nir_src *src, union packed_src header)
509 {
510 /* Since sources are very frequent, we try to save some space when storing
511 * them. In particular, the low two bits of the header store whether the
512 * source is an SSA value and whether a register source has an indirect
513 * index. The object index fits in the remaining 20-bit field, since otherwise
514 * our address space would've been exhausted allocating the remap table!
515 */
516 header.any.is_ssa = src->is_ssa;
517 if (src->is_ssa) {
518 header.any.object_idx = write_lookup_object(ctx, src->ssa);
519 blob_write_uint32(ctx->blob, header.u32);
520 } else {
521 header.any.object_idx = write_lookup_object(ctx, src->reg.reg);
522 header.any.is_indirect = !!src->reg.indirect;
523 blob_write_uint32(ctx->blob, header.u32);
524 blob_write_uint32(ctx->blob, src->reg.base_offset);
525 if (src->reg.indirect) {
526 union packed_src header = {0};
527 write_src_full(ctx, src->reg.indirect, header);
528 }
529 }
530 }
531
532 static void
533 write_src(write_ctx *ctx, const nir_src *src)
534 {
535 union packed_src header = {0};
536 write_src_full(ctx, src, header);
537 }
538
539 static union packed_src
540 read_src(read_ctx *ctx, nir_src *src, void *mem_ctx)
541 {
542 STATIC_ASSERT(sizeof(union packed_src) == 4);
543 union packed_src header;
544 header.u32 = blob_read_uint32(ctx->blob);
545
546 src->is_ssa = header.any.is_ssa;
547 if (src->is_ssa) {
548 src->ssa = read_lookup_object(ctx, header.any.object_idx);
549 } else {
550 src->reg.reg = read_lookup_object(ctx, header.any.object_idx);
551 src->reg.base_offset = blob_read_uint32(ctx->blob);
552 if (header.any.is_indirect) {
553 src->reg.indirect = ralloc(mem_ctx, nir_src);
554 read_src(ctx, src->reg.indirect, mem_ctx);
555 } else {
556 src->reg.indirect = NULL;
557 }
558 }
559 return header;
560 }
561
562 union packed_dest {
563 uint8_t u8;
564 struct {
565 uint8_t is_ssa:1;
566 uint8_t has_name:1;
567 uint8_t num_components:3;
568 uint8_t bit_size:3;
569 } ssa;
570 struct {
571 uint8_t is_ssa:1;
572 uint8_t is_indirect:1;
573 uint8_t _pad:6;
574 } reg;
575 };
576
577 enum intrinsic_const_indices_encoding {
578 /* Use the 9 bits of packed_const_indices to store 1-9 indices.
579 * 1 9-bit index, or 2 4-bit indices, or 3 3-bit indices, or
580 * 4 2-bit indices, or 5-9 1-bit indices.
581 *
582 * The common case for load_ubo is 0, 0, 0, which is trivially represented.
583 * The common cases for load_interpolated_input also fit here, e.g.: 7, 3
584 */
585 const_indices_9bit_all_combined,
586
587 const_indices_8bit, /* 8 bits per element */
588 const_indices_16bit, /* 16 bits per element */
589 const_indices_32bit, /* 32 bits per element */
590 };
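/* Worked example: load_interpolated_input with const indices 7 and 3 needs
 * util_last_bit(7) = 3 bits per index, and 3 * 2 <= 9, so both are combined
 * into packed_const_indices as 7 | (3 << 4) = 0x37 with bit_size = 9 / 2 = 4
 * and no extra dwords. A single index of 1000 needs 10 bits, so it falls
 * back to const_indices_16bit and one uint16 follows the sources.
 */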
591
592 enum load_const_packing {
593 /* Constants are not packed and are stored in following dwords. */
594 load_const_full,
595
596 /* packed_value contains high 19 bits, low bits are 0,
597 * good for typical floating-point constants
598 */
599 load_const_scalar_hi_19bits,
600
601 /* packed_value contains low 19 bits, high bits are sign-extended */
602 load_const_scalar_lo_19bits_sext,
603 };
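/* Example: the 32-bit constant 1.0f (0x3f800000) has its low 13 bits clear,
 * so it is stored as load_const_scalar_hi_19bits with
 * packed_value = 0x3f800000 >> 13 and no extra dword. A small integer such
 * as 5 survives the (x << 13) >> 13 sign-extension check and is stored as
 * load_const_scalar_lo_19bits_sext with packed_value = 5.
 */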
604
605 union packed_instr {
606 uint32_t u32;
607 struct {
608 unsigned instr_type:4; /* always present */
609 unsigned _pad:20;
610 unsigned dest:8; /* always last */
611 } any;
612 struct {
613 unsigned instr_type:4;
614 unsigned exact:1;
615 unsigned no_signed_wrap:1;
616 unsigned no_unsigned_wrap:1;
617 unsigned saturate:1;
618 /* Reg: writemask; SSA: swizzles for 2 srcs */
619 unsigned writemask_or_two_swizzles:4;
620 unsigned op:9;
621 unsigned packed_src_ssa_16bit:1;
622 /* Scalarized ALUs typically share the same header. */
623 unsigned num_followup_alu_sharing_header:2;
624 unsigned dest:8;
625 } alu;
626 struct {
627 unsigned instr_type:4;
628 unsigned deref_type:3;
629 unsigned cast_type_same_as_last:1;
630 unsigned mode:10; /* deref_var redefines this */
631 unsigned packed_src_ssa_16bit:1; /* deref_var redefines this */
632 unsigned _pad:5; /* deref_var redefines this */
633 unsigned dest:8;
634 } deref;
635 struct {
636 unsigned instr_type:4;
637 unsigned deref_type:3;
638 unsigned _pad:1;
639 unsigned object_idx:16; /* if 0, the object ID is a separate uint32 */
640 unsigned dest:8;
641 } deref_var;
642 struct {
643 unsigned instr_type:4;
644 unsigned intrinsic:9;
645 unsigned const_indices_encoding:2;
646 unsigned packed_const_indices:9;
647 unsigned dest:8;
648 } intrinsic;
649 struct {
650 unsigned instr_type:4;
651 unsigned last_component:4;
652 unsigned bit_size:3;
653 unsigned packing:2; /* enum load_const_packing */
654 unsigned packed_value:19; /* meaning determined by packing */
655 } load_const;
656 struct {
657 unsigned instr_type:4;
658 unsigned last_component:4;
659 unsigned bit_size:3;
660 unsigned _pad:21;
661 } undef;
662 struct {
663 unsigned instr_type:4;
664 unsigned num_srcs:4;
665 unsigned op:4;
666 unsigned texture_array_size:12;
667 unsigned dest:8;
668 } tex;
669 struct {
670 unsigned instr_type:4;
671 unsigned num_srcs:20;
672 unsigned dest:8;
673 } phi;
674 struct {
675 unsigned instr_type:4;
676 unsigned type:2;
677 unsigned _pad:26;
678 } jump;
679 };
680
681 /* The caller fills the low 24 bits of the header; the dest goes in the top 8 bits of the first uint32. */
682 static void
683 write_dest(write_ctx *ctx, const nir_dest *dst, union packed_instr header,
684 nir_instr_type instr_type)
685 {
686 STATIC_ASSERT(sizeof(union packed_dest) == 1);
687 union packed_dest dest;
688 dest.u8 = 0;
689
690 dest.ssa.is_ssa = dst->is_ssa;
691 if (dst->is_ssa) {
692 dest.ssa.has_name = !ctx->strip && dst->ssa.name;
693 dest.ssa.num_components =
694 encode_num_components_in_3bits(dst->ssa.num_components);
695 dest.ssa.bit_size = encode_bit_size_3bits(dst->ssa.bit_size);
696 } else {
697 dest.reg.is_indirect = !!(dst->reg.indirect);
698 }
699 header.any.dest = dest.u8;
700
701 /* Check whether the current ALU instruction has the same header as the
702 * previous instruction, if that was also an ALU. If so, we don't have to
703 * write the current header again. This is a typical occurrence after scalarization.
704 */
705 if (instr_type == nir_instr_type_alu) {
706 bool equal_header = false;
707
708 if (ctx->last_instr_type == nir_instr_type_alu) {
709 assert(ctx->last_alu_header_offset);
710 union packed_instr *last_header =
711 (union packed_instr *)(ctx->blob->data +
712 ctx->last_alu_header_offset);
713
714 /* Clear the field that counts ALUs with equal headers. */
715 union packed_instr clean_header;
716 clean_header.u32 = last_header->u32;
717 clean_header.alu.num_followup_alu_sharing_header = 0;
718
719 /* There can be at most 4 consecutive ALU instructions
720 * sharing the same header.
721 */
722 if (last_header->alu.num_followup_alu_sharing_header < 3 &&
723 header.u32 == clean_header.u32) {
724 last_header->alu.num_followup_alu_sharing_header++;
725 equal_header = true;
726 }
727 }
728
729 if (!equal_header) {
730 ctx->last_alu_header_offset = ctx->blob->size;
731 blob_write_uint32(ctx->blob, header.u32);
732 }
733 } else {
734 blob_write_uint32(ctx->blob, header.u32);
735 }
736
737 if (dest.ssa.is_ssa &&
738 dest.ssa.num_components == NUM_COMPONENTS_IS_SEPARATE_7)
739 blob_write_uint32(ctx->blob, dst->ssa.num_components);
740
741 if (dst->is_ssa) {
742 write_add_object(ctx, &dst->ssa);
743 if (dest.ssa.has_name)
744 blob_write_string(ctx->blob, dst->ssa.name);
745 } else {
746 blob_write_uint32(ctx->blob, write_lookup_object(ctx, dst->reg.reg));
747 blob_write_uint32(ctx->blob, dst->reg.base_offset);
748 if (dst->reg.indirect)
749 write_src(ctx, dst->reg.indirect);
750 }
751 }
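/* Example of the header sharing above: when consecutive scalar ALU
 * instructions end up with bit-identical headers (typical after
 * scalarization), only the first header is written; the following ones just
 * bump num_followup_alu_sharing_header in it, up to 3 followers per header.
 * read_instr then re-creates all of them (at most 4) from that single header.
 */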
752
753 static void
754 read_dest(read_ctx *ctx, nir_dest *dst, nir_instr *instr,
755 union packed_instr header)
756 {
757 union packed_dest dest;
758 dest.u8 = header.any.dest;
759
760 if (dest.ssa.is_ssa) {
761 unsigned bit_size = decode_bit_size_3bits(dest.ssa.bit_size);
762 unsigned num_components;
763 if (dest.ssa.num_components == NUM_COMPONENTS_IS_SEPARATE_7)
764 num_components = blob_read_uint32(ctx->blob);
765 else
766 num_components = decode_num_components_in_3bits(dest.ssa.num_components);
767 char *name = dest.ssa.has_name ? blob_read_string(ctx->blob) : NULL;
768 nir_ssa_dest_init(instr, dst, num_components, bit_size, name);
769 read_add_object(ctx, &dst->ssa);
770 } else {
771 dst->reg.reg = read_object(ctx);
772 dst->reg.base_offset = blob_read_uint32(ctx->blob);
773 if (dest.reg.is_indirect) {
774 dst->reg.indirect = ralloc(instr, nir_src);
775 read_src(ctx, dst->reg.indirect, instr);
776 }
777 }
778 }
779
780 static bool
781 are_object_ids_16bit(write_ctx *ctx)
782 {
783 /* Check the highest object ID, because IDs are assigned monotonically. */
784 return ctx->next_idx < (1 << 16);
785 }
786
787 static bool
788 is_alu_src_ssa_16bit(write_ctx *ctx, const nir_alu_instr *alu)
789 {
790 unsigned num_srcs = nir_op_infos[alu->op].num_inputs;
791
792 for (unsigned i = 0; i < num_srcs; i++) {
793 if (!alu->src[i].src.is_ssa || alu->src[i].abs || alu->src[i].negate)
794 return false;
795
796 unsigned src_components = nir_ssa_alu_instr_src_components(alu, i);
797
798 for (unsigned chan = 0; chan < src_components; chan++) {
799 /* The swizzles for src0.x and src1.x are stored
800 * in writemask_or_two_swizzles for SSA ALUs.
801 */
802 if (alu->dest.dest.is_ssa && i < 2 && chan == 0 &&
803 alu->src[i].swizzle[chan] < 4)
804 continue;
805
806 if (alu->src[i].swizzle[chan] != chan)
807 return false;
808 }
809 }
810
811 return are_object_ids_16bit(ctx);
812 }
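/* When the check above passes, write_alu emits each source as a bare uint16
 * object index instead of a 4-byte packed_src. For SSA-dest ALUs the
 * first-channel swizzles of src0 and src1 travel in
 * writemask_or_two_swizzles; every other channel must use the identity
 * swizzle, which is what read_alu reconstructs.
 */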
813
814 static void
815 write_alu(write_ctx *ctx, const nir_alu_instr *alu)
816 {
817 unsigned num_srcs = nir_op_infos[alu->op].num_inputs;
818 unsigned dst_components = nir_dest_num_components(alu->dest.dest);
819
820 /* 9 bits for nir_op */
821 STATIC_ASSERT(nir_num_opcodes <= 512);
822 union packed_instr header;
823 header.u32 = 0;
824
825 header.alu.instr_type = alu->instr.type;
826 header.alu.exact = alu->exact;
827 header.alu.no_signed_wrap = alu->no_signed_wrap;
828 header.alu.no_unsigned_wrap = alu->no_unsigned_wrap;
829 header.alu.saturate = alu->dest.saturate;
830 header.alu.op = alu->op;
831 header.alu.packed_src_ssa_16bit = is_alu_src_ssa_16bit(ctx, alu);
832
833 if (header.alu.packed_src_ssa_16bit &&
834 alu->dest.dest.is_ssa) {
835 /* For packed srcs of SSA ALUs, this field stores the swizzles. */
836 header.alu.writemask_or_two_swizzles = alu->src[0].swizzle[0];
837 if (num_srcs > 1)
838 header.alu.writemask_or_two_swizzles |= alu->src[1].swizzle[0] << 2;
839 } else if (!alu->dest.dest.is_ssa && dst_components <= 4) {
840 /* For vec4 registers, this field is a writemask. */
841 header.alu.writemask_or_two_swizzles = alu->dest.write_mask;
842 }
843
844 write_dest(ctx, &alu->dest.dest, header, alu->instr.type);
845
846 if (!alu->dest.dest.is_ssa && dst_components > 4)
847 blob_write_uint32(ctx->blob, alu->dest.write_mask);
848
849 if (header.alu.packed_src_ssa_16bit) {
850 for (unsigned i = 0; i < num_srcs; i++) {
851 assert(alu->src[i].src.is_ssa);
852 unsigned idx = write_lookup_object(ctx, alu->src[i].src.ssa);
853 assert(idx < (1 << 16));
854 blob_write_uint16(ctx->blob, idx);
855 }
856 } else {
857 for (unsigned i = 0; i < num_srcs; i++) {
858 unsigned src_channels = nir_ssa_alu_instr_src_components(alu, i);
859 unsigned src_components = nir_src_num_components(alu->src[i].src);
860 union packed_src src;
861 bool packed = src_components <= 4 && src_channels <= 4;
862 src.u32 = 0;
863
864 src.alu.negate = alu->src[i].negate;
865 src.alu.abs = alu->src[i].abs;
866
867 if (packed) {
868 src.alu.swizzle_x = alu->src[i].swizzle[0];
869 src.alu.swizzle_y = alu->src[i].swizzle[1];
870 src.alu.swizzle_z = alu->src[i].swizzle[2];
871 src.alu.swizzle_w = alu->src[i].swizzle[3];
872 }
873
874 write_src_full(ctx, &alu->src[i].src, src);
875
876 /* Store swizzles for vec8 and vec16. */
877 if (!packed) {
878 for (unsigned o = 0; o < src_channels; o += 8) {
879 unsigned value = 0;
880
881 for (unsigned j = 0; j < 8 && o + j < src_channels; j++) {
882 value |= (uint32_t)alu->src[i].swizzle[o + j] <<
883 (4 * j); /* 4 bits per swizzle */
884 }
885
886 blob_write_uint32(ctx->blob, value);
887 }
888 }
889 }
890 }
891 }
892
893 static nir_alu_instr *
894 read_alu(read_ctx *ctx, union packed_instr header)
895 {
896 unsigned num_srcs = nir_op_infos[header.alu.op].num_inputs;
897 nir_alu_instr *alu = nir_alu_instr_create(ctx->nir, header.alu.op);
898
899 alu->exact = header.alu.exact;
900 alu->no_signed_wrap = header.alu.no_signed_wrap;
901 alu->no_unsigned_wrap = header.alu.no_unsigned_wrap;
902 alu->dest.saturate = header.alu.saturate;
903
904 read_dest(ctx, &alu->dest.dest, &alu->instr, header);
905
906 unsigned dst_components = nir_dest_num_components(alu->dest.dest);
907
908 if (alu->dest.dest.is_ssa) {
909 alu->dest.write_mask = u_bit_consecutive(0, dst_components);
910 } else if (dst_components <= 4) {
911 alu->dest.write_mask = header.alu.writemask_or_two_swizzles;
912 } else {
913 alu->dest.write_mask = blob_read_uint32(ctx->blob);
914 }
915
916 if (header.alu.packed_src_ssa_16bit) {
917 for (unsigned i = 0; i < num_srcs; i++) {
918 nir_alu_src *src = &alu->src[i];
919 src->src.is_ssa = true;
920 src->src.ssa = read_lookup_object(ctx, blob_read_uint16(ctx->blob));
921
922 memset(&src->swizzle, 0, sizeof(src->swizzle));
923
924 unsigned src_components = nir_ssa_alu_instr_src_components(alu, i);
925
926 for (unsigned chan = 0; chan < src_components; chan++)
927 src->swizzle[chan] = chan;
928 }
929 } else {
930 for (unsigned i = 0; i < num_srcs; i++) {
931 union packed_src src = read_src(ctx, &alu->src[i].src, &alu->instr);
932 unsigned src_channels = nir_ssa_alu_instr_src_components(alu, i);
933 unsigned src_components = nir_src_num_components(alu->src[i].src);
934 bool packed = src_components <= 4 && src_channels <= 4;
935
936 alu->src[i].negate = src.alu.negate;
937 alu->src[i].abs = src.alu.abs;
938
939 memset(&alu->src[i].swizzle, 0, sizeof(alu->src[i].swizzle));
940
941 if (packed) {
942 alu->src[i].swizzle[0] = src.alu.swizzle_x;
943 alu->src[i].swizzle[1] = src.alu.swizzle_y;
944 alu->src[i].swizzle[2] = src.alu.swizzle_z;
945 alu->src[i].swizzle[3] = src.alu.swizzle_w;
946 } else {
947 /* Load swizzles for vec8 and vec16. */
948 for (unsigned o = 0; o < src_channels; o += 8) {
949 unsigned value = blob_read_uint32(ctx->blob);
950
951 for (unsigned j = 0; j < 8 && o + j < src_channels; j++) {
952 alu->src[i].swizzle[o + j] =
953 (value >> (4 * j)) & 0xf; /* 4 bits per swizzle */
954 }
955 }
956 }
957 }
958 }
959
960 if (header.alu.packed_src_ssa_16bit &&
961 alu->dest.dest.is_ssa) {
962 alu->src[0].swizzle[0] = header.alu.writemask_or_two_swizzles & 0x3;
963 if (num_srcs > 1)
964 alu->src[1].swizzle[0] = header.alu.writemask_or_two_swizzles >> 2;
965 }
966
967 return alu;
968 }
969
970 static void
971 write_deref(write_ctx *ctx, const nir_deref_instr *deref)
972 {
973 assert(deref->deref_type < 8);
974 assert(deref->mode < (1 << 10));
975
976 union packed_instr header;
977 header.u32 = 0;
978
979 header.deref.instr_type = deref->instr.type;
980 header.deref.deref_type = deref->deref_type;
981
982 if (deref->deref_type == nir_deref_type_cast) {
983 header.deref.mode = deref->mode;
984 header.deref.cast_type_same_as_last = deref->type == ctx->last_type;
985 }
986
987 unsigned var_idx = 0;
988 if (deref->deref_type == nir_deref_type_var) {
989 var_idx = write_lookup_object(ctx, deref->var);
990 if (var_idx && var_idx < (1 << 16))
991 header.deref_var.object_idx = var_idx;
992 }
993
994 if (deref->deref_type == nir_deref_type_array ||
995 deref->deref_type == nir_deref_type_ptr_as_array) {
996 header.deref.packed_src_ssa_16bit =
997 deref->parent.is_ssa && deref->arr.index.is_ssa &&
998 are_object_ids_16bit(ctx);
999 }
1000
1001 write_dest(ctx, &deref->dest, header, deref->instr.type);
1002
1003 switch (deref->deref_type) {
1004 case nir_deref_type_var:
1005 if (!header.deref_var.object_idx)
1006 blob_write_uint32(ctx->blob, var_idx);
1007 break;
1008
1009 case nir_deref_type_struct:
1010 write_src(ctx, &deref->parent);
1011 blob_write_uint32(ctx->blob, deref->strct.index);
1012 break;
1013
1014 case nir_deref_type_array:
1015 case nir_deref_type_ptr_as_array:
1016 if (header.deref.packed_src_ssa_16bit) {
1017 blob_write_uint16(ctx->blob,
1018 write_lookup_object(ctx, deref->parent.ssa));
1019 blob_write_uint16(ctx->blob,
1020 write_lookup_object(ctx, deref->arr.index.ssa));
1021 } else {
1022 write_src(ctx, &deref->parent);
1023 write_src(ctx, &deref->arr.index);
1024 }
1025 break;
1026
1027 case nir_deref_type_cast:
1028 write_src(ctx, &deref->parent);
1029 blob_write_uint32(ctx->blob, deref->cast.ptr_stride);
1030 if (!header.deref.cast_type_same_as_last) {
1031 encode_type_to_blob(ctx->blob, deref->type);
1032 ctx->last_type = deref->type;
1033 }
1034 break;
1035
1036 case nir_deref_type_array_wildcard:
1037 write_src(ctx, &deref->parent);
1038 break;
1039
1040 default:
1041 unreachable("Invalid deref type");
1042 }
1043 }
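/* Example: for array and ptr_as_array derefs whose parent and index are both
 * SSA (and while object IDs still fit in 16 bits), the two sources above are
 * written as two uint16 indices rather than two full packed_src words.
 */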
1044
1045 static nir_deref_instr *
1046 read_deref(read_ctx *ctx, union packed_instr header)
1047 {
1048 nir_deref_type deref_type = header.deref.deref_type;
1049 nir_deref_instr *deref = nir_deref_instr_create(ctx->nir, deref_type);
1050
1051 read_dest(ctx, &deref->dest, &deref->instr, header);
1052
1053 nir_deref_instr *parent;
1054
1055 switch (deref->deref_type) {
1056 case nir_deref_type_var:
1057 if (header.deref_var.object_idx)
1058 deref->var = read_lookup_object(ctx, header.deref_var.object_idx);
1059 else
1060 deref->var = read_object(ctx);
1061
1062 deref->type = deref->var->type;
1063 break;
1064
1065 case nir_deref_type_struct:
1066 read_src(ctx, &deref->parent, &deref->instr);
1067 parent = nir_src_as_deref(deref->parent);
1068 deref->strct.index = blob_read_uint32(ctx->blob);
1069 deref->type = glsl_get_struct_field(parent->type, deref->strct.index);
1070 break;
1071
1072 case nir_deref_type_array:
1073 case nir_deref_type_ptr_as_array:
1074 if (header.deref.packed_src_ssa_16bit) {
1075 deref->parent.is_ssa = true;
1076 deref->parent.ssa = read_lookup_object(ctx, blob_read_uint16(ctx->blob));
1077 deref->arr.index.is_ssa = true;
1078 deref->arr.index.ssa = read_lookup_object(ctx, blob_read_uint16(ctx->blob));
1079 } else {
1080 read_src(ctx, &deref->parent, &deref->instr);
1081 read_src(ctx, &deref->arr.index, &deref->instr);
1082 }
1083
1084 parent = nir_src_as_deref(deref->parent);
1085 if (deref->deref_type == nir_deref_type_array)
1086 deref->type = glsl_get_array_element(parent->type);
1087 else
1088 deref->type = parent->type;
1089 break;
1090
1091 case nir_deref_type_cast:
1092 read_src(ctx, &deref->parent, &deref->instr);
1093 deref->cast.ptr_stride = blob_read_uint32(ctx->blob);
1094 if (header.deref.cast_type_same_as_last) {
1095 deref->type = ctx->last_type;
1096 } else {
1097 deref->type = decode_type_from_blob(ctx->blob);
1098 ctx->last_type = deref->type;
1099 }
1100 break;
1101
1102 case nir_deref_type_array_wildcard:
1103 read_src(ctx, &deref->parent, &deref->instr);
1104 parent = nir_src_as_deref(deref->parent);
1105 deref->type = glsl_get_array_element(parent->type);
1106 break;
1107
1108 default:
1109 unreachable("Invalid deref type");
1110 }
1111
1112 if (deref_type == nir_deref_type_var) {
1113 deref->mode = deref->var->data.mode;
1114 } else if (deref->deref_type == nir_deref_type_cast) {
1115 deref->mode = header.deref.mode;
1116 } else {
1117 assert(deref->parent.is_ssa);
1118 deref->mode = nir_instr_as_deref(deref->parent.ssa->parent_instr)->mode;
1119 }
1120
1121 return deref;
1122 }
1123
1124 static void
1125 write_intrinsic(write_ctx *ctx, const nir_intrinsic_instr *intrin)
1126 {
1127 /* 9 bits for nir_intrinsic_op */
1128 STATIC_ASSERT(nir_num_intrinsics <= 512);
1129 unsigned num_srcs = nir_intrinsic_infos[intrin->intrinsic].num_srcs;
1130 unsigned num_indices = nir_intrinsic_infos[intrin->intrinsic].num_indices;
1131 assert(intrin->intrinsic < 512);
1132
1133 union packed_instr header;
1134 header.u32 = 0;
1135
1136 header.intrinsic.instr_type = intrin->instr.type;
1137 header.intrinsic.intrinsic = intrin->intrinsic;
1138
1139 /* Analyze constant indices to decide how to encode them. */
1140 if (num_indices) {
1141 unsigned max_bits = 0;
1142 for (unsigned i = 0; i < num_indices; i++) {
1143 unsigned max = util_last_bit(intrin->const_index[i]);
1144 max_bits = MAX2(max_bits, max);
1145 }
1146
1147 if (max_bits * num_indices <= 9) {
1148 header.intrinsic.const_indices_encoding = const_indices_9bit_all_combined;
1149
1150 /* Pack all const indices into the 9-bit field. */
1151 unsigned bit_size = 9 / num_indices;
1152 for (unsigned i = 0; i < num_indices; i++) {
1153 header.intrinsic.packed_const_indices |=
1154 intrin->const_index[i] << (i * bit_size);
1155 }
1156 } else if (max_bits <= 8)
1157 header.intrinsic.const_indices_encoding = const_indices_8bit;
1158 else if (max_bits <= 16)
1159 header.intrinsic.const_indices_encoding = const_indices_16bit;
1160 else
1161 header.intrinsic.const_indices_encoding = const_indices_32bit;
1162 }
1163
1164 if (nir_intrinsic_infos[intrin->intrinsic].has_dest)
1165 write_dest(ctx, &intrin->dest, header, intrin->instr.type);
1166 else
1167 blob_write_uint32(ctx->blob, header.u32);
1168
1169 for (unsigned i = 0; i < num_srcs; i++)
1170 write_src(ctx, &intrin->src[i]);
1171
1172 if (num_indices) {
1173 switch (header.intrinsic.const_indices_encoding) {
1174 case const_indices_8bit:
1175 for (unsigned i = 0; i < num_indices; i++)
1176 blob_write_uint8(ctx->blob, intrin->const_index[i]);
1177 break;
1178 case const_indices_16bit:
1179 for (unsigned i = 0; i < num_indices; i++)
1180 blob_write_uint16(ctx->blob, intrin->const_index[i]);
1181 break;
1182 case const_indices_32bit:
1183 for (unsigned i = 0; i < num_indices; i++)
1184 blob_write_uint32(ctx->blob, intrin->const_index[i]);
1185 break;
1186 }
1187 }
1188 }
1189
1190 static nir_intrinsic_instr *
1191 read_intrinsic(read_ctx *ctx, union packed_instr header)
1192 {
1193 nir_intrinsic_op op = header.intrinsic.intrinsic;
1194 nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(ctx->nir, op);
1195
1196 unsigned num_srcs = nir_intrinsic_infos[op].num_srcs;
1197 unsigned num_indices = nir_intrinsic_infos[op].num_indices;
1198
1199 if (nir_intrinsic_infos[op].has_dest)
1200 read_dest(ctx, &intrin->dest, &intrin->instr, header);
1201
1202 for (unsigned i = 0; i < num_srcs; i++)
1203 read_src(ctx, &intrin->src[i], &intrin->instr);
1204
1205 /* Vectorized intrinsics take their num_components from the dest or src that
1206 * has 0 components in the intrinsic info. Find it.
1207 */
1208 if (nir_intrinsic_infos[op].has_dest &&
1209 nir_intrinsic_infos[op].dest_components == 0) {
1210 intrin->num_components = nir_dest_num_components(intrin->dest);
1211 } else {
1212 for (unsigned i = 0; i < num_srcs; i++) {
1213 if (nir_intrinsic_infos[op].src_components[i] == 0) {
1214 intrin->num_components = nir_src_num_components(intrin->src[i]);
1215 break;
1216 }
1217 }
1218 }
1219
1220 if (num_indices) {
1221 switch (header.intrinsic.const_indices_encoding) {
1222 case const_indices_9bit_all_combined: {
1223 unsigned bit_size = 9 / num_indices;
1224 unsigned bit_mask = u_bit_consecutive(0, bit_size);
1225 for (unsigned i = 0; i < num_indices; i++) {
1226 intrin->const_index[i] =
1227 (header.intrinsic.packed_const_indices >> (i * bit_size)) &
1228 bit_mask;
1229 }
1230 break;
1231 }
1232 case const_indices_8bit:
1233 for (unsigned i = 0; i < num_indices; i++)
1234 intrin->const_index[i] = blob_read_uint8(ctx->blob);
1235 break;
1236 case const_indices_16bit:
1237 for (unsigned i = 0; i < num_indices; i++)
1238 intrin->const_index[i] = blob_read_uint16(ctx->blob);
1239 break;
1240 case const_indices_32bit:
1241 for (unsigned i = 0; i < num_indices; i++)
1242 intrin->const_index[i] = blob_read_uint32(ctx->blob);
1243 break;
1244 }
1245 }
1246
1247 return intrin;
1248 }
1249
1250 static void
1251 write_load_const(write_ctx *ctx, const nir_load_const_instr *lc)
1252 {
1253 assert(lc->def.num_components >= 1 && lc->def.num_components <= 16);
1254 union packed_instr header;
1255 header.u32 = 0;
1256
1257 header.load_const.instr_type = lc->instr.type;
1258 header.load_const.last_component = lc->def.num_components - 1;
1259 header.load_const.bit_size = encode_bit_size_3bits(lc->def.bit_size);
1260 header.load_const.packing = load_const_full;
1261
1262 /* Try to pack 1-component constants into the 19 free bits in the header. */
1263 if (lc->def.num_components == 1) {
1264 switch (lc->def.bit_size) {
1265 case 64:
1266 if ((lc->value[0].u64 & 0x1fffffffffffull) == 0) {
1267 /* packed_value contains high 19 bits, low bits are 0 */
1268 header.load_const.packing = load_const_scalar_hi_19bits;
1269 header.load_const.packed_value = lc->value[0].u64 >> 45;
1270 } else if (((lc->value[0].i64 << 45) >> 45) == lc->value[0].i64) {
1271 /* packed_value contains low 19 bits, high bits are sign-extended */
1272 header.load_const.packing = load_const_scalar_lo_19bits_sext;
1273 header.load_const.packed_value = lc->value[0].u64;
1274 }
1275 break;
1276
1277 case 32:
1278 if ((lc->value[0].u32 & 0x1fff) == 0) {
1279 header.load_const.packing = load_const_scalar_hi_19bits;
1280 header.load_const.packed_value = lc->value[0].u32 >> 13;
1281 } else if (((lc->value[0].i32 << 13) >> 13) == lc->value[0].i32) {
1282 header.load_const.packing = load_const_scalar_lo_19bits_sext;
1283 header.load_const.packed_value = lc->value[0].u32;
1284 }
1285 break;
1286
1287 case 16:
1288 header.load_const.packing = load_const_scalar_lo_19bits_sext;
1289 header.load_const.packed_value = lc->value[0].u16;
1290 break;
1291 case 8:
1292 header.load_const.packing = load_const_scalar_lo_19bits_sext;
1293 header.load_const.packed_value = lc->value[0].u8;
1294 break;
1295 case 1:
1296 header.load_const.packing = load_const_scalar_lo_19bits_sext;
1297 header.load_const.packed_value = lc->value[0].b;
1298 break;
1299 default:
1300 unreachable("invalid bit_size");
1301 }
1302 }
1303
1304 blob_write_uint32(ctx->blob, header.u32);
1305
1306 if (header.load_const.packing == load_const_full) {
1307 switch (lc->def.bit_size) {
1308 case 64:
1309 blob_write_bytes(ctx->blob, lc->value,
1310 sizeof(*lc->value) * lc->def.num_components);
1311 break;
1312
1313 case 32:
1314 for (unsigned i = 0; i < lc->def.num_components; i++)
1315 blob_write_uint32(ctx->blob, lc->value[i].u32);
1316 break;
1317
1318 case 16:
1319 for (unsigned i = 0; i < lc->def.num_components; i++)
1320 blob_write_uint16(ctx->blob, lc->value[i].u16);
1321 break;
1322
1323 default:
1324 assert(lc->def.bit_size <= 8);
1325 for (unsigned i = 0; i < lc->def.num_components; i++)
1326 blob_write_uint8(ctx->blob, lc->value[i].u8);
1327 break;
1328 }
1329 }
1330
1331 write_add_object(ctx, &lc->def);
1332 }
1333
1334 static nir_load_const_instr *
1335 read_load_const(read_ctx *ctx, union packed_instr header)
1336 {
1337 nir_load_const_instr *lc =
1338 nir_load_const_instr_create(ctx->nir, header.load_const.last_component + 1,
1339 decode_bit_size_3bits(header.load_const.bit_size));
1340
1341 switch (header.load_const.packing) {
1342 case load_const_scalar_hi_19bits:
1343 switch (lc->def.bit_size) {
1344 case 64:
1345 lc->value[0].u64 = (uint64_t)header.load_const.packed_value << 45;
1346 break;
1347 case 32:
1348 lc->value[0].u32 = (uint64_t)header.load_const.packed_value << 13;
1349 break;
1350 default:
1351 unreachable("invalid bit_size");
1352 }
1353 break;
1354
1355 case load_const_scalar_lo_19bits_sext:
1356 switch (lc->def.bit_size) {
1357 case 64:
1358 lc->value[0].i64 = ((int64_t)header.load_const.packed_value << 45) >> 45;
1359 break;
1360 case 32:
1361 lc->value[0].i32 = ((int32_t)header.load_const.packed_value << 13) >> 13;
1362 break;
1363 case 16:
1364 lc->value[0].u16 = header.load_const.packed_value;
1365 break;
1366 case 8:
1367 lc->value[0].u8 = header.load_const.packed_value;
1368 break;
1369 case 1:
1370 lc->value[0].b = header.load_const.packed_value;
1371 break;
1372 default:
1373 unreachable("invalid bit_size");
1374 }
1375 break;
1376
1377 case load_const_full:
1378 switch (lc->def.bit_size) {
1379 case 64:
1380 blob_copy_bytes(ctx->blob, lc->value, sizeof(*lc->value) * lc->def.num_components);
1381 break;
1382
1383 case 32:
1384 for (unsigned i = 0; i < lc->def.num_components; i++)
1385 lc->value[i].u32 = blob_read_uint32(ctx->blob);
1386 break;
1387
1388 case 16:
1389 for (unsigned i = 0; i < lc->def.num_components; i++)
1390 lc->value[i].u16 = blob_read_uint16(ctx->blob);
1391 break;
1392
1393 default:
1394 assert(lc->def.bit_size <= 8);
1395 for (unsigned i = 0; i < lc->def.num_components; i++)
1396 lc->value[i].u8 = blob_read_uint8(ctx->blob);
1397 break;
1398 }
1399 break;
1400 }
1401
1402 read_add_object(ctx, &lc->def);
1403 return lc;
1404 }
1405
1406 static void
1407 write_ssa_undef(write_ctx *ctx, const nir_ssa_undef_instr *undef)
1408 {
1409 assert(undef->def.num_components >= 1 && undef->def.num_components <= 16);
1410
1411 union packed_instr header;
1412 header.u32 = 0;
1413
1414 header.undef.instr_type = undef->instr.type;
1415 header.undef.last_component = undef->def.num_components - 1;
1416 header.undef.bit_size = encode_bit_size_3bits(undef->def.bit_size);
1417
1418 blob_write_uint32(ctx->blob, header.u32);
1419 write_add_object(ctx, &undef->def);
1420 }
1421
1422 static nir_ssa_undef_instr *
1423 read_ssa_undef(read_ctx *ctx, union packed_instr header)
1424 {
1425 nir_ssa_undef_instr *undef =
1426 nir_ssa_undef_instr_create(ctx->nir, header.undef.last_component + 1,
1427 decode_bit_size_3bits(header.undef.bit_size));
1428
1429 read_add_object(ctx, &undef->def);
1430 return undef;
1431 }
1432
1433 union packed_tex_data {
1434 uint32_t u32;
1435 struct {
1436 enum glsl_sampler_dim sampler_dim:4;
1437 nir_alu_type dest_type:8;
1438 unsigned coord_components:3;
1439 unsigned is_array:1;
1440 unsigned is_shadow:1;
1441 unsigned is_new_style_shadow:1;
1442 unsigned component:2;
1443 unsigned texture_non_uniform:1;
1444 unsigned sampler_non_uniform:1;
1445 unsigned unused:8; /* Mark unused for valgrind. */
1446 } u;
1447 };
1448
1449 static void
1450 write_tex(write_ctx *ctx, const nir_tex_instr *tex)
1451 {
1452 assert(tex->num_srcs < 16);
1453 assert(tex->op < 16);
1454 assert(tex->texture_array_size < 1024);
1455
1456 union packed_instr header;
1457 header.u32 = 0;
1458
1459 header.tex.instr_type = tex->instr.type;
1460 header.tex.num_srcs = tex->num_srcs;
1461 header.tex.op = tex->op;
1462 header.tex.texture_array_size = tex->texture_array_size;
1463
1464 write_dest(ctx, &tex->dest, header, tex->instr.type);
1465
1466 blob_write_uint32(ctx->blob, tex->texture_index);
1467 blob_write_uint32(ctx->blob, tex->sampler_index);
1468 if (tex->op == nir_texop_tg4)
1469 blob_write_bytes(ctx->blob, tex->tg4_offsets, sizeof(tex->tg4_offsets));
1470
1471 STATIC_ASSERT(sizeof(union packed_tex_data) == sizeof(uint32_t));
1472 union packed_tex_data packed = {
1473 .u.sampler_dim = tex->sampler_dim,
1474 .u.dest_type = tex->dest_type,
1475 .u.coord_components = tex->coord_components,
1476 .u.is_array = tex->is_array,
1477 .u.is_shadow = tex->is_shadow,
1478 .u.is_new_style_shadow = tex->is_new_style_shadow,
1479 .u.component = tex->component,
1480 .u.texture_non_uniform = tex->texture_non_uniform,
1481 .u.sampler_non_uniform = tex->sampler_non_uniform,
1482 };
1483 blob_write_uint32(ctx->blob, packed.u32);
1484
1485 for (unsigned i = 0; i < tex->num_srcs; i++) {
1486 union packed_src src;
1487 src.u32 = 0;
1488 src.tex.src_type = tex->src[i].src_type;
1489 write_src_full(ctx, &tex->src[i].src, src);
1490 }
1491 }
1492
1493 static nir_tex_instr *
1494 read_tex(read_ctx *ctx, union packed_instr header)
1495 {
1496 nir_tex_instr *tex = nir_tex_instr_create(ctx->nir, header.tex.num_srcs);
1497
1498 read_dest(ctx, &tex->dest, &tex->instr, header);
1499
1500 tex->op = header.tex.op;
1501 tex->texture_index = blob_read_uint32(ctx->blob);
1502 tex->texture_array_size = header.tex.texture_array_size;
1503 tex->sampler_index = blob_read_uint32(ctx->blob);
1504 if (tex->op == nir_texop_tg4)
1505 blob_copy_bytes(ctx->blob, tex->tg4_offsets, sizeof(tex->tg4_offsets));
1506
1507 union packed_tex_data packed;
1508 packed.u32 = blob_read_uint32(ctx->blob);
1509 tex->sampler_dim = packed.u.sampler_dim;
1510 tex->dest_type = packed.u.dest_type;
1511 tex->coord_components = packed.u.coord_components;
1512 tex->is_array = packed.u.is_array;
1513 tex->is_shadow = packed.u.is_shadow;
1514 tex->is_new_style_shadow = packed.u.is_new_style_shadow;
1515 tex->component = packed.u.component;
1516 tex->texture_non_uniform = packed.u.texture_non_uniform;
1517 tex->sampler_non_uniform = packed.u.sampler_non_uniform;
1518
1519 for (unsigned i = 0; i < tex->num_srcs; i++) {
1520 union packed_src src = read_src(ctx, &tex->src[i].src, &tex->instr);
1521 tex->src[i].src_type = src.tex.src_type;
1522 }
1523
1524 return tex;
1525 }
1526
1527 static void
1528 write_phi(write_ctx *ctx, const nir_phi_instr *phi)
1529 {
1530 union packed_instr header;
1531 header.u32 = 0;
1532
1533 header.phi.instr_type = phi->instr.type;
1534 header.phi.num_srcs = exec_list_length(&phi->srcs);
1535
1536 /* Phi nodes are special, since they may reference SSA definitions and
1537 * basic blocks that don't exist yet. We leave two empty uint32_t's here,
1538 * and then store enough information so that a later fixup pass can fill
1539 * them in correctly.
1540 */
1541 write_dest(ctx, &phi->dest, header, phi->instr.type);
1542
1543 nir_foreach_phi_src(src, phi) {
1544 assert(src->src.is_ssa);
1545 size_t blob_offset = blob_reserve_uint32(ctx->blob);
1546 ASSERTED size_t blob_offset2 = blob_reserve_uint32(ctx->blob);
1547 assert(blob_offset + sizeof(uint32_t) == blob_offset2);
1548 write_phi_fixup fixup = {
1549 .blob_offset = blob_offset,
1550 .src = src->src.ssa,
1551 .block = src->pred,
1552 };
1553 util_dynarray_append(&ctx->phi_fixups, write_phi_fixup, fixup);
1554 }
1555 }
1556
1557 static void
1558 write_fixup_phis(write_ctx *ctx)
1559 {
1560 util_dynarray_foreach(&ctx->phi_fixups, write_phi_fixup, fixup) {
1561 uint32_t *blob_ptr = (uint32_t *)(ctx->blob->data + fixup->blob_offset);
1562 blob_ptr[0] = write_lookup_object(ctx, fixup->src);
1563 blob_ptr[1] = write_lookup_object(ctx, fixup->block);
1564 }
1565
1566 util_dynarray_clear(&ctx->phi_fixups);
1567 }
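/* Tying the two functions above together: write_phi reserves two uint32 slots
 * per phi source and records their blob offset together with the SSA def and
 * predecessor block; write_fixup_phis runs at the end of write_function_impl,
 * once every def and block has an object index, and patches those indices
 * into the reserved slots.
 */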
1568
1569 static nir_phi_instr *
1570 read_phi(read_ctx *ctx, nir_block *blk, union packed_instr header)
1571 {
1572 nir_phi_instr *phi = nir_phi_instr_create(ctx->nir);
1573
1574 read_dest(ctx, &phi->dest, &phi->instr, header);
1575
1576 /* For similar reasons as before, we just store the index directly into the
1577 * pointer, and let a later pass resolve the phi sources.
1578 *
1579 * In order to ensure that the copied sources (which are just the indices
1580 * from the blob for now) don't get inserted into the old shader's use-def
1581 * lists, we have to add the phi instruction *before* we set up its
1582 * sources.
1583 */
1584 nir_instr_insert_after_block(blk, &phi->instr);
1585
1586 for (unsigned i = 0; i < header.phi.num_srcs; i++) {
1587 nir_phi_src *src = ralloc(phi, nir_phi_src);
1588
1589 src->src.is_ssa = true;
1590 src->src.ssa = (nir_ssa_def *)(uintptr_t) blob_read_uint32(ctx->blob);
1591 src->pred = (nir_block *)(uintptr_t) blob_read_uint32(ctx->blob);
1592
1593 /* Since we're not letting nir_insert_instr handle use/def stuff for us,
1594 * we have to set the parent_instr manually. It doesn't really matter
1595 * when we do it, so we might as well do it here.
1596 */
1597 src->src.parent_instr = &phi->instr;
1598
1599 /* Stash it in the list of phi sources. We'll walk this list and fix up
1600 * sources at the very end of read_function_impl.
1601 */
1602 list_add(&src->src.use_link, &ctx->phi_srcs);
1603
1604 exec_list_push_tail(&phi->srcs, &src->node);
1605 }
1606
1607 return phi;
1608 }
1609
1610 static void
1611 read_fixup_phis(read_ctx *ctx)
1612 {
1613 list_for_each_entry_safe(nir_phi_src, src, &ctx->phi_srcs, src.use_link) {
1614 src->pred = read_lookup_object(ctx, (uintptr_t)src->pred);
1615 src->src.ssa = read_lookup_object(ctx, (uintptr_t)src->src.ssa);
1616
1617 /* Remove from this list */
1618 list_del(&src->src.use_link);
1619
1620 list_addtail(&src->src.use_link, &src->src.ssa->uses);
1621 }
1622 assert(list_is_empty(&ctx->phi_srcs));
1623 }
1624
1625 static void
1626 write_jump(write_ctx *ctx, const nir_jump_instr *jmp)
1627 {
1628 assert(jmp->type < 4);
1629
1630 union packed_instr header;
1631 header.u32 = 0;
1632
1633 header.jump.instr_type = jmp->instr.type;
1634 header.jump.type = jmp->type;
1635
1636 blob_write_uint32(ctx->blob, header.u32);
1637 }
1638
1639 static nir_jump_instr *
1640 read_jump(read_ctx *ctx, union packed_instr header)
1641 {
1642 nir_jump_instr *jmp = nir_jump_instr_create(ctx->nir, header.jump.type);
1643 return jmp;
1644 }
1645
1646 static void
1647 write_call(write_ctx *ctx, const nir_call_instr *call)
1648 {
1649 blob_write_uint32(ctx->blob, write_lookup_object(ctx, call->callee));
1650
1651 for (unsigned i = 0; i < call->num_params; i++)
1652 write_src(ctx, &call->params[i]);
1653 }
1654
1655 static nir_call_instr *
1656 read_call(read_ctx *ctx)
1657 {
1658 nir_function *callee = read_object(ctx);
1659 nir_call_instr *call = nir_call_instr_create(ctx->nir, callee);
1660
1661 for (unsigned i = 0; i < call->num_params; i++)
1662 read_src(ctx, &call->params[i], call);
1663
1664 return call;
1665 }
1666
1667 static void
1668 write_instr(write_ctx *ctx, const nir_instr *instr)
1669 {
1670 /* We have only 4 bits for the instruction type. */
1671 assert(instr->type < 16);
1672
1673 switch (instr->type) {
1674 case nir_instr_type_alu:
1675 write_alu(ctx, nir_instr_as_alu(instr));
1676 break;
1677 case nir_instr_type_deref:
1678 write_deref(ctx, nir_instr_as_deref(instr));
1679 break;
1680 case nir_instr_type_intrinsic:
1681 write_intrinsic(ctx, nir_instr_as_intrinsic(instr));
1682 break;
1683 case nir_instr_type_load_const:
1684 write_load_const(ctx, nir_instr_as_load_const(instr));
1685 break;
1686 case nir_instr_type_ssa_undef:
1687 write_ssa_undef(ctx, nir_instr_as_ssa_undef(instr));
1688 break;
1689 case nir_instr_type_tex:
1690 write_tex(ctx, nir_instr_as_tex(instr));
1691 break;
1692 case nir_instr_type_phi:
1693 write_phi(ctx, nir_instr_as_phi(instr));
1694 break;
1695 case nir_instr_type_jump:
1696 write_jump(ctx, nir_instr_as_jump(instr));
1697 break;
1698 case nir_instr_type_call:
1699 blob_write_uint32(ctx->blob, instr->type);
1700 write_call(ctx, nir_instr_as_call(instr));
1701 break;
1702 case nir_instr_type_parallel_copy:
1703 unreachable("Cannot write parallel copies");
1704 default:
1705 unreachable("bad instr type");
1706 }
1707 }
1708
1709 /* Return the number of instructions read. */
1710 static unsigned
1711 read_instr(read_ctx *ctx, nir_block *block)
1712 {
1713 STATIC_ASSERT(sizeof(union packed_instr) == 4);
1714 union packed_instr header;
1715 header.u32 = blob_read_uint32(ctx->blob);
1716 nir_instr *instr;
1717
1718 switch (header.any.instr_type) {
1719 case nir_instr_type_alu:
1720 for (unsigned i = 0; i <= header.alu.num_followup_alu_sharing_header; i++)
1721 nir_instr_insert_after_block(block, &read_alu(ctx, header)->instr);
1722 return header.alu.num_followup_alu_sharing_header + 1;
1723 case nir_instr_type_deref:
1724 instr = &read_deref(ctx, header)->instr;
1725 break;
1726 case nir_instr_type_intrinsic:
1727 instr = &read_intrinsic(ctx, header)->instr;
1728 break;
1729 case nir_instr_type_load_const:
1730 instr = &read_load_const(ctx, header)->instr;
1731 break;
1732 case nir_instr_type_ssa_undef:
1733 instr = &read_ssa_undef(ctx, header)->instr;
1734 break;
1735 case nir_instr_type_tex:
1736 instr = &read_tex(ctx, header)->instr;
1737 break;
1738 case nir_instr_type_phi:
1739 /* Phi instructions are a bit of a special case when reading because we
1740 * don't want inserting the instruction to automatically handle use/defs
1741 * for us. Instead, we need to wait until all the blocks/instructions
1742 * are read so that we can set their sources up.
1743 */
1744 read_phi(ctx, block, header);
1745 return 1;
1746 case nir_instr_type_jump:
1747 instr = &read_jump(ctx, header)->instr;
1748 break;
1749 case nir_instr_type_call:
1750 instr = &read_call(ctx)->instr;
1751 break;
1752 case nir_instr_type_parallel_copy:
1753 unreachable("Cannot read parallel copies");
1754 default:
1755 unreachable("bad instr type");
1756 }
1757
1758 nir_instr_insert_after_block(block, instr);
1759 return 1;
1760 }
1761
1762 static void
1763 write_block(write_ctx *ctx, const nir_block *block)
1764 {
1765 write_add_object(ctx, block);
1766 blob_write_uint32(ctx->blob, exec_list_length(&block->instr_list));
1767
1768 ctx->last_instr_type = ~0;
1769 ctx->last_alu_header_offset = 0;
1770
1771 nir_foreach_instr(instr, block) {
1772 write_instr(ctx, instr);
1773 ctx->last_instr_type = instr->type;
1774 }
1775 }
1776
1777 static void
1778 read_block(read_ctx *ctx, struct exec_list *cf_list)
1779 {
1780 /* Don't actually create a new block. Just use the one from the tail of
1781 * the list. NIR guarantees that the tail of the list is a block and that
1782 * no two blocks are side-by-side in the IR; it should be empty.
1783 */
1784 nir_block *block =
1785 exec_node_data(nir_block, exec_list_get_tail(cf_list), cf_node.node);
1786
1787 read_add_object(ctx, block);
1788 unsigned num_instrs = blob_read_uint32(ctx->blob);
1789 for (unsigned i = 0; i < num_instrs;) {
1790 i += read_instr(ctx, block);
1791 }
1792 }
1793
1794 static void
1795 write_cf_list(write_ctx *ctx, const struct exec_list *cf_list);
1796
1797 static void
1798 read_cf_list(read_ctx *ctx, struct exec_list *cf_list);
1799
1800 static void
1801 write_if(write_ctx *ctx, nir_if *nif)
1802 {
1803 write_src(ctx, &nif->condition);
1804
1805 write_cf_list(ctx, &nif->then_list);
1806 write_cf_list(ctx, &nif->else_list);
1807 }
1808
1809 static void
1810 read_if(read_ctx *ctx, struct exec_list *cf_list)
1811 {
1812 nir_if *nif = nir_if_create(ctx->nir);
1813
1814 read_src(ctx, &nif->condition, nif);
1815
1816 nir_cf_node_insert_end(cf_list, &nif->cf_node);
1817
1818 read_cf_list(ctx, &nif->then_list);
1819 read_cf_list(ctx, &nif->else_list);
1820 }
1821
1822 static void
1823 write_loop(write_ctx *ctx, nir_loop *loop)
1824 {
1825 write_cf_list(ctx, &loop->body);
1826 }
1827
1828 static void
1829 read_loop(read_ctx *ctx, struct exec_list *cf_list)
1830 {
1831 nir_loop *loop = nir_loop_create(ctx->nir);
1832
1833 nir_cf_node_insert_end(cf_list, &loop->cf_node);
1834
1835 read_cf_list(ctx, &loop->body);
1836 }
1837
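/* Control flow is written as a pre-order walk: each CF node emits a type
 * tag and then recurses via write_block/write_if/write_loop, and each CF
 * list is prefixed with its node count so the reader knows how many nodes
 * to consume.
 */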
1838 static void
1839 write_cf_node(write_ctx *ctx, nir_cf_node *cf)
1840 {
1841 blob_write_uint32(ctx->blob, cf->type);
1842
1843 switch (cf->type) {
1844 case nir_cf_node_block:
1845 write_block(ctx, nir_cf_node_as_block(cf));
1846 break;
1847 case nir_cf_node_if:
1848 write_if(ctx, nir_cf_node_as_if(cf));
1849 break;
1850 case nir_cf_node_loop:
1851 write_loop(ctx, nir_cf_node_as_loop(cf));
1852 break;
1853 default:
1854 unreachable("bad cf type");
1855 }
1856 }
1857
1858 static void
1859 read_cf_node(read_ctx *ctx, struct exec_list *list)
1860 {
1861 nir_cf_node_type type = blob_read_uint32(ctx->blob);
1862
1863 switch (type) {
1864 case nir_cf_node_block:
1865 read_block(ctx, list);
1866 break;
1867 case nir_cf_node_if:
1868 read_if(ctx, list);
1869 break;
1870 case nir_cf_node_loop:
1871 read_loop(ctx, list);
1872 break;
1873 default:
1874 unreachable("bad cf type");
1875 }
1876 }
1877
1878 static void
1879 write_cf_list(write_ctx *ctx, const struct exec_list *cf_list)
1880 {
1881 blob_write_uint32(ctx->blob, exec_list_length(cf_list));
1882 foreach_list_typed(nir_cf_node, cf, node, cf_list) {
1883 write_cf_node(ctx, cf);
1884 }
1885 }
1886
1887 static void
1888 read_cf_list(read_ctx *ctx, struct exec_list *cf_list)
1889 {
1890 uint32_t num_cf_nodes = blob_read_uint32(ctx->blob);
1891 for (unsigned i = 0; i < num_cf_nodes; i++)
1892 read_cf_node(ctx, cf_list);
1893 }
1894
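/* A function_impl is written as its local variables, registers and
 * reg_alloc counter, followed by its body CF list.  Phi sources are only
 * fixed up afterwards, once every block and SSA def in the body has been
 * assigned an index.
 */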
1895 static void
1896 write_function_impl(write_ctx *ctx, const nir_function_impl *fi)
1897 {
1898 write_var_list(ctx, &fi->locals);
1899 write_reg_list(ctx, &fi->registers);
1900 blob_write_uint32(ctx->blob, fi->reg_alloc);
1901
1902 write_cf_list(ctx, &fi->body);
1903 write_fixup_phis(ctx);
1904 }
1905
1906 static nir_function_impl *
1907 read_function_impl(read_ctx *ctx, nir_function *fxn)
1908 {
1909 nir_function_impl *fi = nir_function_impl_create_bare(ctx->nir);
1910 fi->function = fxn;
1911
1912 read_var_list(ctx, &fi->locals);
1913 read_reg_list(ctx, &fi->registers);
1914 fi->reg_alloc = blob_read_uint32(ctx->blob);
1915
1916 read_cf_list(ctx, &fi->body);
1917 read_fixup_phis(ctx);
1918
1919 fi->valid_metadata = 0;
1920
1921 return fi;
1922 }
1923
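/* A function is written as a small flags word (bit 0: is_entrypoint,
 * bit 1: has a name, bit 2: has an impl), an optional name string, and one
 * packed uint32 per parameter with num_components in the low byte and
 * bit_size in the next byte.
 */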
1924 static void
1925 write_function(write_ctx *ctx, const nir_function *fxn)
1926 {
1927 uint32_t flags = fxn->is_entrypoint;
1928 if (fxn->name)
1929 flags |= 0x2;
1930 if (fxn->impl)
1931 flags |= 0x4;
1932 blob_write_uint32(ctx->blob, flags);
1933 if (fxn->name)
1934 blob_write_string(ctx->blob, fxn->name);
1935
1936 write_add_object(ctx, fxn);
1937
1938 blob_write_uint32(ctx->blob, fxn->num_params);
1939 for (unsigned i = 0; i < fxn->num_params; i++) {
1940 uint32_t val =
1941 ((uint32_t)fxn->params[i].num_components) |
1942 ((uint32_t)fxn->params[i].bit_size) << 8;
1943 blob_write_uint32(ctx->blob, val);
1944 }
1945
1946 /* At first glance, it looks like we should write the function_impl here.
1947 * However, call instructions need to be able to reference the function
1948 * they call, and calls are processed as we write the function_impls, so
1949 * we stop here and write the function_impls as a second pass.
1950 */
1951 }
1952
1953 static void
1954 read_function(read_ctx *ctx)
1955 {
1956 uint32_t flags = blob_read_uint32(ctx->blob);
1957 bool has_name = flags & 0x2;
1958 char *name = has_name ? blob_read_string(ctx->blob) : NULL;
1959
1960 nir_function *fxn = nir_function_create(ctx->nir, name);
1961
1962 read_add_object(ctx, fxn);
1963
1964 fxn->num_params = blob_read_uint32(ctx->blob);
1965 fxn->params = ralloc_array(fxn, nir_parameter, fxn->num_params);
1966 for (unsigned i = 0; i < fxn->num_params; i++) {
1967 uint32_t val = blob_read_uint32(ctx->blob);
1968 fxn->params[i].num_components = val & 0xff;
1969 fxn->params[i].bit_size = (val >> 8) & 0xff;
1970 }
1971
1972 fxn->is_entrypoint = flags & 0x1;
1973 if (flags & 0x4)
1974 fxn->impl = NIR_SERIALIZE_FUNC_HAS_IMPL;
1975 }
1976
1977 /**
1978 * Serialize NIR into a binary blob.
1979 *
1980 * \param strip Don't serialize information only useful for debugging,
1981 * such as variable names, making cache hits from similar
1982 * shaders more likely.
1983 */
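/* A minimal round-trip sketch (assumes a valid shader and matching compiler
 * options on the read side; error handling omitted):
 *
 *    struct blob blob;
 *    blob_init(&blob);
 *    nir_serialize(&blob, nir, true);
 *
 *    struct blob_reader reader;
 *    blob_reader_init(&reader, blob.data, blob.size);
 *    nir_shader *clone = nir_deserialize(NULL, nir->options, &reader);
 *    blob_finish(&blob);
 */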
1984 void
1985 nir_serialize(struct blob *blob, const nir_shader *nir, bool strip)
1986 {
1987 write_ctx ctx = {0};
1988 ctx.remap_table = _mesa_pointer_hash_table_create(NULL);
1989 ctx.blob = blob;
1990 ctx.nir = nir;
1991 ctx.strip = strip;
1992 util_dynarray_init(&ctx.phi_fixups, NULL);
1993
1994 size_t idx_size_offset = blob_reserve_uint32(blob);
1995
1996 struct shader_info info = nir->info;
1997 uint32_t strings = 0;
1998 if (!strip && info.name)
1999 strings |= 0x1;
2000 if (!strip && info.label)
2001 strings |= 0x2;
2002 blob_write_uint32(blob, strings);
2003 if (!strip && info.name)
2004 blob_write_string(blob, info.name);
2005 if (!strip && info.label)
2006 blob_write_string(blob, info.label);
2007 info.name = info.label = NULL;
2008 blob_write_bytes(blob, (uint8_t *) &info, sizeof(info));
2009
2010 write_var_list(&ctx, &nir->uniforms);
2011 write_var_list(&ctx, &nir->inputs);
2012 write_var_list(&ctx, &nir->outputs);
2013 write_var_list(&ctx, &nir->shared);
2014 write_var_list(&ctx, &nir->globals);
2015 write_var_list(&ctx, &nir->system_values);
2016
2017 blob_write_uint32(blob, nir->num_inputs);
2018 blob_write_uint32(blob, nir->num_uniforms);
2019 blob_write_uint32(blob, nir->num_outputs);
2020 blob_write_uint32(blob, nir->num_shared);
2021 blob_write_uint32(blob, nir->scratch_size);
2022
2023 blob_write_uint32(blob, exec_list_length(&nir->functions));
2024 nir_foreach_function(fxn, nir) {
2025 write_function(&ctx, fxn);
2026 }
2027
2028 nir_foreach_function(fxn, nir) {
2029 if (fxn->impl)
2030 write_function_impl(&ctx, fxn->impl);
2031 }
2032
2033 blob_write_uint32(blob, nir->constant_data_size);
2034 if (nir->constant_data_size > 0)
2035 blob_write_bytes(blob, nir->constant_data, nir->constant_data_size);
2036
2037 blob_overwrite_uint32(blob, idx_size_offset, ctx.next_idx);
2038
2039 _mesa_hash_table_destroy(ctx.remap_table, NULL);
2040 util_dynarray_fini(&ctx.phi_fixups);
2041 }
2042
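/* Reads must mirror the writes in nir_serialize exactly: the index-table
 * size, the info strings and shader_info block, the variable lists, the
 * I/O and uniform counts and scratch size, function declarations,
 * function_impls, and finally the constant data.
 */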
2043 nir_shader *
2044 nir_deserialize(void *mem_ctx,
2045 const struct nir_shader_compiler_options *options,
2046 struct blob_reader *blob)
2047 {
2048 read_ctx ctx = {0};
2049 ctx.blob = blob;
2050 list_inithead(&ctx.phi_srcs);
2051 ctx.idx_table_len = blob_read_uint32(blob);
2052 ctx.idx_table = calloc(ctx.idx_table_len, sizeof(uintptr_t));
2053
2054 uint32_t strings = blob_read_uint32(blob);
2055 char *name = (strings & 0x1) ? blob_read_string(blob) : NULL;
2056 char *label = (strings & 0x2) ? blob_read_string(blob) : NULL;
2057
2058 struct shader_info info;
2059 blob_copy_bytes(blob, (uint8_t *) &info, sizeof(info));
2060
2061 ctx.nir = nir_shader_create(mem_ctx, info.stage, options, NULL);
2062
2063 info.name = name ? ralloc_strdup(ctx.nir, name) : NULL;
2064 info.label = label ? ralloc_strdup(ctx.nir, label) : NULL;
2065
2066 ctx.nir->info = info;
2067
2068 read_var_list(&ctx, &ctx.nir->uniforms);
2069 read_var_list(&ctx, &ctx.nir->inputs);
2070 read_var_list(&ctx, &ctx.nir->outputs);
2071 read_var_list(&ctx, &ctx.nir->shared);
2072 read_var_list(&ctx, &ctx.nir->globals);
2073 read_var_list(&ctx, &ctx.nir->system_values);
2074
2075 ctx.nir->num_inputs = blob_read_uint32(blob);
2076 ctx.nir->num_uniforms = blob_read_uint32(blob);
2077 ctx.nir->num_outputs = blob_read_uint32(blob);
2078 ctx.nir->num_shared = blob_read_uint32(blob);
2079 ctx.nir->scratch_size = blob_read_uint32(blob);
2080
2081 unsigned num_functions = blob_read_uint32(blob);
2082 for (unsigned i = 0; i < num_functions; i++)
2083 read_function(&ctx);
2084
2085 nir_foreach_function(fxn, ctx.nir) {
2086 if (fxn->impl == NIR_SERIALIZE_FUNC_HAS_IMPL)
2087 fxn->impl = read_function_impl(&ctx, fxn);
2088 }
2089
2090 ctx.nir->constant_data_size = blob_read_uint32(blob);
2091 if (ctx.nir->constant_data_size > 0) {
2092 ctx.nir->constant_data =
2093 ralloc_size(ctx.nir, ctx.nir->constant_data_size);
2094 blob_copy_bytes(blob, ctx.nir->constant_data,
2095 ctx.nir->constant_data_size);
2096 }
2097
2098 free(ctx.idx_table);
2099
2100 return ctx.nir;
2101 }
2102
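/* Round-trip a shader through serialization in place: serialize it, free
 * everything the shader owns, deserialize the blob into a fresh copy and
 * move that copy back into the original nir_shader.  Handy for exercising
 * the serializer on every shader during development.
 */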
2103 void
2104 nir_shader_serialize_deserialize(nir_shader *shader)
2105 {
2106 const struct nir_shader_compiler_options *options = shader->options;
2107
2108 struct blob writer;
2109 blob_init(&writer);
2110 nir_serialize(&writer, shader, false);
2111
2112 /* Delete all of the shader's ralloc children but leave the shader itself alone */
2113 void *dead_ctx = ralloc_context(NULL);
2114 ralloc_adopt(dead_ctx, shader);
2115 ralloc_free(dead_ctx);
2116
2117 dead_ctx = ralloc_context(NULL);
2118
2119 struct blob_reader reader;
2120 blob_reader_init(&reader, writer.data, writer.size);
2121 nir_shader *copy = nir_deserialize(dead_ctx, options, &reader);
2122
2123 blob_finish(&writer);
2124
2125 nir_shader_replace(shader, copy);
2126 ralloc_free(dead_ctx);
2127 }