/*
 * Copyright © 2017 Connor Abbott
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
24 #include "nir_serialize.h"
25 #include "nir_control_flow.h"
26 #include "util/u_dynarray.h"
27 #include "util/u_math.h"
29 #define NIR_SERIALIZE_FUNC_HAS_IMPL ((void *)(intptr_t)1)
30 #define MAX_OBJECT_IDS (1 << 20)
39 const nir_shader
*nir
;
43 /* maps pointer to index */
44 struct hash_table
*remap_table
;
46 /* the next index to assign to a NIR in-memory object */
49 /* Array of write_phi_fixup structs representing phi sources that need to
50 * be resolved in the second pass.
52 struct util_dynarray phi_fixups
;
54 /* The last serialized type. */
55 const struct glsl_type
*last_type
;
56 const struct glsl_type
*last_interface_type
;
57 struct nir_variable_data last_var_data
;
59 /* For skipping equal ALU headers (typical after scalarization). */
60 nir_instr_type last_instr_type
;
61 uintptr_t last_alu_header_offset
;
63 /* Don't write optional data such as variable names. */
70 struct blob_reader
*blob
;
72 /* the next index to assign to a NIR in-memory object */
75 /* The length of the index -> object table */
76 uint32_t idx_table_len
;
78 /* map from index to deserialized pointer */
81 /* List of phi sources. */
82 struct list_head phi_srcs
;
84 /* The last deserialized type. */
85 const struct glsl_type
*last_type
;
86 const struct glsl_type
*last_interface_type
;
87 struct nir_variable_data last_var_data
;
91 write_add_object(write_ctx
*ctx
, const void *obj
)
93 uint32_t index
= ctx
->next_idx
++;
94 assert(index
!= MAX_OBJECT_IDS
);
95 _mesa_hash_table_insert(ctx
->remap_table
, obj
, (void *)(uintptr_t) index
);
99 write_lookup_object(write_ctx
*ctx
, const void *obj
)
101 struct hash_entry
*entry
= _mesa_hash_table_search(ctx
->remap_table
, obj
);
103 return (uint32_t)(uintptr_t) entry
->data
;
107 read_add_object(read_ctx
*ctx
, void *obj
)
109 assert(ctx
->next_idx
< ctx
->idx_table_len
);
110 ctx
->idx_table
[ctx
->next_idx
++] = obj
;
114 read_lookup_object(read_ctx
*ctx
, uint32_t idx
)
116 assert(idx
< ctx
->idx_table_len
);
117 return ctx
->idx_table
[idx
];
121 read_object(read_ctx
*ctx
)
123 return read_lookup_object(ctx
, blob_read_uint32(ctx
->blob
));
/* Encode a bit size into 3 bits: 0 -> 0, and power-of-two sizes 1..64 map
 * to log2(size)+1 (so 1->1, 2->2, 4->3, ..., 64->7).
 */
static uint8_t
encode_bit_size_3bits(uint8_t bit_size)
{
   /* Encode values of 0, 1, 2, 4, 8, 16, 32, 64 in 3 bits. */
   assert(bit_size <= 64 && util_is_power_of_two_or_zero(bit_size));
   /* Fix: without the zero guard, bit_size 0 would encode to
    * util_logbase2(0) + 1 == 1 and become indistinguishable from
    * bit_size 1; the comment above explicitly lists 0 as encodable.
    */
   if (bit_size)
      return util_logbase2(bit_size) + 1;
   return 0;
}
/* Inverse of encode_bit_size_3bits(): 0 -> 0, n -> 1 << (n - 1)
 * (1->1, 2->2, 3->4, ..., 7->64).
 */
static uint8_t
decode_bit_size_3bits(uint8_t bit_size)
{
   /* Fix: for an encoded 0, `1 << (bit_size - 1)` shifts by 255 —
    * undefined behavior. Zero must decode back to zero.
    */
   if (bit_size)
      return 1 << (bit_size - 1);
   return 0;
}
#define NUM_COMPONENTS_IS_SEPARATE_7 7

/* Pack a component count into 3 bits: 0..4 map to themselves, 8 -> 5,
 * 16 -> 6, and anything else returns the sentinel 7 meaning the real
 * count is stored in a following uint32.
 */
static uint8_t
encode_num_components_in_3bits(uint8_t num_components)
{
   if (num_components <= 4)
      return num_components;
   /* Fix: the vec8/vec16 branches had no return values, so 8 and 16 fell
    * through to the separate-uint32 escape and wasted a dword each; they
    * get the compact encodings 5 and 6.
    */
   if (num_components == 8)
      return 5;
   if (num_components == 16)
      return 6;

   /* special value indicating that num_components is in the next uint32 */
   return NUM_COMPONENTS_IS_SEPARATE_7;
}
/* Inverse of encode_num_components_in_3bits(): 0..4 map to themselves,
 * 5 -> 8, 6 -> 16. The sentinel 7 must be handled by the caller (the count
 * lives in a separate uint32), so reaching it here is a decoder bug.
 */
static uint8_t
decode_num_components_in_3bits(uint8_t value)
{
   if (value <= 4)
      return value;
   if (value == 5)
      return 8;
   if (value == 6)
      return 16;

   unreachable("invalid num_components encoding");
   return 0;
}
175 write_constant(write_ctx
*ctx
, const nir_constant
*c
)
177 blob_write_bytes(ctx
->blob
, c
->values
, sizeof(c
->values
));
178 blob_write_uint32(ctx
->blob
, c
->num_elements
);
179 for (unsigned i
= 0; i
< c
->num_elements
; i
++)
180 write_constant(ctx
, c
->elements
[i
]);
183 static nir_constant
*
184 read_constant(read_ctx
*ctx
, nir_variable
*nvar
)
186 nir_constant
*c
= ralloc(nvar
, nir_constant
);
188 blob_copy_bytes(ctx
->blob
, (uint8_t *)c
->values
, sizeof(c
->values
));
189 c
->num_elements
= blob_read_uint32(ctx
->blob
);
190 c
->elements
= ralloc_array(nvar
, nir_constant
*, c
->num_elements
);
191 for (unsigned i
= 0; i
< c
->num_elements
; i
++)
192 c
->elements
[i
] = read_constant(ctx
, nvar
);
/* How a variable's nir_variable_data is stored in the blob. */
enum var_data_encoding {
   var_encode_full,          /* full struct copied verbatim */
   var_encode_shader_temp,   /* temporaries: data not serialized at all */
   var_encode_function_temp,
   var_encode_location_diff, /* only deltas vs. the previous variable */
};
/* Per-variable header packed into one uint32 (STATIC_ASSERTed to 4 bytes
 * in write_variable). Field widths sum to exactly 32.
 */
union packed_var {
   uint32_t u32;
   struct {
      unsigned has_name:1;
      unsigned has_constant_initializer:1;
      unsigned has_pointer_initializer:1;
      unsigned has_interface_type:1;
      unsigned num_state_slots:7;
      unsigned data_encoding:2; /* enum var_data_encoding */
      unsigned type_same_as_last:1;
      unsigned interface_type_same_as_last:1;
      unsigned _pad:1;
      unsigned num_members:16;
   } u;
};
/* Location deltas for var_encode_location_diff, packed into one uint32.
 * The widths match write_variable's range checks: |Δlocation| < 1 << 12
 * fits a signed 13-bit field, |Δdriver_location| < 1 << 15 fits 16 bits.
 */
union packed_var_data_diff {
   uint32_t u32;
   struct {
      int location:13;
      int location_frac:3;
      int driver_location:16;
   } u;
};
230 write_variable(write_ctx
*ctx
, const nir_variable
*var
)
232 write_add_object(ctx
, var
);
234 assert(var
->num_state_slots
< (1 << 7));
236 STATIC_ASSERT(sizeof(union packed_var
) == 4);
237 union packed_var flags
;
240 flags
.u
.has_name
= !ctx
->strip
&& var
->name
;
241 flags
.u
.has_constant_initializer
= !!(var
->constant_initializer
);
242 flags
.u
.has_pointer_initializer
= !!(var
->pointer_initializer
);
243 flags
.u
.has_interface_type
= !!(var
->interface_type
);
244 flags
.u
.type_same_as_last
= var
->type
== ctx
->last_type
;
245 flags
.u
.interface_type_same_as_last
=
246 var
->interface_type
&& var
->interface_type
== ctx
->last_interface_type
;
247 flags
.u
.num_state_slots
= var
->num_state_slots
;
248 flags
.u
.num_members
= var
->num_members
;
250 struct nir_variable_data data
= var
->data
;
252 /* When stripping, we expect that the location is no longer needed,
253 * which is typically after shaders are linked.
256 data
.mode
!= nir_var_shader_in
&&
257 data
.mode
!= nir_var_shader_out
)
260 /* Temporary variables don't serialize var->data. */
261 if (data
.mode
== nir_var_shader_temp
)
262 flags
.u
.data_encoding
= var_encode_shader_temp
;
263 else if (data
.mode
== nir_var_function_temp
)
264 flags
.u
.data_encoding
= var_encode_function_temp
;
266 struct nir_variable_data tmp
= data
;
268 tmp
.location
= ctx
->last_var_data
.location
;
269 tmp
.location_frac
= ctx
->last_var_data
.location_frac
;
270 tmp
.driver_location
= ctx
->last_var_data
.driver_location
;
272 /* See if we can encode only the difference in locations from the last
275 if (memcmp(&ctx
->last_var_data
, &tmp
, sizeof(tmp
)) == 0 &&
276 abs((int)data
.location
-
277 (int)ctx
->last_var_data
.location
) < (1 << 12) &&
278 abs((int)data
.driver_location
-
279 (int)ctx
->last_var_data
.driver_location
) < (1 << 15))
280 flags
.u
.data_encoding
= var_encode_location_diff
;
282 flags
.u
.data_encoding
= var_encode_full
;
285 blob_write_uint32(ctx
->blob
, flags
.u32
);
287 if (!flags
.u
.type_same_as_last
) {
288 encode_type_to_blob(ctx
->blob
, var
->type
);
289 ctx
->last_type
= var
->type
;
292 if (var
->interface_type
&& !flags
.u
.interface_type_same_as_last
) {
293 encode_type_to_blob(ctx
->blob
, var
->interface_type
);
294 ctx
->last_interface_type
= var
->interface_type
;
297 if (flags
.u
.has_name
)
298 blob_write_string(ctx
->blob
, var
->name
);
300 if (flags
.u
.data_encoding
== var_encode_full
||
301 flags
.u
.data_encoding
== var_encode_location_diff
) {
302 if (flags
.u
.data_encoding
== var_encode_full
) {
303 blob_write_bytes(ctx
->blob
, &data
, sizeof(data
));
305 /* Serialize only the difference in locations from the last variable.
307 union packed_var_data_diff diff
;
309 diff
.u
.location
= data
.location
- ctx
->last_var_data
.location
;
310 diff
.u
.location_frac
= data
.location_frac
-
311 ctx
->last_var_data
.location_frac
;
312 diff
.u
.driver_location
= data
.driver_location
-
313 ctx
->last_var_data
.driver_location
;
315 blob_write_uint32(ctx
->blob
, diff
.u32
);
318 ctx
->last_var_data
= data
;
321 for (unsigned i
= 0; i
< var
->num_state_slots
; i
++) {
322 blob_write_bytes(ctx
->blob
, &var
->state_slots
[i
],
323 sizeof(var
->state_slots
[i
]));
325 if (var
->constant_initializer
)
326 write_constant(ctx
, var
->constant_initializer
);
327 if (var
->pointer_initializer
)
328 write_lookup_object(ctx
, var
->pointer_initializer
);
329 if (var
->num_members
> 0) {
330 blob_write_bytes(ctx
->blob
, (uint8_t *) var
->members
,
331 var
->num_members
* sizeof(*var
->members
));
335 static nir_variable
*
336 read_variable(read_ctx
*ctx
)
338 nir_variable
*var
= rzalloc(ctx
->nir
, nir_variable
);
339 read_add_object(ctx
, var
);
341 union packed_var flags
;
342 flags
.u32
= blob_read_uint32(ctx
->blob
);
344 if (flags
.u
.type_same_as_last
) {
345 var
->type
= ctx
->last_type
;
347 var
->type
= decode_type_from_blob(ctx
->blob
);
348 ctx
->last_type
= var
->type
;
351 if (flags
.u
.has_interface_type
) {
352 if (flags
.u
.interface_type_same_as_last
) {
353 var
->interface_type
= ctx
->last_interface_type
;
355 var
->interface_type
= decode_type_from_blob(ctx
->blob
);
356 ctx
->last_interface_type
= var
->interface_type
;
360 if (flags
.u
.has_name
) {
361 const char *name
= blob_read_string(ctx
->blob
);
362 var
->name
= ralloc_strdup(var
, name
);
367 if (flags
.u
.data_encoding
== var_encode_shader_temp
)
368 var
->data
.mode
= nir_var_shader_temp
;
369 else if (flags
.u
.data_encoding
== var_encode_function_temp
)
370 var
->data
.mode
= nir_var_function_temp
;
371 else if (flags
.u
.data_encoding
== var_encode_full
) {
372 blob_copy_bytes(ctx
->blob
, (uint8_t *) &var
->data
, sizeof(var
->data
));
373 ctx
->last_var_data
= var
->data
;
374 } else { /* var_encode_location_diff */
375 union packed_var_data_diff diff
;
376 diff
.u32
= blob_read_uint32(ctx
->blob
);
378 var
->data
= ctx
->last_var_data
;
379 var
->data
.location
+= diff
.u
.location
;
380 var
->data
.location_frac
+= diff
.u
.location_frac
;
381 var
->data
.driver_location
+= diff
.u
.driver_location
;
383 ctx
->last_var_data
= var
->data
;
386 var
->num_state_slots
= flags
.u
.num_state_slots
;
387 if (var
->num_state_slots
!= 0) {
388 var
->state_slots
= ralloc_array(var
, nir_state_slot
,
389 var
->num_state_slots
);
390 for (unsigned i
= 0; i
< var
->num_state_slots
; i
++) {
391 blob_copy_bytes(ctx
->blob
, &var
->state_slots
[i
],
392 sizeof(var
->state_slots
[i
]));
395 if (flags
.u
.has_constant_initializer
)
396 var
->constant_initializer
= read_constant(ctx
, var
);
398 var
->constant_initializer
= NULL
;
400 if (flags
.u
.has_pointer_initializer
)
401 var
->pointer_initializer
= read_object(ctx
);
403 var
->pointer_initializer
= NULL
;
405 var
->num_members
= flags
.u
.num_members
;
406 if (var
->num_members
> 0) {
407 var
->members
= ralloc_array(var
, struct nir_variable_data
,
409 blob_copy_bytes(ctx
->blob
, (uint8_t *) var
->members
,
410 var
->num_members
* sizeof(*var
->members
));
417 write_var_list(write_ctx
*ctx
, const struct exec_list
*src
)
419 blob_write_uint32(ctx
->blob
, exec_list_length(src
));
420 foreach_list_typed(nir_variable
, var
, node
, src
) {
421 write_variable(ctx
, var
);
426 read_var_list(read_ctx
*ctx
, struct exec_list
*dst
)
428 exec_list_make_empty(dst
);
429 unsigned num_vars
= blob_read_uint32(ctx
->blob
);
430 for (unsigned i
= 0; i
< num_vars
; i
++) {
431 nir_variable
*var
= read_variable(ctx
);
432 exec_list_push_tail(dst
, &var
->node
);
437 write_register(write_ctx
*ctx
, const nir_register
*reg
)
439 write_add_object(ctx
, reg
);
440 blob_write_uint32(ctx
->blob
, reg
->num_components
);
441 blob_write_uint32(ctx
->blob
, reg
->bit_size
);
442 blob_write_uint32(ctx
->blob
, reg
->num_array_elems
);
443 blob_write_uint32(ctx
->blob
, reg
->index
);
444 blob_write_uint32(ctx
->blob
, !ctx
->strip
&& reg
->name
);
445 if (!ctx
->strip
&& reg
->name
)
446 blob_write_string(ctx
->blob
, reg
->name
);
449 static nir_register
*
450 read_register(read_ctx
*ctx
)
452 nir_register
*reg
= ralloc(ctx
->nir
, nir_register
);
453 read_add_object(ctx
, reg
);
454 reg
->num_components
= blob_read_uint32(ctx
->blob
);
455 reg
->bit_size
= blob_read_uint32(ctx
->blob
);
456 reg
->num_array_elems
= blob_read_uint32(ctx
->blob
);
457 reg
->index
= blob_read_uint32(ctx
->blob
);
458 bool has_name
= blob_read_uint32(ctx
->blob
);
460 const char *name
= blob_read_string(ctx
->blob
);
461 reg
->name
= ralloc_strdup(reg
, name
);
466 list_inithead(®
->uses
);
467 list_inithead(®
->defs
);
468 list_inithead(®
->if_uses
);
474 write_reg_list(write_ctx
*ctx
, const struct exec_list
*src
)
476 blob_write_uint32(ctx
->blob
, exec_list_length(src
));
477 foreach_list_typed(nir_register
, reg
, node
, src
)
478 write_register(ctx
, reg
);
482 read_reg_list(read_ctx
*ctx
, struct exec_list
*dst
)
484 exec_list_make_empty(dst
);
485 unsigned num_regs
= blob_read_uint32(ctx
->blob
);
486 for (unsigned i
= 0; i
< num_regs
; i
++) {
487 nir_register
*reg
= read_register(ctx
);
488 exec_list_push_tail(dst
, ®
->node
);
/* One-uint32 source encoding. The low 22 bits are a common "header"
 * (SSA flag, indirect flag, 20-bit object index); the high 10 bits are a
 * per-instruction "footer" interpreted per the views below.
 */
union packed_src {
   uint32_t u32;
   struct {
      unsigned is_ssa:1;   /* <-- Header */
      unsigned is_indirect:1;
      unsigned object_idx:20;
      unsigned _footer:10; /* <-- Footer */
   } any;
   struct {
      unsigned _header:22; /* <-- Header */
      unsigned negate:1;   /* <-- Footer */
      unsigned abs:1;
      unsigned swizzle_x:2;
      unsigned swizzle_y:2;
      unsigned swizzle_z:2;
      unsigned swizzle_w:2;
   } alu;
   struct {
      unsigned _header:22; /* <-- Header */
      unsigned src_type:5; /* <-- Footer */
      unsigned _pad:5;
   } tex;
};
517 write_src_full(write_ctx
*ctx
, const nir_src
*src
, union packed_src header
)
519 /* Since sources are very frequent, we try to save some space when storing
520 * them. In particular, we store whether the source is a register and
521 * whether the register has an indirect index in the low two bits. We can
522 * assume that the high two bits of the index are zero, since otherwise our
523 * address space would've been exhausted allocating the remap table!
525 header
.any
.is_ssa
= src
->is_ssa
;
527 header
.any
.object_idx
= write_lookup_object(ctx
, src
->ssa
);
528 blob_write_uint32(ctx
->blob
, header
.u32
);
530 header
.any
.object_idx
= write_lookup_object(ctx
, src
->reg
.reg
);
531 header
.any
.is_indirect
= !!src
->reg
.indirect
;
532 blob_write_uint32(ctx
->blob
, header
.u32
);
533 blob_write_uint32(ctx
->blob
, src
->reg
.base_offset
);
534 if (src
->reg
.indirect
) {
535 union packed_src header
= {0};
536 write_src_full(ctx
, src
->reg
.indirect
, header
);
542 write_src(write_ctx
*ctx
, const nir_src
*src
)
544 union packed_src header
= {0};
545 write_src_full(ctx
, src
, header
);
548 static union packed_src
549 read_src(read_ctx
*ctx
, nir_src
*src
, void *mem_ctx
)
551 STATIC_ASSERT(sizeof(union packed_src
) == 4);
552 union packed_src header
;
553 header
.u32
= blob_read_uint32(ctx
->blob
);
555 src
->is_ssa
= header
.any
.is_ssa
;
557 src
->ssa
= read_lookup_object(ctx
, header
.any
.object_idx
);
559 src
->reg
.reg
= read_lookup_object(ctx
, header
.any
.object_idx
);
560 src
->reg
.base_offset
= blob_read_uint32(ctx
->blob
);
561 if (header
.any
.is_indirect
) {
562 src
->reg
.indirect
= ralloc(mem_ctx
, nir_src
);
563 read_src(ctx
, src
->reg
.indirect
, mem_ctx
);
565 src
->reg
.indirect
= NULL
;
/* One-byte destination encoding, stored in the `dest` field of
 * union packed_instr (STATIC_ASSERTed to 1 byte in write_dest).
 */
union packed_dest {
   uint8_t u8;
   struct {
      uint8_t is_ssa:1;
      uint8_t has_name:1;
      uint8_t num_components:3; /* encode_num_components_in_3bits() */
      uint8_t bit_size:3;       /* encode_bit_size_3bits() */
   } ssa;
   struct {
      uint8_t is_ssa:1;
      uint8_t is_indirect:1;
      uint8_t _pad:6;
   } reg;
};
/* How intrinsic const_index[] values are stored. */
enum intrinsic_const_indices_encoding {
   /* Use the 9 bits of packed_const_indices to store 1-9 indices.
    * 1 9-bit index, or 2 4-bit indices, or 3 3-bit indices, or
    * 4 2-bit indices, or 5-9 1-bit indices.
    *
    * The common case for load_ubo is 0, 0, 0, which is trivially represented.
    * The common cases for load_interpolated_input also fit here, e.g.: 7, 3
    */
   const_indices_9bit_all_combined,

   const_indices_8bit,  /* 8 bits per element */
   const_indices_16bit, /* 16 bits per element */
   const_indices_32bit, /* 32 bits per element */
};
/* How a load_const's value is stored relative to its 19-bit header field. */
enum load_const_packing {
   /* Constants are not packed and are stored in following dwords. */
   load_const_full,

   /* packed_value contains high 19 bits, low bits are 0,
    * good for floating-point decimals
    */
   load_const_scalar_hi_19bits,

   /* packed_value contains low 19 bits, high bits are sign-extended */
   load_const_scalar_lo_19bits_sext,
};
/* One-uint32 instruction header. Every view starts with the 4-bit
 * instr_type; views that carry a destination end with the 8-bit packed_dest
 * byte. NOTE(review): field widths reconstructed to total 32 bits per view
 * and to match their uses in the read/write functions below — confirm
 * against the canonical file.
 */
union packed_instr {
   uint32_t u32;
   struct {
      unsigned instr_type:4; /* always present */
      unsigned _pad:20;
      unsigned dest:8;       /* always last */
   } any;
   struct {
      unsigned instr_type:4;
      unsigned exact:1;
      unsigned no_signed_wrap:1;
      unsigned no_unsigned_wrap:1;
      unsigned saturate:1;
      /* Reg: writemask; SSA: swizzles for 2 srcs */
      unsigned writemask_or_two_swizzles:4;
      unsigned op:9;
      unsigned packed_src_ssa_16bit:1;
      /* Scalarized ALUs always have the same header. */
      unsigned num_followup_alu_sharing_header:2;
      unsigned dest:8;
   } alu;
   struct {
      unsigned instr_type:4;
      unsigned deref_type:3;
      unsigned cast_type_same_as_last:1;
      unsigned mode:10;                 /* deref_var redefines this */
      unsigned packed_src_ssa_16bit:1;  /* deref_var redefines this */
      unsigned _pad:5;                  /* deref_var redefines this */
      unsigned dest:8;
   } deref;
   struct {
      unsigned instr_type:4;
      unsigned deref_type:3;
      unsigned _pad:1;
      unsigned object_idx:16; /* if 0, the object ID is a separate uint32 */
      unsigned dest:8;
   } deref_var;
   struct {
      unsigned instr_type:4;
      unsigned intrinsic:9;
      unsigned const_indices_encoding:2;
      unsigned packed_const_indices:9;
      unsigned dest:8;
   } intrinsic;
   struct {
      unsigned instr_type:4;
      unsigned last_component:4;
      unsigned bit_size:3;
      unsigned packing:2;       /* enum load_const_packing */
      unsigned packed_value:19; /* meaning determined by packing */
   } load_const;
   struct {
      unsigned instr_type:4;
      unsigned last_component:4;
      unsigned bit_size:3;
      unsigned _pad:21;
   } undef;
   struct {
      unsigned instr_type:4;
      unsigned num_srcs:4;
      unsigned op:4;
      unsigned dest:8;
      unsigned _pad:12;
   } tex;
   struct {
      unsigned instr_type:4;
      unsigned num_srcs:20;
      unsigned dest:8;
   } phi;
   struct {
      unsigned instr_type:4;
      unsigned type:2;
      unsigned _pad:26;
   } jump;
};
690 /* Write "lo24" as low 24 bits in the first uint32. */
692 write_dest(write_ctx
*ctx
, const nir_dest
*dst
, union packed_instr header
,
693 nir_instr_type instr_type
)
695 STATIC_ASSERT(sizeof(union packed_dest
) == 1);
696 union packed_dest dest
;
699 dest
.ssa
.is_ssa
= dst
->is_ssa
;
701 dest
.ssa
.has_name
= !ctx
->strip
&& dst
->ssa
.name
;
702 dest
.ssa
.num_components
=
703 encode_num_components_in_3bits(dst
->ssa
.num_components
);
704 dest
.ssa
.bit_size
= encode_bit_size_3bits(dst
->ssa
.bit_size
);
706 dest
.reg
.is_indirect
= !!(dst
->reg
.indirect
);
708 header
.any
.dest
= dest
.u8
;
710 /* Check if the current ALU instruction has the same header as the previous
711 * instruction that is also ALU. If it is, we don't have to write
712 * the current header. This is a typical occurence after scalarization.
714 if (instr_type
== nir_instr_type_alu
) {
715 bool equal_header
= false;
717 if (ctx
->last_instr_type
== nir_instr_type_alu
) {
718 assert(ctx
->last_alu_header_offset
);
719 union packed_instr
*last_header
=
720 (union packed_instr
*)(ctx
->blob
->data
+
721 ctx
->last_alu_header_offset
);
723 /* Clear the field that counts ALUs with equal headers. */
724 union packed_instr clean_header
;
725 clean_header
.u32
= last_header
->u32
;
726 clean_header
.alu
.num_followup_alu_sharing_header
= 0;
728 /* There can be at most 4 consecutive ALU instructions
729 * sharing the same header.
731 if (last_header
->alu
.num_followup_alu_sharing_header
< 3 &&
732 header
.u32
== clean_header
.u32
) {
733 last_header
->alu
.num_followup_alu_sharing_header
++;
739 ctx
->last_alu_header_offset
= ctx
->blob
->size
;
740 blob_write_uint32(ctx
->blob
, header
.u32
);
743 blob_write_uint32(ctx
->blob
, header
.u32
);
746 if (dest
.ssa
.is_ssa
&&
747 dest
.ssa
.num_components
== NUM_COMPONENTS_IS_SEPARATE_7
)
748 blob_write_uint32(ctx
->blob
, dst
->ssa
.num_components
);
751 write_add_object(ctx
, &dst
->ssa
);
752 if (dest
.ssa
.has_name
)
753 blob_write_string(ctx
->blob
, dst
->ssa
.name
);
755 blob_write_uint32(ctx
->blob
, write_lookup_object(ctx
, dst
->reg
.reg
));
756 blob_write_uint32(ctx
->blob
, dst
->reg
.base_offset
);
757 if (dst
->reg
.indirect
)
758 write_src(ctx
, dst
->reg
.indirect
);
763 read_dest(read_ctx
*ctx
, nir_dest
*dst
, nir_instr
*instr
,
764 union packed_instr header
)
766 union packed_dest dest
;
767 dest
.u8
= header
.any
.dest
;
769 if (dest
.ssa
.is_ssa
) {
770 unsigned bit_size
= decode_bit_size_3bits(dest
.ssa
.bit_size
);
771 unsigned num_components
;
772 if (dest
.ssa
.num_components
== NUM_COMPONENTS_IS_SEPARATE_7
)
773 num_components
= blob_read_uint32(ctx
->blob
);
775 num_components
= decode_num_components_in_3bits(dest
.ssa
.num_components
);
776 char *name
= dest
.ssa
.has_name
? blob_read_string(ctx
->blob
) : NULL
;
777 nir_ssa_dest_init(instr
, dst
, num_components
, bit_size
, name
);
778 read_add_object(ctx
, &dst
->ssa
);
780 dst
->reg
.reg
= read_object(ctx
);
781 dst
->reg
.base_offset
= blob_read_uint32(ctx
->blob
);
782 if (dest
.reg
.is_indirect
) {
783 dst
->reg
.indirect
= ralloc(instr
, nir_src
);
784 read_src(ctx
, dst
->reg
.indirect
, instr
);
790 are_object_ids_16bit(write_ctx
*ctx
)
792 /* Check the highest object ID, because they are monotonic. */
793 return ctx
->next_idx
< (1 << 16);
797 is_alu_src_ssa_16bit(write_ctx
*ctx
, const nir_alu_instr
*alu
)
799 unsigned num_srcs
= nir_op_infos
[alu
->op
].num_inputs
;
801 for (unsigned i
= 0; i
< num_srcs
; i
++) {
802 if (!alu
->src
[i
].src
.is_ssa
|| alu
->src
[i
].abs
|| alu
->src
[i
].negate
)
805 unsigned src_components
= nir_ssa_alu_instr_src_components(alu
, i
);
807 for (unsigned chan
= 0; chan
< src_components
; chan
++) {
808 /* The swizzles for src0.x and src1.x are stored
809 * in writemask_or_two_swizzles for SSA ALUs.
811 if (alu
->dest
.dest
.is_ssa
&& i
< 2 && chan
== 0 &&
812 alu
->src
[i
].swizzle
[chan
] < 4)
815 if (alu
->src
[i
].swizzle
[chan
] != chan
)
820 return are_object_ids_16bit(ctx
);
824 write_alu(write_ctx
*ctx
, const nir_alu_instr
*alu
)
826 unsigned num_srcs
= nir_op_infos
[alu
->op
].num_inputs
;
827 unsigned dst_components
= nir_dest_num_components(alu
->dest
.dest
);
829 /* 9 bits for nir_op */
830 STATIC_ASSERT(nir_num_opcodes
<= 512);
831 union packed_instr header
;
834 header
.alu
.instr_type
= alu
->instr
.type
;
835 header
.alu
.exact
= alu
->exact
;
836 header
.alu
.no_signed_wrap
= alu
->no_signed_wrap
;
837 header
.alu
.no_unsigned_wrap
= alu
->no_unsigned_wrap
;
838 header
.alu
.saturate
= alu
->dest
.saturate
;
839 header
.alu
.op
= alu
->op
;
840 header
.alu
.packed_src_ssa_16bit
= is_alu_src_ssa_16bit(ctx
, alu
);
842 if (header
.alu
.packed_src_ssa_16bit
&&
843 alu
->dest
.dest
.is_ssa
) {
844 /* For packed srcs of SSA ALUs, this field stores the swizzles. */
845 header
.alu
.writemask_or_two_swizzles
= alu
->src
[0].swizzle
[0];
847 header
.alu
.writemask_or_two_swizzles
|= alu
->src
[1].swizzle
[0] << 2;
848 } else if (!alu
->dest
.dest
.is_ssa
&& dst_components
<= 4) {
849 /* For vec4 registers, this field is a writemask. */
850 header
.alu
.writemask_or_two_swizzles
= alu
->dest
.write_mask
;
853 write_dest(ctx
, &alu
->dest
.dest
, header
, alu
->instr
.type
);
855 if (!alu
->dest
.dest
.is_ssa
&& dst_components
> 4)
856 blob_write_uint32(ctx
->blob
, alu
->dest
.write_mask
);
858 if (header
.alu
.packed_src_ssa_16bit
) {
859 for (unsigned i
= 0; i
< num_srcs
; i
++) {
860 assert(alu
->src
[i
].src
.is_ssa
);
861 unsigned idx
= write_lookup_object(ctx
, alu
->src
[i
].src
.ssa
);
862 assert(idx
< (1 << 16));
863 blob_write_uint16(ctx
->blob
, idx
);
866 for (unsigned i
= 0; i
< num_srcs
; i
++) {
867 unsigned src_channels
= nir_ssa_alu_instr_src_components(alu
, i
);
868 unsigned src_components
= nir_src_num_components(alu
->src
[i
].src
);
869 union packed_src src
;
870 bool packed
= src_components
<= 4 && src_channels
<= 4;
873 src
.alu
.negate
= alu
->src
[i
].negate
;
874 src
.alu
.abs
= alu
->src
[i
].abs
;
877 src
.alu
.swizzle_x
= alu
->src
[i
].swizzle
[0];
878 src
.alu
.swizzle_y
= alu
->src
[i
].swizzle
[1];
879 src
.alu
.swizzle_z
= alu
->src
[i
].swizzle
[2];
880 src
.alu
.swizzle_w
= alu
->src
[i
].swizzle
[3];
883 write_src_full(ctx
, &alu
->src
[i
].src
, src
);
885 /* Store swizzles for vec8 and vec16. */
887 for (unsigned o
= 0; o
< src_channels
; o
+= 8) {
890 for (unsigned j
= 0; j
< 8 && o
+ j
< src_channels
; j
++) {
891 value
|= (uint32_t)alu
->src
[i
].swizzle
[o
+ j
] <<
892 (4 * j
); /* 4 bits per swizzle */
895 blob_write_uint32(ctx
->blob
, value
);
902 static nir_alu_instr
*
903 read_alu(read_ctx
*ctx
, union packed_instr header
)
905 unsigned num_srcs
= nir_op_infos
[header
.alu
.op
].num_inputs
;
906 nir_alu_instr
*alu
= nir_alu_instr_create(ctx
->nir
, header
.alu
.op
);
908 alu
->exact
= header
.alu
.exact
;
909 alu
->no_signed_wrap
= header
.alu
.no_signed_wrap
;
910 alu
->no_unsigned_wrap
= header
.alu
.no_unsigned_wrap
;
911 alu
->dest
.saturate
= header
.alu
.saturate
;
913 read_dest(ctx
, &alu
->dest
.dest
, &alu
->instr
, header
);
915 unsigned dst_components
= nir_dest_num_components(alu
->dest
.dest
);
917 if (alu
->dest
.dest
.is_ssa
) {
918 alu
->dest
.write_mask
= u_bit_consecutive(0, dst_components
);
919 } else if (dst_components
<= 4) {
920 alu
->dest
.write_mask
= header
.alu
.writemask_or_two_swizzles
;
922 alu
->dest
.write_mask
= blob_read_uint32(ctx
->blob
);
925 if (header
.alu
.packed_src_ssa_16bit
) {
926 for (unsigned i
= 0; i
< num_srcs
; i
++) {
927 nir_alu_src
*src
= &alu
->src
[i
];
928 src
->src
.is_ssa
= true;
929 src
->src
.ssa
= read_lookup_object(ctx
, blob_read_uint16(ctx
->blob
));
931 memset(&src
->swizzle
, 0, sizeof(src
->swizzle
));
933 unsigned src_components
= nir_ssa_alu_instr_src_components(alu
, i
);
935 for (unsigned chan
= 0; chan
< src_components
; chan
++)
936 src
->swizzle
[chan
] = chan
;
939 for (unsigned i
= 0; i
< num_srcs
; i
++) {
940 union packed_src src
= read_src(ctx
, &alu
->src
[i
].src
, &alu
->instr
);
941 unsigned src_channels
= nir_ssa_alu_instr_src_components(alu
, i
);
942 unsigned src_components
= nir_src_num_components(alu
->src
[i
].src
);
943 bool packed
= src_components
<= 4 && src_channels
<= 4;
945 alu
->src
[i
].negate
= src
.alu
.negate
;
946 alu
->src
[i
].abs
= src
.alu
.abs
;
948 memset(&alu
->src
[i
].swizzle
, 0, sizeof(alu
->src
[i
].swizzle
));
951 alu
->src
[i
].swizzle
[0] = src
.alu
.swizzle_x
;
952 alu
->src
[i
].swizzle
[1] = src
.alu
.swizzle_y
;
953 alu
->src
[i
].swizzle
[2] = src
.alu
.swizzle_z
;
954 alu
->src
[i
].swizzle
[3] = src
.alu
.swizzle_w
;
956 /* Load swizzles for vec8 and vec16. */
957 for (unsigned o
= 0; o
< src_channels
; o
+= 8) {
958 unsigned value
= blob_read_uint32(ctx
->blob
);
960 for (unsigned j
= 0; j
< 8 && o
+ j
< src_channels
; j
++) {
961 alu
->src
[i
].swizzle
[o
+ j
] =
962 (value
>> (4 * j
)) & 0xf; /* 4 bits per swizzle */
969 if (header
.alu
.packed_src_ssa_16bit
&&
970 alu
->dest
.dest
.is_ssa
) {
971 alu
->src
[0].swizzle
[0] = header
.alu
.writemask_or_two_swizzles
& 0x3;
973 alu
->src
[1].swizzle
[0] = header
.alu
.writemask_or_two_swizzles
>> 2;
980 write_deref(write_ctx
*ctx
, const nir_deref_instr
*deref
)
982 assert(deref
->deref_type
< 8);
983 assert(deref
->mode
< (1 << 10));
985 union packed_instr header
;
988 header
.deref
.instr_type
= deref
->instr
.type
;
989 header
.deref
.deref_type
= deref
->deref_type
;
991 if (deref
->deref_type
== nir_deref_type_cast
) {
992 header
.deref
.mode
= deref
->mode
;
993 header
.deref
.cast_type_same_as_last
= deref
->type
== ctx
->last_type
;
996 unsigned var_idx
= 0;
997 if (deref
->deref_type
== nir_deref_type_var
) {
998 var_idx
= write_lookup_object(ctx
, deref
->var
);
999 if (var_idx
&& var_idx
< (1 << 16))
1000 header
.deref_var
.object_idx
= var_idx
;
1003 if (deref
->deref_type
== nir_deref_type_array
||
1004 deref
->deref_type
== nir_deref_type_ptr_as_array
) {
1005 header
.deref
.packed_src_ssa_16bit
=
1006 deref
->parent
.is_ssa
&& deref
->arr
.index
.is_ssa
&&
1007 are_object_ids_16bit(ctx
);
1010 write_dest(ctx
, &deref
->dest
, header
, deref
->instr
.type
);
1012 switch (deref
->deref_type
) {
1013 case nir_deref_type_var
:
1014 if (!header
.deref_var
.object_idx
)
1015 blob_write_uint32(ctx
->blob
, var_idx
);
1018 case nir_deref_type_struct
:
1019 write_src(ctx
, &deref
->parent
);
1020 blob_write_uint32(ctx
->blob
, deref
->strct
.index
);
1023 case nir_deref_type_array
:
1024 case nir_deref_type_ptr_as_array
:
1025 if (header
.deref
.packed_src_ssa_16bit
) {
1026 blob_write_uint16(ctx
->blob
,
1027 write_lookup_object(ctx
, deref
->parent
.ssa
));
1028 blob_write_uint16(ctx
->blob
,
1029 write_lookup_object(ctx
, deref
->arr
.index
.ssa
));
1031 write_src(ctx
, &deref
->parent
);
1032 write_src(ctx
, &deref
->arr
.index
);
1036 case nir_deref_type_cast
:
1037 write_src(ctx
, &deref
->parent
);
1038 blob_write_uint32(ctx
->blob
, deref
->cast
.ptr_stride
);
1039 if (!header
.deref
.cast_type_same_as_last
) {
1040 encode_type_to_blob(ctx
->blob
, deref
->type
);
1041 ctx
->last_type
= deref
->type
;
1045 case nir_deref_type_array_wildcard
:
1046 write_src(ctx
, &deref
->parent
);
1050 unreachable("Invalid deref type");
1054 static nir_deref_instr
*
1055 read_deref(read_ctx
*ctx
, union packed_instr header
)
1057 nir_deref_type deref_type
= header
.deref
.deref_type
;
1058 nir_deref_instr
*deref
= nir_deref_instr_create(ctx
->nir
, deref_type
);
1060 read_dest(ctx
, &deref
->dest
, &deref
->instr
, header
);
1062 nir_deref_instr
*parent
;
1064 switch (deref
->deref_type
) {
1065 case nir_deref_type_var
:
1066 if (header
.deref_var
.object_idx
)
1067 deref
->var
= read_lookup_object(ctx
, header
.deref_var
.object_idx
);
1069 deref
->var
= read_object(ctx
);
1071 deref
->type
= deref
->var
->type
;
1074 case nir_deref_type_struct
:
1075 read_src(ctx
, &deref
->parent
, &deref
->instr
);
1076 parent
= nir_src_as_deref(deref
->parent
);
1077 deref
->strct
.index
= blob_read_uint32(ctx
->blob
);
1078 deref
->type
= glsl_get_struct_field(parent
->type
, deref
->strct
.index
);
1081 case nir_deref_type_array
:
1082 case nir_deref_type_ptr_as_array
:
1083 if (header
.deref
.packed_src_ssa_16bit
) {
1084 deref
->parent
.is_ssa
= true;
1085 deref
->parent
.ssa
= read_lookup_object(ctx
, blob_read_uint16(ctx
->blob
));
1086 deref
->arr
.index
.is_ssa
= true;
1087 deref
->arr
.index
.ssa
= read_lookup_object(ctx
, blob_read_uint16(ctx
->blob
));
1089 read_src(ctx
, &deref
->parent
, &deref
->instr
);
1090 read_src(ctx
, &deref
->arr
.index
, &deref
->instr
);
1093 parent
= nir_src_as_deref(deref
->parent
);
1094 if (deref
->deref_type
== nir_deref_type_array
)
1095 deref
->type
= glsl_get_array_element(parent
->type
);
1097 deref
->type
= parent
->type
;
1100 case nir_deref_type_cast
:
1101 read_src(ctx
, &deref
->parent
, &deref
->instr
);
1102 deref
->cast
.ptr_stride
= blob_read_uint32(ctx
->blob
);
1103 if (header
.deref
.cast_type_same_as_last
) {
1104 deref
->type
= ctx
->last_type
;
1106 deref
->type
= decode_type_from_blob(ctx
->blob
);
1107 ctx
->last_type
= deref
->type
;
1111 case nir_deref_type_array_wildcard
:
1112 read_src(ctx
, &deref
->parent
, &deref
->instr
);
1113 parent
= nir_src_as_deref(deref
->parent
);
1114 deref
->type
= glsl_get_array_element(parent
->type
);
1118 unreachable("Invalid deref type");
1121 if (deref_type
== nir_deref_type_var
) {
1122 deref
->mode
= deref
->var
->data
.mode
;
1123 } else if (deref
->deref_type
== nir_deref_type_cast
) {
1124 deref
->mode
= header
.deref
.mode
;
1126 assert(deref
->parent
.is_ssa
);
1127 deref
->mode
= nir_instr_as_deref(deref
->parent
.ssa
->parent_instr
)->mode
;
/* Serialize an intrinsic instruction.
 *
 * Layout on the wire: packed header (possibly combined with the dest by
 * write_dest), then the sources, then the constant indices in whichever
 * encoding the header advertises.  The read side (read_intrinsic) must
 * consume the stream in exactly this order.
 */
static void
write_intrinsic(write_ctx *ctx, const nir_intrinsic_instr *intrin)
{
   /* 9 bits for nir_intrinsic_op */
   STATIC_ASSERT(nir_num_intrinsics <= 512);
   unsigned num_srcs = nir_intrinsic_infos[intrin->intrinsic].num_srcs;
   unsigned num_indices = nir_intrinsic_infos[intrin->intrinsic].num_indices;
   assert(intrin->intrinsic < 512);

   union packed_instr header;
   header.u32 = 0;

   header.intrinsic.instr_type = intrin->instr.type;
   header.intrinsic.intrinsic = intrin->intrinsic;

   /* Analyze constant indices to decide how to encode them.  Small indices
    * can live entirely inside the header; otherwise pick the narrowest
    * fixed-width encoding that fits the largest index.
    */
   if (num_indices) {
      unsigned max_bits = 0;
      for (unsigned i = 0; i < num_indices; i++) {
         unsigned max = util_last_bit(intrin->const_index[i]);
         max_bits = MAX2(max_bits, max);
      }

      if (max_bits * num_indices <= 9) {
         header.intrinsic.const_indices_encoding = const_indices_9bit_all_combined;

         /* Pack all const indices into the 9 header bits, each index getting
          * an equal share (9 / num_indices bits).
          */
         unsigned bit_size = 9 / num_indices;
         for (unsigned i = 0; i < num_indices; i++) {
            header.intrinsic.packed_const_indices |=
               intrin->const_index[i] << (i * bit_size);
         }
      } else if (max_bits <= 8)
         header.intrinsic.const_indices_encoding = const_indices_8bit;
      else if (max_bits <= 16)
         header.intrinsic.const_indices_encoding = const_indices_16bit;
      else
         header.intrinsic.const_indices_encoding = const_indices_32bit;
   }

   /* write_dest emits the header itself so it can fold dest info into it;
    * if there is no dest, emit the header directly.
    */
   if (nir_intrinsic_infos[intrin->intrinsic].has_dest)
      write_dest(ctx, &intrin->dest, header, intrin->instr.type);
   else
      blob_write_uint32(ctx->blob, header.u32);

   for (unsigned i = 0; i < num_srcs; i++)
      write_src(ctx, &intrin->src[i]);

   /* Out-of-line constant indices (the 9-bit combined form is already in
    * the header).
    */
   switch (header.intrinsic.const_indices_encoding) {
   case const_indices_8bit:
      for (unsigned i = 0; i < num_indices; i++)
         blob_write_uint8(ctx->blob, intrin->const_index[i]);
      break;
   case const_indices_16bit:
      for (unsigned i = 0; i < num_indices; i++)
         blob_write_uint16(ctx->blob, intrin->const_index[i]);
      break;
   case const_indices_32bit:
      for (unsigned i = 0; i < num_indices; i++)
         blob_write_uint32(ctx->blob, intrin->const_index[i]);
      break;
   }
}
/* Deserialize an intrinsic instruction; mirror of write_intrinsic.
 * The header has already been read by the caller (read_instr).
 */
static nir_intrinsic_instr *
read_intrinsic(read_ctx *ctx, union packed_instr header)
{
   nir_intrinsic_op op = header.intrinsic.intrinsic;
   nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(ctx->nir, op);

   unsigned num_srcs = nir_intrinsic_infos[op].num_srcs;
   unsigned num_indices = nir_intrinsic_infos[op].num_indices;

   if (nir_intrinsic_infos[op].has_dest)
      read_dest(ctx, &intrin->dest, &intrin->instr, header);

   for (unsigned i = 0; i < num_srcs; i++)
      read_src(ctx, &intrin->src[i], &intrin->instr);

   /* Vectorized instrinsics have num_components same as dst or src that has
    * 0 components in the info. Find it.
    */
   if (nir_intrinsic_infos[op].has_dest &&
       nir_intrinsic_infos[op].dest_components == 0) {
      intrin->num_components = nir_dest_num_components(intrin->dest);
   } else {
      for (unsigned i = 0; i < num_srcs; i++) {
         if (nir_intrinsic_infos[op].src_components[i] == 0) {
            intrin->num_components = nir_src_num_components(intrin->src[i]);
            break;
         }
      }
   }

   /* Recover the constant indices using whichever encoding the writer chose
    * (see write_intrinsic).
    */
   switch (header.intrinsic.const_indices_encoding) {
   case const_indices_9bit_all_combined: {
      /* All indices share the 9 header bits equally. */
      unsigned bit_size = 9 / num_indices;
      unsigned bit_mask = u_bit_consecutive(0, bit_size);
      for (unsigned i = 0; i < num_indices; i++) {
         intrin->const_index[i] =
            (header.intrinsic.packed_const_indices >> (i * bit_size)) &
            bit_mask;
      }
      break;
   }
   case const_indices_8bit:
      for (unsigned i = 0; i < num_indices; i++)
         intrin->const_index[i] = blob_read_uint8(ctx->blob);
      break;
   case const_indices_16bit:
      for (unsigned i = 0; i < num_indices; i++)
         intrin->const_index[i] = blob_read_uint16(ctx->blob);
      break;
   case const_indices_32bit:
      for (unsigned i = 0; i < num_indices; i++)
         intrin->const_index[i] = blob_read_uint32(ctx->blob);
      break;
   }

   return intrin;
}
/* Serialize a load_const instruction.
 *
 * Scalar constants are opportunistically packed into the 19 spare header
 * bits (either the high 19 bits with zero low bits, or a sign-extendable
 * low 19 bits); everything else is written out full-width after the header.
 */
static void
write_load_const(write_ctx *ctx, const nir_load_const_instr *lc)
{
   assert(lc->def.num_components >= 1 && lc->def.num_components <= 16);
   union packed_instr header;
   header.u32 = 0;

   header.load_const.instr_type = lc->instr.type;
   header.load_const.last_component = lc->def.num_components - 1;
   header.load_const.bit_size = encode_bit_size_3bits(lc->def.bit_size);
   header.load_const.packing = load_const_full;

   /* Try to pack 1-component constants into the 19 free bits in the header. */
   if (lc->def.num_components == 1) {
      switch (lc->def.bit_size) {
      case 64:
         if ((lc->value[0].u64 & 0x1fffffffffffull) == 0) {
            /* packed_value contains high 19 bits, low bits are 0 */
            header.load_const.packing = load_const_scalar_hi_19bits;
            header.load_const.packed_value = lc->value[0].u64 >> 45;
         } else if (((lc->value[0].i64 << 45) >> 45) == lc->value[0].i64) {
            /* packed_value contains low 19 bits, high bits are sign-extended */
            header.load_const.packing = load_const_scalar_lo_19bits_sext;
            header.load_const.packed_value = lc->value[0].u64;
         }
         break;

      case 32:
         if ((lc->value[0].u32 & 0x1fff) == 0) {
            header.load_const.packing = load_const_scalar_hi_19bits;
            header.load_const.packed_value = lc->value[0].u32 >> 13;
         } else if (((lc->value[0].i32 << 13) >> 13) == lc->value[0].i32) {
            header.load_const.packing = load_const_scalar_lo_19bits_sext;
            header.load_const.packed_value = lc->value[0].u32;
         }
         break;

      case 16:
         /* 16 bits and below always fit in the 19 packed bits. */
         header.load_const.packing = load_const_scalar_lo_19bits_sext;
         header.load_const.packed_value = lc->value[0].u16;
         break;
      case 8:
         header.load_const.packing = load_const_scalar_lo_19bits_sext;
         header.load_const.packed_value = lc->value[0].u8;
         break;
      case 1:
         header.load_const.packing = load_const_scalar_lo_19bits_sext;
         header.load_const.packed_value = lc->value[0].b;
         break;
      default:
         unreachable("invalid bit_size");
      }
   }

   blob_write_uint32(ctx->blob, header.u32);

   /* Values that didn't fit in the header follow it, narrowed to the def's
    * bit size.
    */
   if (header.load_const.packing == load_const_full) {
      switch (lc->def.bit_size) {
      case 64:
         blob_write_bytes(ctx->blob, lc->value,
                          sizeof(*lc->value) * lc->def.num_components);
         break;

      case 32:
         for (unsigned i = 0; i < lc->def.num_components; i++)
            blob_write_uint32(ctx->blob, lc->value[i].u32);
         break;

      case 16:
         for (unsigned i = 0; i < lc->def.num_components; i++)
            blob_write_uint16(ctx->blob, lc->value[i].u16);
         break;

      default:
         assert(lc->def.bit_size <= 8);
         for (unsigned i = 0; i < lc->def.num_components; i++)
            blob_write_uint8(ctx->blob, lc->value[i].u8);
         break;
      }
   }

   /* Register the SSA def so later instructions can reference it by index. */
   write_add_object(ctx, &lc->def);
}
/* Deserialize a load_const instruction; mirror of write_load_const. */
static nir_load_const_instr *
read_load_const(read_ctx *ctx, union packed_instr header)
{
   nir_load_const_instr *lc =
      nir_load_const_instr_create(ctx->nir, header.load_const.last_component + 1,
                                  decode_bit_size_3bits(header.load_const.bit_size));

   switch (header.load_const.packing) {
   case load_const_scalar_hi_19bits:
      /* packed_value held the high 19 bits; the low bits were all zero. */
      switch (lc->def.bit_size) {
      case 64:
         lc->value[0].u64 = (uint64_t)header.load_const.packed_value << 45;
         break;
      case 32:
         lc->value[0].u32 = (uint64_t)header.load_const.packed_value << 13;
         break;
      default:
         unreachable("invalid bit_size");
      }
      break;

   case load_const_scalar_lo_19bits_sext:
      /* packed_value held the low 19 bits; sign-extend where needed. */
      switch (lc->def.bit_size) {
      case 64:
         lc->value[0].i64 = ((int64_t)header.load_const.packed_value << 45) >> 45;
         break;
      case 32:
         lc->value[0].i32 = ((int32_t)header.load_const.packed_value << 13) >> 13;
         break;
      case 16:
         lc->value[0].u16 = header.load_const.packed_value;
         break;
      case 8:
         lc->value[0].u8 = header.load_const.packed_value;
         break;
      case 1:
         lc->value[0].b = header.load_const.packed_value;
         break;
      default:
         unreachable("invalid bit_size");
      }
      break;

   case load_const_full:
      /* Values follow the header, narrowed to the def's bit size. */
      switch (lc->def.bit_size) {
      case 64:
         blob_copy_bytes(ctx->blob, lc->value, sizeof(*lc->value) * lc->def.num_components);
         break;

      case 32:
         for (unsigned i = 0; i < lc->def.num_components; i++)
            lc->value[i].u32 = blob_read_uint32(ctx->blob);
         break;

      case 16:
         for (unsigned i = 0; i < lc->def.num_components; i++)
            lc->value[i].u16 = blob_read_uint16(ctx->blob);
         break;

      default:
         assert(lc->def.bit_size <= 8);
         for (unsigned i = 0; i < lc->def.num_components; i++)
            lc->value[i].u8 = blob_read_uint8(ctx->blob);
         break;
      }
      break;
   }

   /* Register the def at the same index the writer assigned it. */
   read_add_object(ctx, &lc->def);

   return lc;
}
/* Serialize an ssa_undef instruction: it is fully described by its packed
 * header (component count and bit size), so nothing follows the header.
 */
static void
write_ssa_undef(write_ctx *ctx, const nir_ssa_undef_instr *undef)
{
   assert(undef->def.num_components >= 1 && undef->def.num_components <= 16);

   union packed_instr header;
   header.u32 = 0;

   header.undef.instr_type = undef->instr.type;
   header.undef.last_component = undef->def.num_components - 1;
   header.undef.bit_size = encode_bit_size_3bits(undef->def.bit_size);

   blob_write_uint32(ctx->blob, header.u32);
   /* Register the def so later instructions can reference it by index. */
   write_add_object(ctx, &undef->def);
}
/* Deserialize an ssa_undef instruction; everything lives in the header. */
static nir_ssa_undef_instr *
read_ssa_undef(read_ctx *ctx, union packed_instr header)
{
   nir_ssa_undef_instr *undef =
      nir_ssa_undef_instr_create(ctx->nir, header.undef.last_component + 1,
                                 decode_bit_size_3bits(header.undef.bit_size));

   read_add_object(ctx, &undef->def);
   return undef;
}
/* Bit-packed form of the nir_tex_instr flags, written as one uint32 by
 * write_tex / read back by read_tex.  sizeof must stay 4 (STATIC_ASSERTed
 * at the use site).
 */
union packed_tex_data {
   uint32_t u32;
   struct {
      unsigned sampler_dim:4;
      unsigned dest_type:8;
      unsigned coord_components:3;
      unsigned is_array:1;
      unsigned is_shadow:1;
      unsigned is_new_style_shadow:1;
      unsigned component:2;
      unsigned texture_non_uniform:1;
      unsigned sampler_non_uniform:1;
      unsigned unused:8; /* Mark unused for valgrind. */
   } u;
};
/* Serialize a texture instruction.
 *
 * Wire order: packed header (via write_dest), texture/sampler indices,
 * tg4 offsets (only for nir_texop_tg4), one packed_tex_data word, then
 * each source tagged with its src_type.  read_tex must match this order.
 */
static void
write_tex(write_ctx *ctx, const nir_tex_instr *tex)
{
   /* The header has only 4 bits each for num_srcs and op. */
   assert(tex->num_srcs < 16);
   assert(tex->op < 16);

   union packed_instr header;
   header.u32 = 0;

   header.tex.instr_type = tex->instr.type;
   header.tex.num_srcs = tex->num_srcs;
   header.tex.op = tex->op;

   /* write_dest emits the header. */
   write_dest(ctx, &tex->dest, header, tex->instr.type);

   blob_write_uint32(ctx->blob, tex->texture_index);
   blob_write_uint32(ctx->blob, tex->sampler_index);
   if (tex->op == nir_texop_tg4)
      blob_write_bytes(ctx->blob, tex->tg4_offsets, sizeof(tex->tg4_offsets));

   STATIC_ASSERT(sizeof(union packed_tex_data) == sizeof(uint32_t));
   union packed_tex_data packed = {
      .u.sampler_dim = tex->sampler_dim,
      .u.dest_type = tex->dest_type,
      .u.coord_components = tex->coord_components,
      .u.is_array = tex->is_array,
      .u.is_shadow = tex->is_shadow,
      .u.is_new_style_shadow = tex->is_new_style_shadow,
      .u.component = tex->component,
      .u.texture_non_uniform = tex->texture_non_uniform,
      .u.sampler_non_uniform = tex->sampler_non_uniform,
   };
   blob_write_uint32(ctx->blob, packed.u32);

   for (unsigned i = 0; i < tex->num_srcs; i++) {
      /* Stash the src_type in the packed src header. */
      union packed_src src;
      src.u32 = 0;
      src.tex.src_type = tex->src[i].src_type;
      write_src_full(ctx, &tex->src[i].src, src);
   }
}
/* Deserialize a texture instruction; mirror of write_tex. */
static nir_tex_instr *
read_tex(read_ctx *ctx, union packed_instr header)
{
   nir_tex_instr *tex = nir_tex_instr_create(ctx->nir, header.tex.num_srcs);

   read_dest(ctx, &tex->dest, &tex->instr, header);

   tex->op = header.tex.op;
   tex->texture_index = blob_read_uint32(ctx->blob);
   tex->sampler_index = blob_read_uint32(ctx->blob);
   if (tex->op == nir_texop_tg4)
      blob_copy_bytes(ctx->blob, tex->tg4_offsets, sizeof(tex->tg4_offsets));

   /* Unpack the single flags word (see union packed_tex_data). */
   union packed_tex_data packed;
   packed.u32 = blob_read_uint32(ctx->blob);
   tex->sampler_dim = packed.u.sampler_dim;
   tex->dest_type = packed.u.dest_type;
   tex->coord_components = packed.u.coord_components;
   tex->is_array = packed.u.is_array;
   tex->is_shadow = packed.u.is_shadow;
   tex->is_new_style_shadow = packed.u.is_new_style_shadow;
   tex->component = packed.u.component;
   tex->texture_non_uniform = packed.u.texture_non_uniform;
   tex->sampler_non_uniform = packed.u.sampler_non_uniform;

   for (unsigned i = 0; i < tex->num_srcs; i++) {
      /* Each src carries its src_type in its packed header. */
      union packed_src src = read_src(ctx, &tex->src[i].src, &tex->instr);
      tex->src[i].src_type = src.tex.src_type;
   }

   return tex;
}
/* Serialize a phi instruction.
 *
 * Phi sources may point forward to objects that haven't been assigned an
 * index yet, so only placeholders are reserved here; write_fixup_phis
 * patches them once everything has been indexed.
 */
static void
write_phi(write_ctx *ctx, const nir_phi_instr *phi)
{
   union packed_instr header;
   header.u32 = 0;

   header.phi.instr_type = phi->instr.type;
   header.phi.num_srcs = exec_list_length(&phi->srcs);

   /* Phi nodes are special, since they may reference SSA definitions and
    * basic blocks that don't exist yet. We leave two empty uint32_t's here,
    * and then store enough information so that a later fixup pass can fill
    * them in correctly.
    */
   write_dest(ctx, &phi->dest, header, phi->instr.type);

   nir_foreach_phi_src(src, phi) {
      assert(src->src.is_ssa);
      /* Reserve two adjacent uint32 slots: [ssa def index, pred block index]. */
      size_t blob_offset = blob_reserve_uint32(ctx->blob);
      ASSERTED size_t blob_offset2 = blob_reserve_uint32(ctx->blob);
      assert(blob_offset + sizeof(uint32_t) == blob_offset2);
      write_phi_fixup fixup = {
         .blob_offset = blob_offset,
         .src = src->src.ssa,
         .block = src->pred,
      };
      util_dynarray_append(&ctx->phi_fixups, write_phi_fixup, fixup);
   }
}
/* Second pass for phis: patch the reserved slots (see write_phi) with the
 * now-known object indices of each source's SSA def and predecessor block.
 * Called at the end of write_function_impl, then the fixup list is reset.
 */
static void
write_fixup_phis(write_ctx *ctx)
{
   util_dynarray_foreach(&ctx->phi_fixups, write_phi_fixup, fixup) {
      uint32_t *blob_ptr = (uint32_t *)(ctx->blob->data + fixup->blob_offset);
      blob_ptr[0] = write_lookup_object(ctx, fixup->src);
      blob_ptr[1] = write_lookup_object(ctx, fixup->block);
   }

   util_dynarray_clear(&ctx->phi_fixups);
}
/* Deserialize a phi instruction.
 *
 * Sources are left as raw indices smuggled through the pointer fields;
 * read_fixup_phis resolves them to real objects once the whole
 * function_impl has been read.
 */
static nir_phi_instr *
read_phi(read_ctx *ctx, nir_block *blk, union packed_instr header)
{
   nir_phi_instr *phi = nir_phi_instr_create(ctx->nir);

   read_dest(ctx, &phi->dest, &phi->instr, header);

   /* For similar reasons as before, we just store the index directly into the
    * pointer, and let a later pass resolve the phi sources.
    *
    * In order to ensure that the copied sources (which are just the indices
    * from the blob for now) don't get inserted into the old shader's use-def
    * lists, we have to add the phi instruction *before* we set up its
    * sources.
    */
   nir_instr_insert_after_block(blk, &phi->instr);

   for (unsigned i = 0; i < header.phi.num_srcs; i++) {
      nir_phi_src *src = ralloc(phi, nir_phi_src);

      /* Stash the raw indices in the pointer fields for the fixup pass. */
      src->src.is_ssa = true;
      src->src.ssa = (nir_ssa_def *)(uintptr_t) blob_read_uint32(ctx->blob);
      src->pred = (nir_block *)(uintptr_t) blob_read_uint32(ctx->blob);

      /* Since we're not letting nir_insert_instr handle use/def stuff for us,
       * we have to set the parent_instr manually.  It doesn't really matter
       * when we do it, so we might as well do it here.
       */
      src->src.parent_instr = &phi->instr;

      /* Stash it in the list of phi sources.  We'll walk this list and fix up
       * sources at the very end of read_function_impl.
       */
      list_add(&src->src.use_link, &ctx->phi_srcs);

      exec_list_push_tail(&phi->srcs, &src->node);
   }

   return phi;
}
/* Resolve the raw indices stashed by read_phi into real objects and hook
 * each source into its SSA def's use list.  Afterward the pending list
 * must be empty.
 */
static void
read_fixup_phis(read_ctx *ctx)
{
   list_for_each_entry_safe(nir_phi_src, src, &ctx->phi_srcs, src.use_link) {
      /* The pointer fields currently hold blob indices (see read_phi). */
      src->pred = read_lookup_object(ctx, (uintptr_t)src->pred);
      src->src.ssa = read_lookup_object(ctx, (uintptr_t)src->src.ssa);

      /* Remove from this list */
      list_del(&src->src.use_link);

      list_addtail(&src->src.use_link, &src->src.ssa->uses);
   }
   assert(list_is_empty(&ctx->phi_srcs));
}
/* Serialize a jump instruction; it fits entirely in the packed header. */
static void
write_jump(write_ctx *ctx, const nir_jump_instr *jmp)
{
   /* Only 2 header bits are available for the jump type. */
   assert(jmp->type < 4);

   union packed_instr header;
   header.u32 = 0;

   header.jump.instr_type = jmp->instr.type;
   header.jump.type = jmp->type;

   blob_write_uint32(ctx->blob, header.u32);
}
/* Deserialize a jump instruction from its header alone. */
static nir_jump_instr *
read_jump(read_ctx *ctx, union packed_instr header)
{
   nir_jump_instr *jmp = nir_jump_instr_create(ctx->nir, header.jump.type);
   return jmp;
}
/* Serialize a call instruction: the callee's object index followed by the
 * parameter sources.  (The instr-type word is written by write_instr.)
 */
static void
write_call(write_ctx *ctx, const nir_call_instr *call)
{
   blob_write_uint32(ctx->blob, write_lookup_object(ctx, call->callee));

   for (unsigned i = 0; i < call->num_params; i++)
      write_src(ctx, &call->params[i]);
}
/* Deserialize a call instruction; mirror of write_call. */
static nir_call_instr *
read_call(read_ctx *ctx)
{
   nir_function *callee = read_object(ctx);
   nir_call_instr *call = nir_call_instr_create(ctx->nir, callee);

   for (unsigned i = 0; i < call->num_params; i++)
      read_src(ctx, &call->params[i], call);

   return call;
}
/* Serialize one instruction, dispatching on its type.
 *
 * Most instruction kinds embed the type in their packed header; calls have
 * no packed header, so the type word is written explicitly before them.
 */
static void
write_instr(write_ctx *ctx, const nir_instr *instr)
{
   /* We have only 4 bits for the instruction type. */
   assert(instr->type < 16);

   switch (instr->type) {
   case nir_instr_type_alu:
      write_alu(ctx, nir_instr_as_alu(instr));
      break;
   case nir_instr_type_deref:
      write_deref(ctx, nir_instr_as_deref(instr));
      break;
   case nir_instr_type_intrinsic:
      write_intrinsic(ctx, nir_instr_as_intrinsic(instr));
      break;
   case nir_instr_type_load_const:
      write_load_const(ctx, nir_instr_as_load_const(instr));
      break;
   case nir_instr_type_ssa_undef:
      write_ssa_undef(ctx, nir_instr_as_ssa_undef(instr));
      break;
   case nir_instr_type_tex:
      write_tex(ctx, nir_instr_as_tex(instr));
      break;
   case nir_instr_type_phi:
      write_phi(ctx, nir_instr_as_phi(instr));
      break;
   case nir_instr_type_jump:
      write_jump(ctx, nir_instr_as_jump(instr));
      break;
   case nir_instr_type_call:
      /* Calls have no packed header, so write the type word directly. */
      blob_write_uint32(ctx->blob, instr->type);
      write_call(ctx, nir_instr_as_call(instr));
      break;
   case nir_instr_type_parallel_copy:
      unreachable("Cannot write parallel copies");
   default:
      unreachable("bad instr type");
   }
}
/* Return the number of instructions read. */
static unsigned
read_instr(read_ctx *ctx, nir_block *block)
{
   STATIC_ASSERT(sizeof(union packed_instr) == 4);
   union packed_instr header;
   header.u32 = blob_read_uint32(ctx->blob);
   nir_instr *instr;

   switch (header.any.instr_type) {
   case nir_instr_type_alu:
      /* One ALU header may be shared by several scalarized ALU instructions
       * that follow it, so more than one instruction can be produced here.
       */
      for (unsigned i = 0; i <= header.alu.num_followup_alu_sharing_header; i++)
         nir_instr_insert_after_block(block, &read_alu(ctx, header)->instr);
      return header.alu.num_followup_alu_sharing_header + 1;
   case nir_instr_type_deref:
      instr = &read_deref(ctx, header)->instr;
      break;
   case nir_instr_type_intrinsic:
      instr = &read_intrinsic(ctx, header)->instr;
      break;
   case nir_instr_type_load_const:
      instr = &read_load_const(ctx, header)->instr;
      break;
   case nir_instr_type_ssa_undef:
      instr = &read_ssa_undef(ctx, header)->instr;
      break;
   case nir_instr_type_tex:
      instr = &read_tex(ctx, header)->instr;
      break;
   case nir_instr_type_phi:
      /* Phi instructions are a bit of a special case when reading because we
       * don't want inserting the instruction to automatically handle use/defs
       * for us.  Instead, we need to wait until all the blocks/instructions
       * are read so that we can set their sources up.
       */
      read_phi(ctx, block, header);
      return 1;
   case nir_instr_type_jump:
      instr = &read_jump(ctx, header)->instr;
      break;
   case nir_instr_type_call:
      instr = &read_call(ctx)->instr;
      break;
   case nir_instr_type_parallel_copy:
      unreachable("Cannot read parallel copies");
   default:
      unreachable("bad instr type");
   }

   nir_instr_insert_after_block(block, instr);
   return 1;
}
/* Serialize a basic block: register it for later references (e.g. phi
 * predecessors), write its instruction count, then each instruction.
 */
static void
write_block(write_ctx *ctx, const nir_block *block)
{
   write_add_object(ctx, block);
   blob_write_uint32(ctx->blob, exec_list_length(&block->instr_list));

   /* Reset the ALU-header-sharing state at block boundaries. */
   ctx->last_instr_type = ~0;
   ctx->last_alu_header_offset = 0;

   nir_foreach_instr(instr, block) {
      write_instr(ctx, instr);
      ctx->last_instr_type = instr->type;
   }
}
static void
read_block(read_ctx *ctx, struct exec_list *cf_list)
{
   /* Don't actually create a new block.  Just use the one from the tail of
    * the list.  NIR guarantees that the tail of the list is a block and that
    * no two blocks are side-by-side in the IR;  It should be empty.
    */
   nir_block *block =
      exec_node_data(nir_block, exec_list_get_tail(cf_list), cf_node.node);

   read_add_object(ctx, block);
   unsigned num_instrs = blob_read_uint32(ctx->blob);
   /* read_instr returns how many instructions it produced (ALU instructions
    * sharing one header can yield several), so advance by that amount.
    */
   for (unsigned i = 0; i < num_instrs;) {
      i += read_instr(ctx, block);
   }
}
/* The CF-list and CF-node functions are mutually recursive (lists contain
 * ifs/loops, which contain lists), so forward-declare the list functions.
 */
static void
write_cf_list(write_ctx *ctx, const struct exec_list *cf_list);

static void
read_cf_list(read_ctx *ctx, struct exec_list *cf_list);
/* Serialize an if: the condition source, then the then- and else-lists. */
static void
write_if(write_ctx *ctx, nir_if *nif)
{
   write_src(ctx, &nif->condition);

   write_cf_list(ctx, &nif->then_list);
   write_cf_list(ctx, &nif->else_list);
}
/* Deserialize an if; mirror of write_if.  The node is inserted into the
 * CF list before its branches are read so the nested reads see it in place.
 */
static void
read_if(read_ctx *ctx, struct exec_list *cf_list)
{
   nir_if *nif = nir_if_create(ctx->nir);

   read_src(ctx, &nif->condition, nif);

   nir_cf_node_insert_end(cf_list, &nif->cf_node);

   read_cf_list(ctx, &nif->then_list);
   read_cf_list(ctx, &nif->else_list);
}
/* Serialize a loop: just its body CF list. */
static void
write_loop(write_ctx *ctx, nir_loop *loop)
{
   write_cf_list(ctx, &loop->body);
}
/* Deserialize a loop; insert it into the CF list, then read its body. */
static void
read_loop(read_ctx *ctx, struct exec_list *cf_list)
{
   nir_loop *loop = nir_loop_create(ctx->nir);

   nir_cf_node_insert_end(cf_list, &loop->cf_node);

   read_cf_list(ctx, &loop->body);
}
/* Serialize one control-flow node: a type tag followed by the node body. */
static void
write_cf_node(write_ctx *ctx, nir_cf_node *cf)
{
   blob_write_uint32(ctx->blob, cf->type);

   switch (cf->type) {
   case nir_cf_node_block:
      write_block(ctx, nir_cf_node_as_block(cf));
      break;
   case nir_cf_node_if:
      write_if(ctx, nir_cf_node_as_if(cf));
      break;
   case nir_cf_node_loop:
      write_loop(ctx, nir_cf_node_as_loop(cf));
      break;
   default:
      unreachable("bad cf type");
   }
}
/* Deserialize one control-flow node, dispatching on its type tag. */
static void
read_cf_node(read_ctx *ctx, struct exec_list *list)
{
   nir_cf_node_type type = blob_read_uint32(ctx->blob);

   switch (type) {
   case nir_cf_node_block:
      read_block(ctx, list);
      break;
   case nir_cf_node_if:
      read_if(ctx, list);
      break;
   case nir_cf_node_loop:
      read_loop(ctx, list);
      break;
   default:
      unreachable("bad cf type");
   }
}
/* Serialize a control-flow list: node count, then each node in order. */
static void
write_cf_list(write_ctx *ctx, const struct exec_list *cf_list)
{
   blob_write_uint32(ctx->blob, exec_list_length(cf_list));
   foreach_list_typed(nir_cf_node, cf, node, cf_list) {
      write_cf_node(ctx, cf);
   }
}
/* Deserialize a control-flow list; mirror of write_cf_list. */
static void
read_cf_list(read_ctx *ctx, struct exec_list *cf_list)
{
   uint32_t num_cf_nodes = blob_read_uint32(ctx->blob);
   for (unsigned i = 0; i < num_cf_nodes; i++)
      read_cf_node(ctx, cf_list);
}
/* Serialize a function_impl: structured flag, locals, registers, reg_alloc
 * counter, the body CF tree, and finally the deferred phi-source patches.
 */
static void
write_function_impl(write_ctx *ctx, const nir_function_impl *fi)
{
   blob_write_uint8(ctx->blob, fi->structured);

   write_var_list(ctx, &fi->locals);
   write_reg_list(ctx, &fi->registers);
   blob_write_uint32(ctx->blob, fi->reg_alloc);

   write_cf_list(ctx, &fi->body);
   /* All objects now have indices, so the phi placeholders can be patched. */
   write_fixup_phis(ctx);
}
/* Deserialize a function_impl; mirror of write_function_impl. */
static nir_function_impl *
read_function_impl(read_ctx *ctx, nir_function *fxn)
{
   nir_function_impl *fi = nir_function_impl_create_bare(ctx->nir);
   fi->function = fxn;

   fi->structured = blob_read_uint8(ctx->blob);

   read_var_list(ctx, &fi->locals);
   read_reg_list(ctx, &fi->registers);
   fi->reg_alloc = blob_read_uint32(ctx->blob);

   read_cf_list(ctx, &fi->body);
   /* Resolve phi sources now that every block and def has been read. */
   read_fixup_phis(ctx);

   /* Metadata is not serialized; the fresh impl starts with none valid. */
   fi->valid_metadata = 0;

   return fi;
}
/* Serialize a nir_function (not its impl).
 *
 * flags bit 0x1 = is_entrypoint, 0x2 = a name string follows, 0x4 = the
 * function has an impl (read_function checks exactly these bits).
 */
static void
write_function(write_ctx *ctx, const nir_function *fxn)
{
   uint32_t flags = fxn->is_entrypoint;
   if (fxn->name)
      flags |= 0x2;
   if (fxn->impl)
      flags |= 0x4;
   blob_write_uint32(ctx->blob, flags);
   if (fxn->name)
      blob_write_string(ctx->blob, fxn->name);

   /* Register the function so call instructions can reference it. */
   write_add_object(ctx, fxn);

   blob_write_uint32(ctx->blob, fxn->num_params);
   for (unsigned i = 0; i < fxn->num_params; i++) {
      /* Pack num_components into the low byte and bit_size into the next. */
      uint32_t val =
         ((uint32_t)fxn->params[i].num_components) |
         ((uint32_t)fxn->params[i].bit_size) << 8;
      blob_write_uint32(ctx->blob, val);
   }

   /* At first glance, it looks like we should write the function_impl here.
    * However, call instructions need to be able to reference at least the
    * function and those will get processed as we write the function_impls.
    * We stop here and write function_impls as a second pass.
    */
}
/* Deserialize a nir_function; mirror of write_function.  The impl itself is
 * read later (second pass); flag 0x4 marks that one exists, recorded with
 * the NIR_SERIALIZE_FUNC_HAS_IMPL sentinel.
 */
static void
read_function(read_ctx *ctx)
{
   uint32_t flags = blob_read_uint32(ctx->blob);
   bool has_name = flags & 0x2;
   char *name = has_name ? blob_read_string(ctx->blob) : NULL;

   nir_function *fxn = nir_function_create(ctx->nir, name);

   /* Register at the same index the writer assigned. */
   read_add_object(ctx, fxn);

   fxn->num_params = blob_read_uint32(ctx->blob);
   fxn->params = ralloc_array(fxn, nir_parameter, fxn->num_params);
   for (unsigned i = 0; i < fxn->num_params; i++) {
      uint32_t val = blob_read_uint32(ctx->blob);
      fxn->params[i].num_components = val & 0xff;
      fxn->params[i].bit_size = (val >> 8) & 0xff;
   }

   fxn->is_entrypoint = flags & 0x1;
   if (flags & 0x4)
      fxn->impl = NIR_SERIALIZE_FUNC_HAS_IMPL;
}
/**
 * Serialize NIR into a binary blob.
 *
 * \param strip  Don't serialize information only useful for debugging,
 *               such as variable names, making cache hits from similar
 *               shaders more likely.
 */
void
nir_serialize(struct blob *blob, const nir_shader *nir, bool strip)
{
   write_ctx ctx = {0};
   ctx.remap_table = _mesa_pointer_hash_table_create(NULL);
   ctx.blob = blob;
   ctx.nir = nir;
   ctx.strip = strip;
   util_dynarray_init(&ctx.phi_fixups, NULL);

   /* The object-table size isn't known until the end; reserve a slot for it
    * and patch it after everything has been written.
    */
   size_t idx_size_offset = blob_reserve_uint32(blob);

   /* shader_info is written by memcpy below, so the name/label strings are
    * written separately first and the struct's pointers NULLed out.
    */
   struct shader_info info = nir->info;
   uint32_t strings = 0;
   if (!strip && info.name)
      strings |= 0x1;
   if (!strip && info.label)
      strings |= 0x2;
   blob_write_uint32(blob, strings);
   if (!strip && info.name)
      blob_write_string(blob, info.name);
   if (!strip && info.label)
      blob_write_string(blob, info.label);
   info.name = info.label = NULL;
   blob_write_bytes(blob, (uint8_t *) &info, sizeof(info));

   write_var_list(&ctx, &nir->variables);

   blob_write_uint32(blob, nir->num_inputs);
   blob_write_uint32(blob, nir->num_uniforms);
   blob_write_uint32(blob, nir->num_outputs);
   blob_write_uint32(blob, nir->num_shared);
   blob_write_uint32(blob, nir->scratch_size);

   /* First pass: the functions themselves, so that call instructions can
    * reference them; second pass: their impls.
    */
   blob_write_uint32(blob, exec_list_length(&nir->functions));
   nir_foreach_function(fxn, nir) {
      write_function(&ctx, fxn);
   }

   nir_foreach_function(fxn, nir) {
      if (fxn->impl)
         write_function_impl(&ctx, fxn->impl);
   }

   blob_write_uint32(blob, nir->constant_data_size);
   if (nir->constant_data_size > 0)
      blob_write_bytes(blob, nir->constant_data, nir->constant_data_size);

   /* Patch the reserved slot with the final object count. */
   *(uint32_t *)(blob->data + idx_size_offset) = ctx.next_idx;

   _mesa_hash_table_destroy(ctx.remap_table, NULL);
   util_dynarray_fini(&ctx.phi_fixups);
}
/* Deserialize a NIR shader from a blob produced by nir_serialize.
 * The new shader is ralloc'ed under mem_ctx.
 */
nir_shader *
nir_deserialize(void *mem_ctx,
                const struct nir_shader_compiler_options *options,
                struct blob_reader *blob)
{
   read_ctx ctx = {0};
   ctx.blob = blob;
   list_inithead(&ctx.phi_srcs);
   /* The writer patched the total object count into the first word. */
   ctx.idx_table_len = blob_read_uint32(blob);
   ctx.idx_table = calloc(ctx.idx_table_len, sizeof(uintptr_t));

   /* Name/label strings precede the memcpy'd shader_info (see nir_serialize). */
   uint32_t strings = blob_read_uint32(blob);
   char *name = (strings & 0x1) ? blob_read_string(blob) : NULL;
   char *label = (strings & 0x2) ? blob_read_string(blob) : NULL;

   struct shader_info info;
   blob_copy_bytes(blob, (uint8_t *) &info, sizeof(info));

   ctx.nir = nir_shader_create(mem_ctx, info.stage, options, NULL);

   /* Re-attach the strings, duplicated onto the new shader's ralloc ctx. */
   info.name = name ? ralloc_strdup(ctx.nir, name) : NULL;
   info.label = label ? ralloc_strdup(ctx.nir, label) : NULL;

   ctx.nir->info = info;

   read_var_list(&ctx, &ctx.nir->variables);

   ctx.nir->num_inputs = blob_read_uint32(blob);
   ctx.nir->num_uniforms = blob_read_uint32(blob);
   ctx.nir->num_outputs = blob_read_uint32(blob);
   ctx.nir->num_shared = blob_read_uint32(blob);
   ctx.nir->scratch_size = blob_read_uint32(blob);

   /* Two passes, mirroring the writer: functions first, then their impls. */
   unsigned num_functions = blob_read_uint32(blob);
   for (unsigned i = 0; i < num_functions; i++)
      read_function(&ctx);

   nir_foreach_function(fxn, ctx.nir) {
      if (fxn->impl == NIR_SERIALIZE_FUNC_HAS_IMPL)
         fxn->impl = read_function_impl(&ctx, fxn);
   }

   ctx.nir->constant_data_size = blob_read_uint32(blob);
   if (ctx.nir->constant_data_size > 0) {
      ctx.nir->constant_data =
         ralloc_size(ctx.nir, ctx.nir->constant_data_size);
      blob_copy_bytes(blob, ctx.nir->constant_data,
                      ctx.nir->constant_data_size);
   }

   free(ctx.idx_table);

   return ctx.nir;
}
2104 nir_shader_serialize_deserialize(nir_shader
*shader
)
2106 const struct nir_shader_compiler_options
*options
= shader
->options
;
2110 nir_serialize(&writer
, shader
, false);
2112 /* Delete all of dest's ralloc children but leave dest alone */
2113 void *dead_ctx
= ralloc_context(NULL
);
2114 ralloc_adopt(dead_ctx
, shader
);
2115 ralloc_free(dead_ctx
);
2117 dead_ctx
= ralloc_context(NULL
);
2119 struct blob_reader reader
;
2120 blob_reader_init(&reader
, writer
.data
, writer
.size
);
2121 nir_shader
*copy
= nir_deserialize(dead_ctx
, options
, &reader
);
2123 blob_finish(&writer
);
2125 nir_shader_replace(shader
, copy
);
2126 ralloc_free(dead_ctx
);