/*
 * Copyright © 2017 Connor Abbott
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
24 #include "nir_serialize.h"
25 #include "nir_control_flow.h"
26 #include "util/u_dynarray.h"
27 #include "util/u_math.h"
29 #define NIR_SERIALIZE_FUNC_HAS_IMPL ((void *)(intptr_t)1)
30 #define MAX_OBJECT_IDS (1 << 20)
39 const nir_shader
*nir
;
43 /* maps pointer to index */
44 struct hash_table
*remap_table
;
46 /* the next index to assign to a NIR in-memory object */
49 /* Array of write_phi_fixup structs representing phi sources that need to
50 * be resolved in the second pass.
52 struct util_dynarray phi_fixups
;
54 /* The last serialized type. */
55 const struct glsl_type
*last_type
;
56 const struct glsl_type
*last_interface_type
;
57 struct nir_variable_data last_var_data
;
59 /* Don't write optional data such as variable names. */
66 struct blob_reader
*blob
;
68 /* the next index to assign to a NIR in-memory object */
71 /* The length of the index -> object table */
72 uint32_t idx_table_len
;
74 /* map from index to deserialized pointer */
77 /* List of phi sources. */
78 struct list_head phi_srcs
;
80 /* The last deserialized type. */
81 const struct glsl_type
*last_type
;
82 const struct glsl_type
*last_interface_type
;
83 struct nir_variable_data last_var_data
;
87 write_add_object(write_ctx
*ctx
, const void *obj
)
89 uint32_t index
= ctx
->next_idx
++;
90 assert(index
!= MAX_OBJECT_IDS
);
91 _mesa_hash_table_insert(ctx
->remap_table
, obj
, (void *)(uintptr_t) index
);
95 write_lookup_object(write_ctx
*ctx
, const void *obj
)
97 struct hash_entry
*entry
= _mesa_hash_table_search(ctx
->remap_table
, obj
);
99 return (uint32_t)(uintptr_t) entry
->data
;
103 write_object(write_ctx
*ctx
, const void *obj
)
105 blob_write_uint32(ctx
->blob
, write_lookup_object(ctx
, obj
));
109 read_add_object(read_ctx
*ctx
, void *obj
)
111 assert(ctx
->next_idx
< ctx
->idx_table_len
);
112 ctx
->idx_table
[ctx
->next_idx
++] = obj
;
116 read_lookup_object(read_ctx
*ctx
, uint32_t idx
)
118 assert(idx
< ctx
->idx_table_len
);
119 return ctx
->idx_table
[idx
];
123 read_object(read_ctx
*ctx
)
125 return read_lookup_object(ctx
, blob_read_uint32(ctx
->blob
));
/* Pack a power-of-two (or zero) bit size into 3 bits: 0 stays 0, and a
 * nonzero size N encodes as log2(N) + 1.
 */
static uint32_t
encode_bit_size_3bits(uint8_t bit_size)
{
   /* Encode values of 0, 1, 2, 4, 8, 16, 32, 64 in 3 bits. */
   assert(bit_size <= 64 && util_is_power_of_two_or_zero(bit_size));
   if (bit_size)
      return util_logbase2(bit_size) + 1;
   return 0;
}
/* Inverse of encode_bit_size_3bits: 0 -> 0, k -> 1 << (k - 1). */
static uint8_t
decode_bit_size_3bits(uint8_t bit_size)
{
   if (bit_size)
      return 1 << (bit_size - 1);
   return 0;
}
/* Pack a component count (1-4, 8 or 16) into 3 bits: 1-4 map to themselves,
 * 8 -> 5 and 16 -> 6.
 */
static uint8_t
encode_num_components_in_3bits(uint8_t num_components)
{
   if (num_components <= 4)
      return num_components;
   if (num_components == 8)
      return 5;
   if (num_components == 16)
      return 6;

   unreachable("invalid number in num_components");
   return 0;
}
/* Inverse of encode_num_components_in_3bits: 1-4 map to themselves,
 * 5 -> 8 and 6 -> 16.
 */
static uint8_t
decode_num_components_in_3bits(uint8_t value)
{
   if (value <= 4)
      return value;
   if (value == 5)
      return 8;
   if (value == 6)
      return 16;

   unreachable("invalid num_components encoding");
   return 0;
}
175 write_constant(write_ctx
*ctx
, const nir_constant
*c
)
177 blob_write_bytes(ctx
->blob
, c
->values
, sizeof(c
->values
));
178 blob_write_uint32(ctx
->blob
, c
->num_elements
);
179 for (unsigned i
= 0; i
< c
->num_elements
; i
++)
180 write_constant(ctx
, c
->elements
[i
]);
183 static nir_constant
*
184 read_constant(read_ctx
*ctx
, nir_variable
*nvar
)
186 nir_constant
*c
= ralloc(nvar
, nir_constant
);
188 blob_copy_bytes(ctx
->blob
, (uint8_t *)c
->values
, sizeof(c
->values
));
189 c
->num_elements
= blob_read_uint32(ctx
->blob
);
190 c
->elements
= ralloc_array(nvar
, nir_constant
*, c
->num_elements
);
191 for (unsigned i
= 0; i
< c
->num_elements
; i
++)
192 c
->elements
[i
] = read_constant(ctx
, nvar
);
/* How var->data is stored in the blob (2-bit field in packed_var). */
enum var_data_encoding {
   var_encode_full,
   var_encode_shader_temp,
   var_encode_function_temp,
   var_encode_location_diff,
};
/* One-word header describing which optional pieces of a nir_variable follow
 * in the blob.
 */
union packed_var {
   uint32_t u32;
   struct {
      unsigned has_name:1;
      unsigned has_constant_initializer:1;
      unsigned has_interface_type:1;
      unsigned num_state_slots:7;
      unsigned data_encoding:2;
      unsigned type_same_as_last:1;
      unsigned interface_type_same_as_last:1;
      unsigned _pad:1;
      unsigned num_members:16;
   } u;
};

/* Compact encoding of var->data as signed deltas from the previous
 * variable's locations (var_encode_location_diff).
 */
union packed_var_data_diff {
   uint32_t u32;
   struct {
      int location:13;
      int location_frac:3;
      int driver_location:16;
   } u;
};
229 write_variable(write_ctx
*ctx
, const nir_variable
*var
)
231 write_add_object(ctx
, var
);
233 assert(var
->num_state_slots
< (1 << 7));
234 assert(var
->num_members
< (1 << 16));
236 STATIC_ASSERT(sizeof(union packed_var
) == 4);
237 union packed_var flags
;
240 flags
.u
.has_name
= !ctx
->strip
&& var
->name
;
241 flags
.u
.has_constant_initializer
= !!(var
->constant_initializer
);
242 flags
.u
.has_interface_type
= !!(var
->interface_type
);
243 flags
.u
.type_same_as_last
= var
->type
== ctx
->last_type
;
244 flags
.u
.interface_type_same_as_last
=
245 var
->interface_type
&& var
->interface_type
== ctx
->last_interface_type
;
246 flags
.u
.num_state_slots
= var
->num_state_slots
;
247 flags
.u
.num_members
= var
->num_members
;
249 struct nir_variable_data data
= var
->data
;
251 /* When stripping, we expect that the location is no longer needed,
252 * which is typically after shaders are linked.
255 data
.mode
!= nir_var_shader_in
&&
256 data
.mode
!= nir_var_shader_out
)
259 /* Temporary variables don't serialize var->data. */
260 if (data
.mode
== nir_var_shader_temp
)
261 flags
.u
.data_encoding
= var_encode_shader_temp
;
262 else if (data
.mode
== nir_var_function_temp
)
263 flags
.u
.data_encoding
= var_encode_function_temp
;
265 struct nir_variable_data tmp
= data
;
267 tmp
.location
= ctx
->last_var_data
.location
;
268 tmp
.location_frac
= ctx
->last_var_data
.location_frac
;
269 tmp
.driver_location
= ctx
->last_var_data
.driver_location
;
271 /* See if we can encode only the difference in locations from the last
274 if (memcmp(&ctx
->last_var_data
, &tmp
, sizeof(tmp
)) == 0 &&
275 abs((int)data
.location
-
276 (int)ctx
->last_var_data
.location
) < (1 << 12) &&
277 abs((int)data
.driver_location
-
278 (int)ctx
->last_var_data
.driver_location
) < (1 << 15))
279 flags
.u
.data_encoding
= var_encode_location_diff
;
281 flags
.u
.data_encoding
= var_encode_full
;
284 blob_write_uint32(ctx
->blob
, flags
.u32
);
286 if (!flags
.u
.type_same_as_last
) {
287 encode_type_to_blob(ctx
->blob
, var
->type
);
288 ctx
->last_type
= var
->type
;
291 if (var
->interface_type
&& !flags
.u
.interface_type_same_as_last
) {
292 encode_type_to_blob(ctx
->blob
, var
->interface_type
);
293 ctx
->last_interface_type
= var
->interface_type
;
296 if (flags
.u
.has_name
)
297 blob_write_string(ctx
->blob
, var
->name
);
299 if (flags
.u
.data_encoding
== var_encode_full
||
300 flags
.u
.data_encoding
== var_encode_location_diff
) {
301 if (flags
.u
.data_encoding
== var_encode_full
) {
302 blob_write_bytes(ctx
->blob
, &data
, sizeof(data
));
304 /* Serialize only the difference in locations from the last variable.
306 union packed_var_data_diff diff
;
308 diff
.u
.location
= data
.location
- ctx
->last_var_data
.location
;
309 diff
.u
.location_frac
= data
.location_frac
-
310 ctx
->last_var_data
.location_frac
;
311 diff
.u
.driver_location
= data
.driver_location
-
312 ctx
->last_var_data
.driver_location
;
314 blob_write_uint32(ctx
->blob
, diff
.u32
);
317 ctx
->last_var_data
= data
;
320 for (unsigned i
= 0; i
< var
->num_state_slots
; i
++) {
321 blob_write_bytes(ctx
->blob
, &var
->state_slots
[i
],
322 sizeof(var
->state_slots
[i
]));
324 if (var
->constant_initializer
)
325 write_constant(ctx
, var
->constant_initializer
);
326 if (var
->num_members
> 0) {
327 blob_write_bytes(ctx
->blob
, (uint8_t *) var
->members
,
328 var
->num_members
* sizeof(*var
->members
));
332 static nir_variable
*
333 read_variable(read_ctx
*ctx
)
335 nir_variable
*var
= rzalloc(ctx
->nir
, nir_variable
);
336 read_add_object(ctx
, var
);
338 union packed_var flags
;
339 flags
.u32
= blob_read_uint32(ctx
->blob
);
341 if (flags
.u
.type_same_as_last
) {
342 var
->type
= ctx
->last_type
;
344 var
->type
= decode_type_from_blob(ctx
->blob
);
345 ctx
->last_type
= var
->type
;
348 if (flags
.u
.has_interface_type
) {
349 if (flags
.u
.interface_type_same_as_last
) {
350 var
->interface_type
= ctx
->last_interface_type
;
352 var
->interface_type
= decode_type_from_blob(ctx
->blob
);
353 ctx
->last_interface_type
= var
->interface_type
;
357 if (flags
.u
.has_name
) {
358 const char *name
= blob_read_string(ctx
->blob
);
359 var
->name
= ralloc_strdup(var
, name
);
364 if (flags
.u
.data_encoding
== var_encode_shader_temp
)
365 var
->data
.mode
= nir_var_shader_temp
;
366 else if (flags
.u
.data_encoding
== var_encode_function_temp
)
367 var
->data
.mode
= nir_var_function_temp
;
368 else if (flags
.u
.data_encoding
== var_encode_full
) {
369 blob_copy_bytes(ctx
->blob
, (uint8_t *) &var
->data
, sizeof(var
->data
));
370 ctx
->last_var_data
= var
->data
;
371 } else { /* var_encode_location_diff */
372 union packed_var_data_diff diff
;
373 diff
.u32
= blob_read_uint32(ctx
->blob
);
375 var
->data
= ctx
->last_var_data
;
376 var
->data
.location
+= diff
.u
.location
;
377 var
->data
.location_frac
+= diff
.u
.location_frac
;
378 var
->data
.driver_location
+= diff
.u
.driver_location
;
380 ctx
->last_var_data
= var
->data
;
383 var
->num_state_slots
= flags
.u
.num_state_slots
;
384 if (var
->num_state_slots
!= 0) {
385 var
->state_slots
= ralloc_array(var
, nir_state_slot
,
386 var
->num_state_slots
);
387 for (unsigned i
= 0; i
< var
->num_state_slots
; i
++) {
388 blob_copy_bytes(ctx
->blob
, &var
->state_slots
[i
],
389 sizeof(var
->state_slots
[i
]));
392 if (flags
.u
.has_constant_initializer
)
393 var
->constant_initializer
= read_constant(ctx
, var
);
395 var
->constant_initializer
= NULL
;
396 var
->num_members
= flags
.u
.num_members
;
397 if (var
->num_members
> 0) {
398 var
->members
= ralloc_array(var
, struct nir_variable_data
,
400 blob_copy_bytes(ctx
->blob
, (uint8_t *) var
->members
,
401 var
->num_members
* sizeof(*var
->members
));
408 write_var_list(write_ctx
*ctx
, const struct exec_list
*src
)
410 blob_write_uint32(ctx
->blob
, exec_list_length(src
));
411 foreach_list_typed(nir_variable
, var
, node
, src
) {
412 write_variable(ctx
, var
);
417 read_var_list(read_ctx
*ctx
, struct exec_list
*dst
)
419 exec_list_make_empty(dst
);
420 unsigned num_vars
= blob_read_uint32(ctx
->blob
);
421 for (unsigned i
= 0; i
< num_vars
; i
++) {
422 nir_variable
*var
= read_variable(ctx
);
423 exec_list_push_tail(dst
, &var
->node
);
428 write_register(write_ctx
*ctx
, const nir_register
*reg
)
430 write_add_object(ctx
, reg
);
431 blob_write_uint32(ctx
->blob
, reg
->num_components
);
432 blob_write_uint32(ctx
->blob
, reg
->bit_size
);
433 blob_write_uint32(ctx
->blob
, reg
->num_array_elems
);
434 blob_write_uint32(ctx
->blob
, reg
->index
);
435 blob_write_uint32(ctx
->blob
, !ctx
->strip
&& reg
->name
);
436 if (!ctx
->strip
&& reg
->name
)
437 blob_write_string(ctx
->blob
, reg
->name
);
440 static nir_register
*
441 read_register(read_ctx
*ctx
)
443 nir_register
*reg
= ralloc(ctx
->nir
, nir_register
);
444 read_add_object(ctx
, reg
);
445 reg
->num_components
= blob_read_uint32(ctx
->blob
);
446 reg
->bit_size
= blob_read_uint32(ctx
->blob
);
447 reg
->num_array_elems
= blob_read_uint32(ctx
->blob
);
448 reg
->index
= blob_read_uint32(ctx
->blob
);
449 bool has_name
= blob_read_uint32(ctx
->blob
);
451 const char *name
= blob_read_string(ctx
->blob
);
452 reg
->name
= ralloc_strdup(reg
, name
);
457 list_inithead(®
->uses
);
458 list_inithead(®
->defs
);
459 list_inithead(®
->if_uses
);
465 write_reg_list(write_ctx
*ctx
, const struct exec_list
*src
)
467 blob_write_uint32(ctx
->blob
, exec_list_length(src
));
468 foreach_list_typed(nir_register
, reg
, node
, src
)
469 write_register(ctx
, reg
);
473 read_reg_list(read_ctx
*ctx
, struct exec_list
*dst
)
475 exec_list_make_empty(dst
);
476 unsigned num_regs
= blob_read_uint32(ctx
->blob
);
477 for (unsigned i
= 0; i
< num_regs
; i
++) {
478 nir_register
*reg
= read_register(ctx
);
479 exec_list_push_tail(dst
, ®
->node
);
/* One-word encoding of a nir_src. The low 22 bits (header) are shared by
 * all views; the top 10 bits (footer) carry per-use modifiers (ALU swizzle
 * or tex source type).
 */
union packed_src {
   uint32_t u32;
   struct {
      unsigned is_ssa:1;   /* <-- Header */
      unsigned is_indirect:1;
      unsigned object_idx:20;
      unsigned _footer:10; /* <-- Footer */
   } any;
   struct {
      unsigned _header:22; /* <-- Header */
      unsigned negate:1;   /* <-- Footer */
      unsigned abs:1;
      unsigned swizzle_x:2;
      unsigned swizzle_y:2;
      unsigned swizzle_z:2;
      unsigned swizzle_w:2;
   } alu;
   struct {
      unsigned _header:22; /* <-- Header */
      unsigned src_type:5; /* <-- Footer */
      unsigned _pad:5;
   } tex;
};
508 write_src_full(write_ctx
*ctx
, const nir_src
*src
, union packed_src header
)
510 /* Since sources are very frequent, we try to save some space when storing
511 * them. In particular, we store whether the source is a register and
512 * whether the register has an indirect index in the low two bits. We can
513 * assume that the high two bits of the index are zero, since otherwise our
514 * address space would've been exhausted allocating the remap table!
516 header
.any
.is_ssa
= src
->is_ssa
;
518 header
.any
.object_idx
= write_lookup_object(ctx
, src
->ssa
);
519 blob_write_uint32(ctx
->blob
, header
.u32
);
521 header
.any
.object_idx
= write_lookup_object(ctx
, src
->reg
.reg
);
522 header
.any
.is_indirect
= !!src
->reg
.indirect
;
523 blob_write_uint32(ctx
->blob
, header
.u32
);
524 blob_write_uint32(ctx
->blob
, src
->reg
.base_offset
);
525 if (src
->reg
.indirect
) {
526 union packed_src header
= {0};
527 write_src_full(ctx
, src
->reg
.indirect
, header
);
533 write_src(write_ctx
*ctx
, const nir_src
*src
)
535 union packed_src header
= {0};
536 write_src_full(ctx
, src
, header
);
539 static union packed_src
540 read_src(read_ctx
*ctx
, nir_src
*src
, void *mem_ctx
)
542 STATIC_ASSERT(sizeof(union packed_src
) == 4);
543 union packed_src header
;
544 header
.u32
= blob_read_uint32(ctx
->blob
);
546 src
->is_ssa
= header
.any
.is_ssa
;
548 src
->ssa
= read_lookup_object(ctx
, header
.any
.object_idx
);
550 src
->reg
.reg
= read_lookup_object(ctx
, header
.any
.object_idx
);
551 src
->reg
.base_offset
= blob_read_uint32(ctx
->blob
);
552 if (header
.any
.is_indirect
) {
553 src
->reg
.indirect
= ralloc(mem_ctx
, nir_src
);
554 read_src(ctx
, src
->reg
.indirect
, mem_ctx
);
556 src
->reg
.indirect
= NULL
;
/* One-byte encoding of a nir_dest, stored in the top byte of the packed
 * instruction word.
 */
union packed_dest {
   uint8_t u8;
   struct {
      uint8_t is_ssa:1;
      uint8_t has_name:1;
      uint8_t num_components:3;
      uint8_t bit_size:3;
   } ssa;
   struct {
      uint8_t is_ssa:1;
      uint8_t is_indirect:1;
      uint8_t _pad:6;
   } reg;
};

/* One-word instruction header. Every view starts with the 4-bit instruction
 * type and (where present) ends with the 8-bit packed_dest byte.
 */
union packed_instr {
   uint32_t u32;
   struct {
      unsigned instr_type:4; /* always present */
      unsigned _pad:20;
      unsigned dest:8;       /* always last */
   } any;
   struct {
      unsigned instr_type:4;
      unsigned exact:1;
      unsigned no_signed_wrap:1;
      unsigned no_unsigned_wrap:1;
      unsigned saturate:1;
      unsigned writemask:4;
      unsigned op:9;
      unsigned _pad:3;
      unsigned dest:8;
   } alu;
   struct {
      unsigned instr_type:4;
      unsigned deref_type:3;
      unsigned mode:10;
      unsigned _pad:7;
      unsigned dest:8;
   } deref;
   struct {
      unsigned instr_type:4;
      unsigned intrinsic:9;
      unsigned num_components:3;
      unsigned _pad:8;
      unsigned dest:8;
   } intrinsic;
   struct {
      unsigned instr_type:4;
      unsigned last_component:4;
      unsigned bit_size:3;
      unsigned _pad:13;
      unsigned dest:8;
   } load_const;
   struct {
      unsigned instr_type:4;
      unsigned last_component:4;
      unsigned bit_size:3;
      unsigned _pad:13;
      unsigned dest:8;
   } undef;
   struct {
      unsigned instr_type:4;
      unsigned num_srcs:4;
      unsigned op:4;
      unsigned texture_array_size:12;
      unsigned dest:8;
   } tex;
   struct {
      unsigned instr_type:4;
      unsigned num_srcs:20;
      unsigned dest:8;
   } phi;
   struct {
      unsigned instr_type:4;
      unsigned type:2;
      unsigned _pad:26;
   } jump;
};
640 /* Write "lo24" as low 24 bits in the first uint32. */
642 write_dest(write_ctx
*ctx
, const nir_dest
*dst
, union packed_instr header
)
644 STATIC_ASSERT(sizeof(union packed_dest
) == 1);
645 union packed_dest dest
;
648 dest
.ssa
.is_ssa
= dst
->is_ssa
;
650 dest
.ssa
.has_name
= !ctx
->strip
&& dst
->ssa
.name
;
651 dest
.ssa
.num_components
=
652 encode_num_components_in_3bits(dst
->ssa
.num_components
);
653 dest
.ssa
.bit_size
= encode_bit_size_3bits(dst
->ssa
.bit_size
);
655 dest
.reg
.is_indirect
= !!(dst
->reg
.indirect
);
658 header
.any
.dest
= dest
.u8
;
659 blob_write_uint32(ctx
->blob
, header
.u32
);
662 write_add_object(ctx
, &dst
->ssa
);
663 if (dest
.ssa
.has_name
)
664 blob_write_string(ctx
->blob
, dst
->ssa
.name
);
666 blob_write_uint32(ctx
->blob
, write_lookup_object(ctx
, dst
->reg
.reg
));
667 blob_write_uint32(ctx
->blob
, dst
->reg
.base_offset
);
668 if (dst
->reg
.indirect
)
669 write_src(ctx
, dst
->reg
.indirect
);
674 read_dest(read_ctx
*ctx
, nir_dest
*dst
, nir_instr
*instr
,
675 union packed_instr header
)
677 union packed_dest dest
;
678 dest
.u8
= header
.any
.dest
;
680 if (dest
.ssa
.is_ssa
) {
681 unsigned bit_size
= decode_bit_size_3bits(dest
.ssa
.bit_size
);
682 unsigned num_components
=
683 decode_num_components_in_3bits(dest
.ssa
.num_components
);
684 char *name
= dest
.ssa
.has_name
? blob_read_string(ctx
->blob
) : NULL
;
685 nir_ssa_dest_init(instr
, dst
, num_components
, bit_size
, name
);
686 read_add_object(ctx
, &dst
->ssa
);
688 dst
->reg
.reg
= read_object(ctx
);
689 dst
->reg
.base_offset
= blob_read_uint32(ctx
->blob
);
690 if (dest
.reg
.is_indirect
) {
691 dst
->reg
.indirect
= ralloc(instr
, nir_src
);
692 read_src(ctx
, dst
->reg
.indirect
, instr
);
698 write_alu(write_ctx
*ctx
, const nir_alu_instr
*alu
)
700 /* 9 bits for nir_op */
701 STATIC_ASSERT(nir_num_opcodes
<= 512);
702 union packed_instr header
;
705 header
.alu
.instr_type
= alu
->instr
.type
;
706 header
.alu
.exact
= alu
->exact
;
707 header
.alu
.no_signed_wrap
= alu
->no_signed_wrap
;
708 header
.alu
.no_unsigned_wrap
= alu
->no_unsigned_wrap
;
709 header
.alu
.saturate
= alu
->dest
.saturate
;
710 header
.alu
.writemask
= alu
->dest
.write_mask
;
711 header
.alu
.op
= alu
->op
;
713 write_dest(ctx
, &alu
->dest
.dest
, header
);
715 for (unsigned i
= 0; i
< nir_op_infos
[alu
->op
].num_inputs
; i
++) {
716 union packed_src src
;
719 src
.alu
.negate
= alu
->src
[i
].negate
;
720 src
.alu
.abs
= alu
->src
[i
].abs
;
721 src
.alu
.swizzle_x
= alu
->src
[i
].swizzle
[0];
722 src
.alu
.swizzle_y
= alu
->src
[i
].swizzle
[1];
723 src
.alu
.swizzle_z
= alu
->src
[i
].swizzle
[2];
724 src
.alu
.swizzle_w
= alu
->src
[i
].swizzle
[3];
726 write_src_full(ctx
, &alu
->src
[i
].src
, src
);
730 static nir_alu_instr
*
731 read_alu(read_ctx
*ctx
, union packed_instr header
)
733 nir_alu_instr
*alu
= nir_alu_instr_create(ctx
->nir
, header
.alu
.op
);
735 alu
->exact
= header
.alu
.exact
;
736 alu
->no_signed_wrap
= header
.alu
.no_signed_wrap
;
737 alu
->no_unsigned_wrap
= header
.alu
.no_unsigned_wrap
;
738 alu
->dest
.saturate
= header
.alu
.saturate
;
739 alu
->dest
.write_mask
= header
.alu
.writemask
;
741 read_dest(ctx
, &alu
->dest
.dest
, &alu
->instr
, header
);
743 for (unsigned i
= 0; i
< nir_op_infos
[header
.alu
.op
].num_inputs
; i
++) {
744 union packed_src src
= read_src(ctx
, &alu
->src
[i
].src
, &alu
->instr
);
746 alu
->src
[i
].negate
= src
.alu
.negate
;
747 alu
->src
[i
].abs
= src
.alu
.abs
;
748 alu
->src
[i
].swizzle
[0] = src
.alu
.swizzle_x
;
749 alu
->src
[i
].swizzle
[1] = src
.alu
.swizzle_y
;
750 alu
->src
[i
].swizzle
[2] = src
.alu
.swizzle_z
;
751 alu
->src
[i
].swizzle
[3] = src
.alu
.swizzle_w
;
758 write_deref(write_ctx
*ctx
, const nir_deref_instr
*deref
)
760 assert(deref
->deref_type
< 8);
761 assert(deref
->mode
< (1 << 10));
763 union packed_instr header
;
766 header
.deref
.instr_type
= deref
->instr
.type
;
767 header
.deref
.deref_type
= deref
->deref_type
;
768 header
.deref
.mode
= deref
->mode
;
770 write_dest(ctx
, &deref
->dest
, header
);
771 encode_type_to_blob(ctx
->blob
, deref
->type
);
773 if (deref
->deref_type
== nir_deref_type_var
) {
774 write_object(ctx
, deref
->var
);
778 write_src(ctx
, &deref
->parent
);
780 switch (deref
->deref_type
) {
781 case nir_deref_type_struct
:
782 blob_write_uint32(ctx
->blob
, deref
->strct
.index
);
785 case nir_deref_type_array
:
786 case nir_deref_type_ptr_as_array
:
787 write_src(ctx
, &deref
->arr
.index
);
790 case nir_deref_type_cast
:
791 blob_write_uint32(ctx
->blob
, deref
->cast
.ptr_stride
);
794 case nir_deref_type_array_wildcard
:
799 unreachable("Invalid deref type");
803 static nir_deref_instr
*
804 read_deref(read_ctx
*ctx
, union packed_instr header
)
806 nir_deref_type deref_type
= header
.deref
.deref_type
;
807 nir_deref_instr
*deref
= nir_deref_instr_create(ctx
->nir
, deref_type
);
809 read_dest(ctx
, &deref
->dest
, &deref
->instr
, header
);
811 deref
->mode
= header
.deref
.mode
;
812 deref
->type
= decode_type_from_blob(ctx
->blob
);
814 if (deref_type
== nir_deref_type_var
) {
815 deref
->var
= read_object(ctx
);
819 read_src(ctx
, &deref
->parent
, &deref
->instr
);
821 switch (deref
->deref_type
) {
822 case nir_deref_type_struct
:
823 deref
->strct
.index
= blob_read_uint32(ctx
->blob
);
826 case nir_deref_type_array
:
827 case nir_deref_type_ptr_as_array
:
828 read_src(ctx
, &deref
->arr
.index
, &deref
->instr
);
831 case nir_deref_type_cast
:
832 deref
->cast
.ptr_stride
= blob_read_uint32(ctx
->blob
);
835 case nir_deref_type_array_wildcard
:
840 unreachable("Invalid deref type");
847 write_intrinsic(write_ctx
*ctx
, const nir_intrinsic_instr
*intrin
)
849 /* 9 bits for nir_intrinsic_op */
850 STATIC_ASSERT(nir_num_intrinsics
<= 512);
851 unsigned num_srcs
= nir_intrinsic_infos
[intrin
->intrinsic
].num_srcs
;
852 unsigned num_indices
= nir_intrinsic_infos
[intrin
->intrinsic
].num_indices
;
853 assert(intrin
->intrinsic
< 512);
855 union packed_instr header
;
858 header
.intrinsic
.instr_type
= intrin
->instr
.type
;
859 header
.intrinsic
.intrinsic
= intrin
->intrinsic
;
860 header
.intrinsic
.num_components
=
861 encode_num_components_in_3bits(intrin
->num_components
);
863 if (nir_intrinsic_infos
[intrin
->intrinsic
].has_dest
)
864 write_dest(ctx
, &intrin
->dest
, header
);
866 blob_write_uint32(ctx
->blob
, header
.u32
);
868 for (unsigned i
= 0; i
< num_srcs
; i
++)
869 write_src(ctx
, &intrin
->src
[i
]);
871 for (unsigned i
= 0; i
< num_indices
; i
++)
872 blob_write_uint32(ctx
->blob
, intrin
->const_index
[i
]);
875 static nir_intrinsic_instr
*
876 read_intrinsic(read_ctx
*ctx
, union packed_instr header
)
878 nir_intrinsic_op op
= header
.intrinsic
.intrinsic
;
879 nir_intrinsic_instr
*intrin
= nir_intrinsic_instr_create(ctx
->nir
, op
);
881 unsigned num_srcs
= nir_intrinsic_infos
[op
].num_srcs
;
882 unsigned num_indices
= nir_intrinsic_infos
[op
].num_indices
;
884 intrin
->num_components
=
885 decode_num_components_in_3bits(header
.intrinsic
.num_components
);
887 if (nir_intrinsic_infos
[op
].has_dest
)
888 read_dest(ctx
, &intrin
->dest
, &intrin
->instr
, header
);
890 for (unsigned i
= 0; i
< num_srcs
; i
++)
891 read_src(ctx
, &intrin
->src
[i
], &intrin
->instr
);
893 for (unsigned i
= 0; i
< num_indices
; i
++)
894 intrin
->const_index
[i
] = blob_read_uint32(ctx
->blob
);
900 write_load_const(write_ctx
*ctx
, const nir_load_const_instr
*lc
)
902 assert(lc
->def
.num_components
>= 1 && lc
->def
.num_components
<= 16);
903 union packed_instr header
;
906 header
.load_const
.instr_type
= lc
->instr
.type
;
907 header
.load_const
.last_component
= lc
->def
.num_components
- 1;
908 header
.load_const
.bit_size
= encode_bit_size_3bits(lc
->def
.bit_size
);
910 blob_write_uint32(ctx
->blob
, header
.u32
);
912 switch (lc
->def
.bit_size
) {
914 blob_write_bytes(ctx
->blob
, lc
->value
,
915 sizeof(*lc
->value
) * lc
->def
.num_components
);
919 for (unsigned i
= 0; i
< lc
->def
.num_components
; i
++)
920 blob_write_uint32(ctx
->blob
, lc
->value
[i
].u32
);
924 for (unsigned i
= 0; i
< lc
->def
.num_components
; i
++)
925 blob_write_uint16(ctx
->blob
, lc
->value
[i
].u16
);
929 assert(lc
->def
.bit_size
<= 8);
930 for (unsigned i
= 0; i
< lc
->def
.num_components
; i
++)
931 blob_write_uint8(ctx
->blob
, lc
->value
[i
].u8
);
935 write_add_object(ctx
, &lc
->def
);
938 static nir_load_const_instr
*
939 read_load_const(read_ctx
*ctx
, union packed_instr header
)
941 nir_load_const_instr
*lc
=
942 nir_load_const_instr_create(ctx
->nir
, header
.load_const
.last_component
+ 1,
943 decode_bit_size_3bits(header
.load_const
.bit_size
));
945 switch (lc
->def
.bit_size
) {
947 blob_copy_bytes(ctx
->blob
, lc
->value
, sizeof(*lc
->value
) * lc
->def
.num_components
);
951 for (unsigned i
= 0; i
< lc
->def
.num_components
; i
++)
952 lc
->value
[i
].u32
= blob_read_uint32(ctx
->blob
);
956 for (unsigned i
= 0; i
< lc
->def
.num_components
; i
++)
957 lc
->value
[i
].u16
= blob_read_uint16(ctx
->blob
);
961 assert(lc
->def
.bit_size
<= 8);
962 for (unsigned i
= 0; i
< lc
->def
.num_components
; i
++)
963 lc
->value
[i
].u8
= blob_read_uint8(ctx
->blob
);
967 read_add_object(ctx
, &lc
->def
);
972 write_ssa_undef(write_ctx
*ctx
, const nir_ssa_undef_instr
*undef
)
974 assert(undef
->def
.num_components
>= 1 && undef
->def
.num_components
<= 16);
976 union packed_instr header
;
979 header
.undef
.instr_type
= undef
->instr
.type
;
980 header
.undef
.last_component
= undef
->def
.num_components
- 1;
981 header
.undef
.bit_size
= encode_bit_size_3bits(undef
->def
.bit_size
);
983 blob_write_uint32(ctx
->blob
, header
.u32
);
984 write_add_object(ctx
, &undef
->def
);
987 static nir_ssa_undef_instr
*
988 read_ssa_undef(read_ctx
*ctx
, union packed_instr header
)
990 nir_ssa_undef_instr
*undef
=
991 nir_ssa_undef_instr_create(ctx
->nir
, header
.undef
.last_component
+ 1,
992 decode_bit_size_3bits(header
.undef
.bit_size
));
994 read_add_object(ctx
, &undef
->def
);
998 union packed_tex_data
{
1001 enum glsl_sampler_dim sampler_dim
:4;
1002 nir_alu_type dest_type
:8;
1003 unsigned coord_components
:3;
1004 unsigned is_array
:1;
1005 unsigned is_shadow
:1;
1006 unsigned is_new_style_shadow
:1;
1007 unsigned component
:2;
1008 unsigned unused
:10; /* Mark unused for valgrind. */
1013 write_tex(write_ctx
*ctx
, const nir_tex_instr
*tex
)
1015 assert(tex
->num_srcs
< 16);
1016 assert(tex
->op
< 16);
1017 assert(tex
->texture_array_size
< 1024);
1019 union packed_instr header
;
1022 header
.tex
.instr_type
= tex
->instr
.type
;
1023 header
.tex
.num_srcs
= tex
->num_srcs
;
1024 header
.tex
.op
= tex
->op
;
1025 header
.tex
.texture_array_size
= tex
->texture_array_size
;
1027 write_dest(ctx
, &tex
->dest
, header
);
1029 blob_write_uint32(ctx
->blob
, tex
->texture_index
);
1030 blob_write_uint32(ctx
->blob
, tex
->sampler_index
);
1031 if (tex
->op
== nir_texop_tg4
)
1032 blob_write_bytes(ctx
->blob
, tex
->tg4_offsets
, sizeof(tex
->tg4_offsets
));
1034 STATIC_ASSERT(sizeof(union packed_tex_data
) == sizeof(uint32_t));
1035 union packed_tex_data packed
= {
1036 .u
.sampler_dim
= tex
->sampler_dim
,
1037 .u
.dest_type
= tex
->dest_type
,
1038 .u
.coord_components
= tex
->coord_components
,
1039 .u
.is_array
= tex
->is_array
,
1040 .u
.is_shadow
= tex
->is_shadow
,
1041 .u
.is_new_style_shadow
= tex
->is_new_style_shadow
,
1042 .u
.component
= tex
->component
,
1044 blob_write_uint32(ctx
->blob
, packed
.u32
);
1046 for (unsigned i
= 0; i
< tex
->num_srcs
; i
++) {
1047 union packed_src src
;
1049 src
.tex
.src_type
= tex
->src
[i
].src_type
;
1050 write_src_full(ctx
, &tex
->src
[i
].src
, src
);
1054 static nir_tex_instr
*
1055 read_tex(read_ctx
*ctx
, union packed_instr header
)
1057 nir_tex_instr
*tex
= nir_tex_instr_create(ctx
->nir
, header
.tex
.num_srcs
);
1059 read_dest(ctx
, &tex
->dest
, &tex
->instr
, header
);
1061 tex
->op
= header
.tex
.op
;
1062 tex
->texture_index
= blob_read_uint32(ctx
->blob
);
1063 tex
->texture_array_size
= header
.tex
.texture_array_size
;
1064 tex
->sampler_index
= blob_read_uint32(ctx
->blob
);
1065 if (tex
->op
== nir_texop_tg4
)
1066 blob_copy_bytes(ctx
->blob
, tex
->tg4_offsets
, sizeof(tex
->tg4_offsets
));
1068 union packed_tex_data packed
;
1069 packed
.u32
= blob_read_uint32(ctx
->blob
);
1070 tex
->sampler_dim
= packed
.u
.sampler_dim
;
1071 tex
->dest_type
= packed
.u
.dest_type
;
1072 tex
->coord_components
= packed
.u
.coord_components
;
1073 tex
->is_array
= packed
.u
.is_array
;
1074 tex
->is_shadow
= packed
.u
.is_shadow
;
1075 tex
->is_new_style_shadow
= packed
.u
.is_new_style_shadow
;
1076 tex
->component
= packed
.u
.component
;
1078 for (unsigned i
= 0; i
< tex
->num_srcs
; i
++) {
1079 union packed_src src
= read_src(ctx
, &tex
->src
[i
].src
, &tex
->instr
);
1080 tex
->src
[i
].src_type
= src
.tex
.src_type
;
1087 write_phi(write_ctx
*ctx
, const nir_phi_instr
*phi
)
1089 union packed_instr header
;
1092 header
.phi
.instr_type
= phi
->instr
.type
;
1093 header
.phi
.num_srcs
= exec_list_length(&phi
->srcs
);
1095 /* Phi nodes are special, since they may reference SSA definitions and
1096 * basic blocks that don't exist yet. We leave two empty uint32_t's here,
1097 * and then store enough information so that a later fixup pass can fill
1098 * them in correctly.
1100 write_dest(ctx
, &phi
->dest
, header
);
1102 nir_foreach_phi_src(src
, phi
) {
1103 assert(src
->src
.is_ssa
);
1104 size_t blob_offset
= blob_reserve_uint32(ctx
->blob
);
1105 ASSERTED
size_t blob_offset2
= blob_reserve_uint32(ctx
->blob
);
1106 assert(blob_offset
+ sizeof(uint32_t) == blob_offset2
);
1107 write_phi_fixup fixup
= {
1108 .blob_offset
= blob_offset
,
1109 .src
= src
->src
.ssa
,
1112 util_dynarray_append(&ctx
->phi_fixups
, write_phi_fixup
, fixup
);
1117 write_fixup_phis(write_ctx
*ctx
)
1119 util_dynarray_foreach(&ctx
->phi_fixups
, write_phi_fixup
, fixup
) {
1120 uint32_t *blob_ptr
= (uint32_t *)(ctx
->blob
->data
+ fixup
->blob_offset
);
1121 blob_ptr
[0] = write_lookup_object(ctx
, fixup
->src
);
1122 blob_ptr
[1] = write_lookup_object(ctx
, fixup
->block
);
1125 util_dynarray_clear(&ctx
->phi_fixups
);
1128 static nir_phi_instr
*
1129 read_phi(read_ctx
*ctx
, nir_block
*blk
, union packed_instr header
)
1131 nir_phi_instr
*phi
= nir_phi_instr_create(ctx
->nir
);
1133 read_dest(ctx
, &phi
->dest
, &phi
->instr
, header
);
1135 /* For similar reasons as before, we just store the index directly into the
1136 * pointer, and let a later pass resolve the phi sources.
1138 * In order to ensure that the copied sources (which are just the indices
1139 * from the blob for now) don't get inserted into the old shader's use-def
1140 * lists, we have to add the phi instruction *before* we set up its
1143 nir_instr_insert_after_block(blk
, &phi
->instr
);
1145 for (unsigned i
= 0; i
< header
.phi
.num_srcs
; i
++) {
1146 nir_phi_src
*src
= ralloc(phi
, nir_phi_src
);
1148 src
->src
.is_ssa
= true;
1149 src
->src
.ssa
= (nir_ssa_def
*)(uintptr_t) blob_read_uint32(ctx
->blob
);
1150 src
->pred
= (nir_block
*)(uintptr_t) blob_read_uint32(ctx
->blob
);
1152 /* Since we're not letting nir_insert_instr handle use/def stuff for us,
1153 * we have to set the parent_instr manually. It doesn't really matter
1154 * when we do it, so we might as well do it here.
1156 src
->src
.parent_instr
= &phi
->instr
;
1158 /* Stash it in the list of phi sources. We'll walk this list and fix up
1159 * sources at the very end of read_function_impl.
1161 list_add(&src
->src
.use_link
, &ctx
->phi_srcs
);
1163 exec_list_push_tail(&phi
->srcs
, &src
->node
);
1170 read_fixup_phis(read_ctx
*ctx
)
1172 list_for_each_entry_safe(nir_phi_src
, src
, &ctx
->phi_srcs
, src
.use_link
) {
1173 src
->pred
= read_lookup_object(ctx
, (uintptr_t)src
->pred
);
1174 src
->src
.ssa
= read_lookup_object(ctx
, (uintptr_t)src
->src
.ssa
);
1176 /* Remove from this list */
1177 list_del(&src
->src
.use_link
);
1179 list_addtail(&src
->src
.use_link
, &src
->src
.ssa
->uses
);
1181 assert(list_is_empty(&ctx
->phi_srcs
));
1185 write_jump(write_ctx
*ctx
, const nir_jump_instr
*jmp
)
1187 assert(jmp
->type
< 4);
1189 union packed_instr header
;
1192 header
.jump
.instr_type
= jmp
->instr
.type
;
1193 header
.jump
.type
= jmp
->type
;
1195 blob_write_uint32(ctx
->blob
, header
.u32
);
1198 static nir_jump_instr
*
1199 read_jump(read_ctx
*ctx
, union packed_instr header
)
1201 nir_jump_instr
*jmp
= nir_jump_instr_create(ctx
->nir
, header
.jump
.type
);
1206 write_call(write_ctx
*ctx
, const nir_call_instr
*call
)
1208 blob_write_uint32(ctx
->blob
, write_lookup_object(ctx
, call
->callee
));
1210 for (unsigned i
= 0; i
< call
->num_params
; i
++)
1211 write_src(ctx
, &call
->params
[i
]);
1214 static nir_call_instr
*
1215 read_call(read_ctx
*ctx
)
1217 nir_function
*callee
= read_object(ctx
);
1218 nir_call_instr
*call
= nir_call_instr_create(ctx
->nir
, callee
);
1220 for (unsigned i
= 0; i
< call
->num_params
; i
++)
1221 read_src(ctx
, &call
->params
[i
], call
);
1227 write_instr(write_ctx
*ctx
, const nir_instr
*instr
)
1229 /* We have only 4 bits for the instruction type. */
1230 assert(instr
->type
< 16);
1232 switch (instr
->type
) {
1233 case nir_instr_type_alu
:
1234 write_alu(ctx
, nir_instr_as_alu(instr
));
1236 case nir_instr_type_deref
:
1237 write_deref(ctx
, nir_instr_as_deref(instr
));
1239 case nir_instr_type_intrinsic
:
1240 write_intrinsic(ctx
, nir_instr_as_intrinsic(instr
));
1242 case nir_instr_type_load_const
:
1243 write_load_const(ctx
, nir_instr_as_load_const(instr
));
1245 case nir_instr_type_ssa_undef
:
1246 write_ssa_undef(ctx
, nir_instr_as_ssa_undef(instr
));
1248 case nir_instr_type_tex
:
1249 write_tex(ctx
, nir_instr_as_tex(instr
));
1251 case nir_instr_type_phi
:
1252 write_phi(ctx
, nir_instr_as_phi(instr
));
1254 case nir_instr_type_jump
:
1255 write_jump(ctx
, nir_instr_as_jump(instr
));
1257 case nir_instr_type_call
:
1258 blob_write_uint32(ctx
->blob
, instr
->type
);
1259 write_call(ctx
, nir_instr_as_call(instr
));
1261 case nir_instr_type_parallel_copy
:
1262 unreachable("Cannot write parallel copies");
1264 unreachable("bad instr type");
1269 read_instr(read_ctx
*ctx
, nir_block
*block
)
1271 STATIC_ASSERT(sizeof(union packed_instr
) == 4);
1272 union packed_instr header
;
1273 header
.u32
= blob_read_uint32(ctx
->blob
);
1276 switch (header
.any
.instr_type
) {
1277 case nir_instr_type_alu
:
1278 instr
= &read_alu(ctx
, header
)->instr
;
1280 case nir_instr_type_deref
:
1281 instr
= &read_deref(ctx
, header
)->instr
;
1283 case nir_instr_type_intrinsic
:
1284 instr
= &read_intrinsic(ctx
, header
)->instr
;
1286 case nir_instr_type_load_const
:
1287 instr
= &read_load_const(ctx
, header
)->instr
;
1289 case nir_instr_type_ssa_undef
:
1290 instr
= &read_ssa_undef(ctx
, header
)->instr
;
1292 case nir_instr_type_tex
:
1293 instr
= &read_tex(ctx
, header
)->instr
;
1295 case nir_instr_type_phi
:
1296 /* Phi instructions are a bit of a special case when reading because we
1297 * don't want inserting the instruction to automatically handle use/defs
1298 * for us. Instead, we need to wait until all the blocks/instructions
1299 * are read so that we can set their sources up.
1301 read_phi(ctx
, block
, header
);
1303 case nir_instr_type_jump
:
1304 instr
= &read_jump(ctx
, header
)->instr
;
1306 case nir_instr_type_call
:
1307 instr
= &read_call(ctx
)->instr
;
1309 case nir_instr_type_parallel_copy
:
1310 unreachable("Cannot read parallel copies");
1312 unreachable("bad instr type");
1315 nir_instr_insert_after_block(block
, instr
);
1319 write_block(write_ctx
*ctx
, const nir_block
*block
)
1321 write_add_object(ctx
, block
);
1322 blob_write_uint32(ctx
->blob
, exec_list_length(&block
->instr_list
));
1323 nir_foreach_instr(instr
, block
)
1324 write_instr(ctx
, instr
);
1328 read_block(read_ctx
*ctx
, struct exec_list
*cf_list
)
1330 /* Don't actually create a new block. Just use the one from the tail of
1331 * the list. NIR guarantees that the tail of the list is a block and that
1332 * no two blocks are side-by-side in the IR; It should be empty.
1335 exec_node_data(nir_block
, exec_list_get_tail(cf_list
), cf_node
.node
);
1337 read_add_object(ctx
, block
);
1338 unsigned num_instrs
= blob_read_uint32(ctx
->blob
);
1339 for (unsigned i
= 0; i
< num_instrs
; i
++) {
1340 read_instr(ctx
, block
);
1345 write_cf_list(write_ctx
*ctx
, const struct exec_list
*cf_list
);
1348 read_cf_list(read_ctx
*ctx
, struct exec_list
*cf_list
);
1351 write_if(write_ctx
*ctx
, nir_if
*nif
)
1353 write_src(ctx
, &nif
->condition
);
1355 write_cf_list(ctx
, &nif
->then_list
);
1356 write_cf_list(ctx
, &nif
->else_list
);
1360 read_if(read_ctx
*ctx
, struct exec_list
*cf_list
)
1362 nir_if
*nif
= nir_if_create(ctx
->nir
);
1364 read_src(ctx
, &nif
->condition
, nif
);
1366 nir_cf_node_insert_end(cf_list
, &nif
->cf_node
);
1368 read_cf_list(ctx
, &nif
->then_list
);
1369 read_cf_list(ctx
, &nif
->else_list
);
1373 write_loop(write_ctx
*ctx
, nir_loop
*loop
)
1375 write_cf_list(ctx
, &loop
->body
);
1379 read_loop(read_ctx
*ctx
, struct exec_list
*cf_list
)
1381 nir_loop
*loop
= nir_loop_create(ctx
->nir
);
1383 nir_cf_node_insert_end(cf_list
, &loop
->cf_node
);
1385 read_cf_list(ctx
, &loop
->body
);
1389 write_cf_node(write_ctx
*ctx
, nir_cf_node
*cf
)
1391 blob_write_uint32(ctx
->blob
, cf
->type
);
1394 case nir_cf_node_block
:
1395 write_block(ctx
, nir_cf_node_as_block(cf
));
1397 case nir_cf_node_if
:
1398 write_if(ctx
, nir_cf_node_as_if(cf
));
1400 case nir_cf_node_loop
:
1401 write_loop(ctx
, nir_cf_node_as_loop(cf
));
1404 unreachable("bad cf type");
1409 read_cf_node(read_ctx
*ctx
, struct exec_list
*list
)
1411 nir_cf_node_type type
= blob_read_uint32(ctx
->blob
);
1414 case nir_cf_node_block
:
1415 read_block(ctx
, list
);
1417 case nir_cf_node_if
:
1420 case nir_cf_node_loop
:
1421 read_loop(ctx
, list
);
1424 unreachable("bad cf type");
1429 write_cf_list(write_ctx
*ctx
, const struct exec_list
*cf_list
)
1431 blob_write_uint32(ctx
->blob
, exec_list_length(cf_list
));
1432 foreach_list_typed(nir_cf_node
, cf
, node
, cf_list
) {
1433 write_cf_node(ctx
, cf
);
1438 read_cf_list(read_ctx
*ctx
, struct exec_list
*cf_list
)
1440 uint32_t num_cf_nodes
= blob_read_uint32(ctx
->blob
);
1441 for (unsigned i
= 0; i
< num_cf_nodes
; i
++)
1442 read_cf_node(ctx
, cf_list
);
1446 write_function_impl(write_ctx
*ctx
, const nir_function_impl
*fi
)
1448 write_var_list(ctx
, &fi
->locals
);
1449 write_reg_list(ctx
, &fi
->registers
);
1450 blob_write_uint32(ctx
->blob
, fi
->reg_alloc
);
1452 write_cf_list(ctx
, &fi
->body
);
1453 write_fixup_phis(ctx
);
1456 static nir_function_impl
*
1457 read_function_impl(read_ctx
*ctx
, nir_function
*fxn
)
1459 nir_function_impl
*fi
= nir_function_impl_create_bare(ctx
->nir
);
1462 read_var_list(ctx
, &fi
->locals
);
1463 read_reg_list(ctx
, &fi
->registers
);
1464 fi
->reg_alloc
= blob_read_uint32(ctx
->blob
);
1466 read_cf_list(ctx
, &fi
->body
);
1467 read_fixup_phis(ctx
);
1469 fi
->valid_metadata
= 0;
1475 write_function(write_ctx
*ctx
, const nir_function
*fxn
)
1477 uint32_t flags
= fxn
->is_entrypoint
;
1482 blob_write_uint32(ctx
->blob
, flags
);
1484 blob_write_string(ctx
->blob
, fxn
->name
);
1486 write_add_object(ctx
, fxn
);
1488 blob_write_uint32(ctx
->blob
, fxn
->num_params
);
1489 for (unsigned i
= 0; i
< fxn
->num_params
; i
++) {
1491 ((uint32_t)fxn
->params
[i
].num_components
) |
1492 ((uint32_t)fxn
->params
[i
].bit_size
) << 8;
1493 blob_write_uint32(ctx
->blob
, val
);
1496 /* At first glance, it looks like we should write the function_impl here.
1497 * However, call instructions need to be able to reference at least the
1498 * function and those will get processed as we write the function_impls.
1499 * We stop here and write function_impls as a second pass.
1504 read_function(read_ctx
*ctx
)
1506 uint32_t flags
= blob_read_uint32(ctx
->blob
);
1507 bool has_name
= flags
& 0x2;
1508 char *name
= has_name
? blob_read_string(ctx
->blob
) : NULL
;
1510 nir_function
*fxn
= nir_function_create(ctx
->nir
, name
);
1512 read_add_object(ctx
, fxn
);
1514 fxn
->num_params
= blob_read_uint32(ctx
->blob
);
1515 fxn
->params
= ralloc_array(fxn
, nir_parameter
, fxn
->num_params
);
1516 for (unsigned i
= 0; i
< fxn
->num_params
; i
++) {
1517 uint32_t val
= blob_read_uint32(ctx
->blob
);
1518 fxn
->params
[i
].num_components
= val
& 0xff;
1519 fxn
->params
[i
].bit_size
= (val
>> 8) & 0xff;
1522 fxn
->is_entrypoint
= flags
& 0x1;
1524 fxn
->impl
= NIR_SERIALIZE_FUNC_HAS_IMPL
;
1528 * Serialize NIR into a binary blob.
1530 * \param strip Don't serialize information only useful for debugging,
1531 * such as variable names, making cache hits from similar
1532 * shaders more likely.
1535 nir_serialize(struct blob
*blob
, const nir_shader
*nir
, bool strip
)
1537 write_ctx ctx
= {0};
1538 ctx
.remap_table
= _mesa_pointer_hash_table_create(NULL
);
1542 util_dynarray_init(&ctx
.phi_fixups
, NULL
);
1544 size_t idx_size_offset
= blob_reserve_uint32(blob
);
1546 struct shader_info info
= nir
->info
;
1547 uint32_t strings
= 0;
1548 if (!strip
&& info
.name
)
1550 if (!strip
&& info
.label
)
1552 blob_write_uint32(blob
, strings
);
1553 if (!strip
&& info
.name
)
1554 blob_write_string(blob
, info
.name
);
1555 if (!strip
&& info
.label
)
1556 blob_write_string(blob
, info
.label
);
1557 info
.name
= info
.label
= NULL
;
1558 blob_write_bytes(blob
, (uint8_t *) &info
, sizeof(info
));
1560 write_var_list(&ctx
, &nir
->uniforms
);
1561 write_var_list(&ctx
, &nir
->inputs
);
1562 write_var_list(&ctx
, &nir
->outputs
);
1563 write_var_list(&ctx
, &nir
->shared
);
1564 write_var_list(&ctx
, &nir
->globals
);
1565 write_var_list(&ctx
, &nir
->system_values
);
1567 blob_write_uint32(blob
, nir
->num_inputs
);
1568 blob_write_uint32(blob
, nir
->num_uniforms
);
1569 blob_write_uint32(blob
, nir
->num_outputs
);
1570 blob_write_uint32(blob
, nir
->num_shared
);
1571 blob_write_uint32(blob
, nir
->scratch_size
);
1573 blob_write_uint32(blob
, exec_list_length(&nir
->functions
));
1574 nir_foreach_function(fxn
, nir
) {
1575 write_function(&ctx
, fxn
);
1578 nir_foreach_function(fxn
, nir
) {
1580 write_function_impl(&ctx
, fxn
->impl
);
1583 blob_write_uint32(blob
, nir
->constant_data_size
);
1584 if (nir
->constant_data_size
> 0)
1585 blob_write_bytes(blob
, nir
->constant_data
, nir
->constant_data_size
);
1587 *(uint32_t *)(blob
->data
+ idx_size_offset
) = ctx
.next_idx
;
1589 _mesa_hash_table_destroy(ctx
.remap_table
, NULL
);
1590 util_dynarray_fini(&ctx
.phi_fixups
);
1594 nir_deserialize(void *mem_ctx
,
1595 const struct nir_shader_compiler_options
*options
,
1596 struct blob_reader
*blob
)
1600 list_inithead(&ctx
.phi_srcs
);
1601 ctx
.idx_table_len
= blob_read_uint32(blob
);
1602 ctx
.idx_table
= calloc(ctx
.idx_table_len
, sizeof(uintptr_t));
1604 uint32_t strings
= blob_read_uint32(blob
);
1605 char *name
= (strings
& 0x1) ? blob_read_string(blob
) : NULL
;
1606 char *label
= (strings
& 0x2) ? blob_read_string(blob
) : NULL
;
1608 struct shader_info info
;
1609 blob_copy_bytes(blob
, (uint8_t *) &info
, sizeof(info
));
1611 ctx
.nir
= nir_shader_create(mem_ctx
, info
.stage
, options
, NULL
);
1613 info
.name
= name
? ralloc_strdup(ctx
.nir
, name
) : NULL
;
1614 info
.label
= label
? ralloc_strdup(ctx
.nir
, label
) : NULL
;
1616 ctx
.nir
->info
= info
;
1618 read_var_list(&ctx
, &ctx
.nir
->uniforms
);
1619 read_var_list(&ctx
, &ctx
.nir
->inputs
);
1620 read_var_list(&ctx
, &ctx
.nir
->outputs
);
1621 read_var_list(&ctx
, &ctx
.nir
->shared
);
1622 read_var_list(&ctx
, &ctx
.nir
->globals
);
1623 read_var_list(&ctx
, &ctx
.nir
->system_values
);
1625 ctx
.nir
->num_inputs
= blob_read_uint32(blob
);
1626 ctx
.nir
->num_uniforms
= blob_read_uint32(blob
);
1627 ctx
.nir
->num_outputs
= blob_read_uint32(blob
);
1628 ctx
.nir
->num_shared
= blob_read_uint32(blob
);
1629 ctx
.nir
->scratch_size
= blob_read_uint32(blob
);
1631 unsigned num_functions
= blob_read_uint32(blob
);
1632 for (unsigned i
= 0; i
< num_functions
; i
++)
1633 read_function(&ctx
);
1635 nir_foreach_function(fxn
, ctx
.nir
) {
1636 if (fxn
->impl
== NIR_SERIALIZE_FUNC_HAS_IMPL
)
1637 fxn
->impl
= read_function_impl(&ctx
, fxn
);
1640 ctx
.nir
->constant_data_size
= blob_read_uint32(blob
);
1641 if (ctx
.nir
->constant_data_size
> 0) {
1642 ctx
.nir
->constant_data
=
1643 ralloc_size(ctx
.nir
, ctx
.nir
->constant_data_size
);
1644 blob_copy_bytes(blob
, ctx
.nir
->constant_data
,
1645 ctx
.nir
->constant_data_size
);
1648 free(ctx
.idx_table
);
1654 nir_shader_serialize_deserialize(nir_shader
*shader
)
1656 const struct nir_shader_compiler_options
*options
= shader
->options
;
1660 nir_serialize(&writer
, shader
, false);
1662 /* Delete all of dest's ralloc children but leave dest alone */
1663 void *dead_ctx
= ralloc_context(NULL
);
1664 ralloc_adopt(dead_ctx
, shader
);
1665 ralloc_free(dead_ctx
);
1667 dead_ctx
= ralloc_context(NULL
);
1669 struct blob_reader reader
;
1670 blob_reader_init(&reader
, writer
.data
, writer
.size
);
1671 nir_shader
*copy
= nir_deserialize(dead_ctx
, options
, &reader
);
1673 blob_finish(&writer
);
1675 nir_shader_replace(shader
, copy
);
1676 ralloc_free(dead_ctx
);