/*
 * Copyright © 2017 Connor Abbott
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
24 #include "nir_serialize.h"
25 #include "nir_control_flow.h"
26 #include "util/u_dynarray.h"
28 #define NIR_SERIALIZE_FUNC_HAS_IMPL ((void *)(intptr_t)1)
29 #define MAX_OBJECT_IDS (1 << 30)
38 const nir_shader
*nir
;
42 /* maps pointer to index */
43 struct hash_table
*remap_table
;
45 /* the next index to assign to a NIR in-memory object */
48 /* Array of write_phi_fixup structs representing phi sources that need to
49 * be resolved in the second pass.
51 struct util_dynarray phi_fixups
;
53 /* Don't write optional data such as variable names. */
60 struct blob_reader
*blob
;
62 /* the next index to assign to a NIR in-memory object */
65 /* The length of the index -> object table */
66 uint32_t idx_table_len
;
68 /* map from index to deserialized pointer */
71 /* List of phi sources. */
72 struct list_head phi_srcs
;
77 write_add_object(write_ctx
*ctx
, const void *obj
)
79 uint32_t index
= ctx
->next_idx
++;
80 assert(index
!= MAX_OBJECT_IDS
);
81 _mesa_hash_table_insert(ctx
->remap_table
, obj
, (void *)(uintptr_t) index
);
85 write_lookup_object(write_ctx
*ctx
, const void *obj
)
87 struct hash_entry
*entry
= _mesa_hash_table_search(ctx
->remap_table
, obj
);
89 return (uint32_t)(uintptr_t) entry
->data
;
93 write_object(write_ctx
*ctx
, const void *obj
)
95 blob_write_uint32(ctx
->blob
, write_lookup_object(ctx
, obj
));
99 read_add_object(read_ctx
*ctx
, void *obj
)
101 assert(ctx
->next_idx
< ctx
->idx_table_len
);
102 ctx
->idx_table
[ctx
->next_idx
++] = obj
;
106 read_lookup_object(read_ctx
*ctx
, uint32_t idx
)
108 assert(idx
< ctx
->idx_table_len
);
109 return ctx
->idx_table
[idx
];
113 read_object(read_ctx
*ctx
)
115 return read_lookup_object(ctx
, blob_read_uint32(ctx
->blob
));
119 write_constant(write_ctx
*ctx
, const nir_constant
*c
)
121 blob_write_bytes(ctx
->blob
, c
->values
, sizeof(c
->values
));
122 blob_write_uint32(ctx
->blob
, c
->num_elements
);
123 for (unsigned i
= 0; i
< c
->num_elements
; i
++)
124 write_constant(ctx
, c
->elements
[i
]);
127 static nir_constant
*
128 read_constant(read_ctx
*ctx
, nir_variable
*nvar
)
130 nir_constant
*c
= ralloc(nvar
, nir_constant
);
132 blob_copy_bytes(ctx
->blob
, (uint8_t *)c
->values
, sizeof(c
->values
));
133 c
->num_elements
= blob_read_uint32(ctx
->blob
);
134 c
->elements
= ralloc_array(nvar
, nir_constant
*, c
->num_elements
);
135 for (unsigned i
= 0; i
< c
->num_elements
; i
++)
136 c
->elements
[i
] = read_constant(ctx
, nvar
);
145 unsigned has_constant_initializer
:1;
146 unsigned has_interface_type
:1;
147 unsigned num_state_slots
:13;
148 unsigned num_members
:16;
153 write_variable(write_ctx
*ctx
, const nir_variable
*var
)
155 write_add_object(ctx
, var
);
156 encode_type_to_blob(ctx
->blob
, var
->type
);
158 assert(var
->num_state_slots
< (1 << 13));
159 assert(var
->num_members
< (1 << 16));
161 STATIC_ASSERT(sizeof(union packed_var
) == 4);
162 union packed_var flags
;
165 flags
.u
.has_name
= !ctx
->strip
&& var
->name
;
166 flags
.u
.has_constant_initializer
= !!(var
->constant_initializer
);
167 flags
.u
.has_interface_type
= !!(var
->interface_type
);
168 flags
.u
.num_state_slots
= var
->num_state_slots
;
169 flags
.u
.num_members
= var
->num_members
;
171 blob_write_uint32(ctx
->blob
, flags
.u32
);
173 if (flags
.u
.has_name
)
174 blob_write_string(ctx
->blob
, var
->name
);
176 struct nir_variable_data data
= var
->data
;
178 /* When stripping, we expect that the location is no longer needed,
179 * which is typically after shaders are linked.
182 data
.mode
!= nir_var_shader_in
&&
183 data
.mode
!= nir_var_shader_out
)
186 blob_write_bytes(ctx
->blob
, &data
, sizeof(data
));
188 for (unsigned i
= 0; i
< var
->num_state_slots
; i
++) {
189 blob_write_bytes(ctx
->blob
, &var
->state_slots
[i
],
190 sizeof(var
->state_slots
[i
]));
192 if (var
->constant_initializer
)
193 write_constant(ctx
, var
->constant_initializer
);
194 if (var
->interface_type
)
195 encode_type_to_blob(ctx
->blob
, var
->interface_type
);
196 if (var
->num_members
> 0) {
197 blob_write_bytes(ctx
->blob
, (uint8_t *) var
->members
,
198 var
->num_members
* sizeof(*var
->members
));
202 static nir_variable
*
203 read_variable(read_ctx
*ctx
)
205 nir_variable
*var
= rzalloc(ctx
->nir
, nir_variable
);
206 read_add_object(ctx
, var
);
208 var
->type
= decode_type_from_blob(ctx
->blob
);
210 union packed_var flags
;
211 flags
.u32
= blob_read_uint32(ctx
->blob
);
213 if (flags
.u
.has_name
) {
214 const char *name
= blob_read_string(ctx
->blob
);
215 var
->name
= ralloc_strdup(var
, name
);
219 blob_copy_bytes(ctx
->blob
, (uint8_t *) &var
->data
, sizeof(var
->data
));
220 var
->num_state_slots
= flags
.u
.num_state_slots
;
221 if (var
->num_state_slots
!= 0) {
222 var
->state_slots
= ralloc_array(var
, nir_state_slot
,
223 var
->num_state_slots
);
224 for (unsigned i
= 0; i
< var
->num_state_slots
; i
++) {
225 blob_copy_bytes(ctx
->blob
, &var
->state_slots
[i
],
226 sizeof(var
->state_slots
[i
]));
229 if (flags
.u
.has_constant_initializer
)
230 var
->constant_initializer
= read_constant(ctx
, var
);
232 var
->constant_initializer
= NULL
;
233 if (flags
.u
.has_interface_type
)
234 var
->interface_type
= decode_type_from_blob(ctx
->blob
);
236 var
->interface_type
= NULL
;
237 var
->num_members
= flags
.u
.num_members
;
238 if (var
->num_members
> 0) {
239 var
->members
= ralloc_array(var
, struct nir_variable_data
,
241 blob_copy_bytes(ctx
->blob
, (uint8_t *) var
->members
,
242 var
->num_members
* sizeof(*var
->members
));
249 write_var_list(write_ctx
*ctx
, const struct exec_list
*src
)
251 blob_write_uint32(ctx
->blob
, exec_list_length(src
));
252 foreach_list_typed(nir_variable
, var
, node
, src
) {
253 write_variable(ctx
, var
);
258 read_var_list(read_ctx
*ctx
, struct exec_list
*dst
)
260 exec_list_make_empty(dst
);
261 unsigned num_vars
= blob_read_uint32(ctx
->blob
);
262 for (unsigned i
= 0; i
< num_vars
; i
++) {
263 nir_variable
*var
= read_variable(ctx
);
264 exec_list_push_tail(dst
, &var
->node
);
269 write_register(write_ctx
*ctx
, const nir_register
*reg
)
271 write_add_object(ctx
, reg
);
272 blob_write_uint32(ctx
->blob
, reg
->num_components
);
273 blob_write_uint32(ctx
->blob
, reg
->bit_size
);
274 blob_write_uint32(ctx
->blob
, reg
->num_array_elems
);
275 blob_write_uint32(ctx
->blob
, reg
->index
);
276 blob_write_uint32(ctx
->blob
, !ctx
->strip
&& reg
->name
);
277 if (!ctx
->strip
&& reg
->name
)
278 blob_write_string(ctx
->blob
, reg
->name
);
281 static nir_register
*
282 read_register(read_ctx
*ctx
)
284 nir_register
*reg
= ralloc(ctx
->nir
, nir_register
);
285 read_add_object(ctx
, reg
);
286 reg
->num_components
= blob_read_uint32(ctx
->blob
);
287 reg
->bit_size
= blob_read_uint32(ctx
->blob
);
288 reg
->num_array_elems
= blob_read_uint32(ctx
->blob
);
289 reg
->index
= blob_read_uint32(ctx
->blob
);
290 bool has_name
= blob_read_uint32(ctx
->blob
);
292 const char *name
= blob_read_string(ctx
->blob
);
293 reg
->name
= ralloc_strdup(reg
, name
);
298 list_inithead(®
->uses
);
299 list_inithead(®
->defs
);
300 list_inithead(®
->if_uses
);
306 write_reg_list(write_ctx
*ctx
, const struct exec_list
*src
)
308 blob_write_uint32(ctx
->blob
, exec_list_length(src
));
309 foreach_list_typed(nir_register
, reg
, node
, src
)
310 write_register(ctx
, reg
);
314 read_reg_list(read_ctx
*ctx
, struct exec_list
*dst
)
316 exec_list_make_empty(dst
);
317 unsigned num_regs
= blob_read_uint32(ctx
->blob
);
318 for (unsigned i
= 0; i
< num_regs
; i
++) {
319 nir_register
*reg
= read_register(ctx
);
320 exec_list_push_tail(dst
, ®
->node
);
325 write_src(write_ctx
*ctx
, const nir_src
*src
)
327 /* Since sources are very frequent, we try to save some space when storing
328 * them. In particular, we store whether the source is a register and
329 * whether the register has an indirect index in the low two bits. We can
330 * assume that the high two bits of the index are zero, since otherwise our
331 * address space would've been exhausted allocating the remap table!
334 uint32_t idx
= write_lookup_object(ctx
, src
->ssa
) << 2;
336 blob_write_uint32(ctx
->blob
, idx
);
338 uint32_t idx
= write_lookup_object(ctx
, src
->reg
.reg
) << 2;
339 if (src
->reg
.indirect
)
341 blob_write_uint32(ctx
->blob
, idx
);
342 blob_write_uint32(ctx
->blob
, src
->reg
.base_offset
);
343 if (src
->reg
.indirect
) {
344 write_src(ctx
, src
->reg
.indirect
);
350 read_src(read_ctx
*ctx
, nir_src
*src
, void *mem_ctx
)
352 uint32_t val
= blob_read_uint32(ctx
->blob
);
353 uint32_t idx
= val
>> 2;
354 src
->is_ssa
= val
& 0x1;
356 src
->ssa
= read_lookup_object(ctx
, idx
);
358 bool is_indirect
= val
& 0x2;
359 src
->reg
.reg
= read_lookup_object(ctx
, idx
);
360 src
->reg
.base_offset
= blob_read_uint32(ctx
->blob
);
362 src
->reg
.indirect
= ralloc(mem_ctx
, nir_src
);
363 read_src(ctx
, src
->reg
.indirect
, mem_ctx
);
365 src
->reg
.indirect
= NULL
;
371 write_dest(write_ctx
*ctx
, const nir_dest
*dst
)
373 uint32_t val
= dst
->is_ssa
;
375 val
|= (!ctx
->strip
&& dst
->ssa
.name
) << 1;
376 val
|= dst
->ssa
.num_components
<< 2;
377 val
|= dst
->ssa
.bit_size
<< 5;
379 val
|= !!(dst
->reg
.indirect
) << 1;
381 blob_write_uint32(ctx
->blob
, val
);
383 write_add_object(ctx
, &dst
->ssa
);
384 if (!ctx
->strip
&& dst
->ssa
.name
)
385 blob_write_string(ctx
->blob
, dst
->ssa
.name
);
387 blob_write_uint32(ctx
->blob
, write_lookup_object(ctx
, dst
->reg
.reg
));
388 blob_write_uint32(ctx
->blob
, dst
->reg
.base_offset
);
389 if (dst
->reg
.indirect
)
390 write_src(ctx
, dst
->reg
.indirect
);
395 read_dest(read_ctx
*ctx
, nir_dest
*dst
, nir_instr
*instr
)
397 uint32_t val
= blob_read_uint32(ctx
->blob
);
398 bool is_ssa
= val
& 0x1;
400 bool has_name
= val
& 0x2;
401 unsigned num_components
= (val
>> 2) & 0x7;
402 unsigned bit_size
= val
>> 5;
403 char *name
= has_name
? blob_read_string(ctx
->blob
) : NULL
;
404 nir_ssa_dest_init(instr
, dst
, num_components
, bit_size
, name
);
405 read_add_object(ctx
, &dst
->ssa
);
407 bool is_indirect
= val
& 0x2;
408 dst
->reg
.reg
= read_object(ctx
);
409 dst
->reg
.base_offset
= blob_read_uint32(ctx
->blob
);
411 dst
->reg
.indirect
= ralloc(instr
, nir_src
);
412 read_src(ctx
, dst
->reg
.indirect
, instr
);
418 write_alu(write_ctx
*ctx
, const nir_alu_instr
*alu
)
420 blob_write_uint32(ctx
->blob
, alu
->op
);
421 uint32_t flags
= alu
->exact
;
422 flags
|= alu
->no_signed_wrap
<< 1;
423 flags
|= alu
->no_unsigned_wrap
<< 2;
424 flags
|= alu
->dest
.saturate
<< 3;
425 flags
|= alu
->dest
.write_mask
<< 4;
426 blob_write_uint32(ctx
->blob
, flags
);
428 write_dest(ctx
, &alu
->dest
.dest
);
430 for (unsigned i
= 0; i
< nir_op_infos
[alu
->op
].num_inputs
; i
++) {
431 write_src(ctx
, &alu
->src
[i
].src
);
432 flags
= alu
->src
[i
].negate
;
433 flags
|= alu
->src
[i
].abs
<< 1;
434 for (unsigned j
= 0; j
< 4; j
++)
435 flags
|= alu
->src
[i
].swizzle
[j
] << (2 + 2 * j
);
436 blob_write_uint32(ctx
->blob
, flags
);
440 static nir_alu_instr
*
441 read_alu(read_ctx
*ctx
)
443 nir_op op
= blob_read_uint32(ctx
->blob
);
444 nir_alu_instr
*alu
= nir_alu_instr_create(ctx
->nir
, op
);
446 uint32_t flags
= blob_read_uint32(ctx
->blob
);
447 alu
->exact
= flags
& 1;
448 alu
->no_signed_wrap
= flags
& 2;
449 alu
->no_unsigned_wrap
= flags
& 4;
450 alu
->dest
.saturate
= flags
& 8;
451 alu
->dest
.write_mask
= flags
>> 4;
453 read_dest(ctx
, &alu
->dest
.dest
, &alu
->instr
);
455 for (unsigned i
= 0; i
< nir_op_infos
[op
].num_inputs
; i
++) {
456 read_src(ctx
, &alu
->src
[i
].src
, &alu
->instr
);
457 flags
= blob_read_uint32(ctx
->blob
);
458 alu
->src
[i
].negate
= flags
& 1;
459 alu
->src
[i
].abs
= flags
& 2;
460 for (unsigned j
= 0; j
< 4; j
++)
461 alu
->src
[i
].swizzle
[j
] = (flags
>> (2 * j
+ 2)) & 3;
468 write_deref(write_ctx
*ctx
, const nir_deref_instr
*deref
)
470 blob_write_uint32(ctx
->blob
, deref
->deref_type
);
472 blob_write_uint32(ctx
->blob
, deref
->mode
);
473 encode_type_to_blob(ctx
->blob
, deref
->type
);
475 write_dest(ctx
, &deref
->dest
);
477 if (deref
->deref_type
== nir_deref_type_var
) {
478 write_object(ctx
, deref
->var
);
482 write_src(ctx
, &deref
->parent
);
484 switch (deref
->deref_type
) {
485 case nir_deref_type_struct
:
486 blob_write_uint32(ctx
->blob
, deref
->strct
.index
);
489 case nir_deref_type_array
:
490 case nir_deref_type_ptr_as_array
:
491 write_src(ctx
, &deref
->arr
.index
);
494 case nir_deref_type_cast
:
495 blob_write_uint32(ctx
->blob
, deref
->cast
.ptr_stride
);
498 case nir_deref_type_array_wildcard
:
503 unreachable("Invalid deref type");
507 static nir_deref_instr
*
508 read_deref(read_ctx
*ctx
)
510 nir_deref_type deref_type
= blob_read_uint32(ctx
->blob
);
511 nir_deref_instr
*deref
= nir_deref_instr_create(ctx
->nir
, deref_type
);
513 deref
->mode
= blob_read_uint32(ctx
->blob
);
514 deref
->type
= decode_type_from_blob(ctx
->blob
);
516 read_dest(ctx
, &deref
->dest
, &deref
->instr
);
518 if (deref_type
== nir_deref_type_var
) {
519 deref
->var
= read_object(ctx
);
523 read_src(ctx
, &deref
->parent
, &deref
->instr
);
525 switch (deref
->deref_type
) {
526 case nir_deref_type_struct
:
527 deref
->strct
.index
= blob_read_uint32(ctx
->blob
);
530 case nir_deref_type_array
:
531 case nir_deref_type_ptr_as_array
:
532 read_src(ctx
, &deref
->arr
.index
, &deref
->instr
);
535 case nir_deref_type_cast
:
536 deref
->cast
.ptr_stride
= blob_read_uint32(ctx
->blob
);
539 case nir_deref_type_array_wildcard
:
544 unreachable("Invalid deref type");
551 write_intrinsic(write_ctx
*ctx
, const nir_intrinsic_instr
*intrin
)
553 blob_write_uint32(ctx
->blob
, intrin
->intrinsic
);
555 unsigned num_srcs
= nir_intrinsic_infos
[intrin
->intrinsic
].num_srcs
;
556 unsigned num_indices
= nir_intrinsic_infos
[intrin
->intrinsic
].num_indices
;
558 blob_write_uint32(ctx
->blob
, intrin
->num_components
);
560 if (nir_intrinsic_infos
[intrin
->intrinsic
].has_dest
)
561 write_dest(ctx
, &intrin
->dest
);
563 for (unsigned i
= 0; i
< num_srcs
; i
++)
564 write_src(ctx
, &intrin
->src
[i
]);
566 for (unsigned i
= 0; i
< num_indices
; i
++)
567 blob_write_uint32(ctx
->blob
, intrin
->const_index
[i
]);
570 static nir_intrinsic_instr
*
571 read_intrinsic(read_ctx
*ctx
)
573 nir_intrinsic_op op
= blob_read_uint32(ctx
->blob
);
575 nir_intrinsic_instr
*intrin
= nir_intrinsic_instr_create(ctx
->nir
, op
);
577 unsigned num_srcs
= nir_intrinsic_infos
[op
].num_srcs
;
578 unsigned num_indices
= nir_intrinsic_infos
[op
].num_indices
;
580 intrin
->num_components
= blob_read_uint32(ctx
->blob
);
582 if (nir_intrinsic_infos
[op
].has_dest
)
583 read_dest(ctx
, &intrin
->dest
, &intrin
->instr
);
585 for (unsigned i
= 0; i
< num_srcs
; i
++)
586 read_src(ctx
, &intrin
->src
[i
], &intrin
->instr
);
588 for (unsigned i
= 0; i
< num_indices
; i
++)
589 intrin
->const_index
[i
] = blob_read_uint32(ctx
->blob
);
595 write_load_const(write_ctx
*ctx
, const nir_load_const_instr
*lc
)
597 uint32_t val
= lc
->def
.num_components
;
598 val
|= lc
->def
.bit_size
<< 3;
599 blob_write_uint32(ctx
->blob
, val
);
600 blob_write_bytes(ctx
->blob
, lc
->value
, sizeof(*lc
->value
) * lc
->def
.num_components
);
601 write_add_object(ctx
, &lc
->def
);
604 static nir_load_const_instr
*
605 read_load_const(read_ctx
*ctx
)
607 uint32_t val
= blob_read_uint32(ctx
->blob
);
609 nir_load_const_instr
*lc
=
610 nir_load_const_instr_create(ctx
->nir
, val
& 0x7, val
>> 3);
612 blob_copy_bytes(ctx
->blob
, lc
->value
, sizeof(*lc
->value
) * lc
->def
.num_components
);
613 read_add_object(ctx
, &lc
->def
);
618 write_ssa_undef(write_ctx
*ctx
, const nir_ssa_undef_instr
*undef
)
620 uint32_t val
= undef
->def
.num_components
;
621 val
|= undef
->def
.bit_size
<< 3;
622 blob_write_uint32(ctx
->blob
, val
);
623 write_add_object(ctx
, &undef
->def
);
626 static nir_ssa_undef_instr
*
627 read_ssa_undef(read_ctx
*ctx
)
629 uint32_t val
= blob_read_uint32(ctx
->blob
);
631 nir_ssa_undef_instr
*undef
=
632 nir_ssa_undef_instr_create(ctx
->nir
, val
& 0x7, val
>> 3);
634 read_add_object(ctx
, &undef
->def
);
638 union packed_tex_data
{
641 enum glsl_sampler_dim sampler_dim
:4;
642 nir_alu_type dest_type
:8;
643 unsigned coord_components
:3;
645 unsigned is_shadow
:1;
646 unsigned is_new_style_shadow
:1;
647 unsigned component
:2;
648 unsigned unused
:10; /* Mark unused for valgrind. */
653 write_tex(write_ctx
*ctx
, const nir_tex_instr
*tex
)
655 blob_write_uint32(ctx
->blob
, tex
->num_srcs
);
656 blob_write_uint32(ctx
->blob
, tex
->op
);
657 blob_write_uint32(ctx
->blob
, tex
->texture_index
);
658 blob_write_uint32(ctx
->blob
, tex
->texture_array_size
);
659 blob_write_uint32(ctx
->blob
, tex
->sampler_index
);
660 blob_write_bytes(ctx
->blob
, tex
->tg4_offsets
, sizeof(tex
->tg4_offsets
));
662 STATIC_ASSERT(sizeof(union packed_tex_data
) == sizeof(uint32_t));
663 union packed_tex_data packed
= {
664 .u
.sampler_dim
= tex
->sampler_dim
,
665 .u
.dest_type
= tex
->dest_type
,
666 .u
.coord_components
= tex
->coord_components
,
667 .u
.is_array
= tex
->is_array
,
668 .u
.is_shadow
= tex
->is_shadow
,
669 .u
.is_new_style_shadow
= tex
->is_new_style_shadow
,
670 .u
.component
= tex
->component
,
672 blob_write_uint32(ctx
->blob
, packed
.u32
);
674 write_dest(ctx
, &tex
->dest
);
675 for (unsigned i
= 0; i
< tex
->num_srcs
; i
++) {
676 blob_write_uint32(ctx
->blob
, tex
->src
[i
].src_type
);
677 write_src(ctx
, &tex
->src
[i
].src
);
681 static nir_tex_instr
*
682 read_tex(read_ctx
*ctx
)
684 unsigned num_srcs
= blob_read_uint32(ctx
->blob
);
685 nir_tex_instr
*tex
= nir_tex_instr_create(ctx
->nir
, num_srcs
);
687 tex
->op
= blob_read_uint32(ctx
->blob
);
688 tex
->texture_index
= blob_read_uint32(ctx
->blob
);
689 tex
->texture_array_size
= blob_read_uint32(ctx
->blob
);
690 tex
->sampler_index
= blob_read_uint32(ctx
->blob
);
691 blob_copy_bytes(ctx
->blob
, tex
->tg4_offsets
, sizeof(tex
->tg4_offsets
));
693 union packed_tex_data packed
;
694 packed
.u32
= blob_read_uint32(ctx
->blob
);
695 tex
->sampler_dim
= packed
.u
.sampler_dim
;
696 tex
->dest_type
= packed
.u
.dest_type
;
697 tex
->coord_components
= packed
.u
.coord_components
;
698 tex
->is_array
= packed
.u
.is_array
;
699 tex
->is_shadow
= packed
.u
.is_shadow
;
700 tex
->is_new_style_shadow
= packed
.u
.is_new_style_shadow
;
701 tex
->component
= packed
.u
.component
;
703 read_dest(ctx
, &tex
->dest
, &tex
->instr
);
704 for (unsigned i
= 0; i
< tex
->num_srcs
; i
++) {
705 tex
->src
[i
].src_type
= blob_read_uint32(ctx
->blob
);
706 read_src(ctx
, &tex
->src
[i
].src
, &tex
->instr
);
713 write_phi(write_ctx
*ctx
, const nir_phi_instr
*phi
)
715 /* Phi nodes are special, since they may reference SSA definitions and
716 * basic blocks that don't exist yet. We leave two empty uint32_t's here,
717 * and then store enough information so that a later fixup pass can fill
720 write_dest(ctx
, &phi
->dest
);
722 blob_write_uint32(ctx
->blob
, exec_list_length(&phi
->srcs
));
724 nir_foreach_phi_src(src
, phi
) {
725 assert(src
->src
.is_ssa
);
726 size_t blob_offset
= blob_reserve_uint32(ctx
->blob
);
727 ASSERTED
size_t blob_offset2
= blob_reserve_uint32(ctx
->blob
);
728 assert(blob_offset
+ sizeof(uint32_t) == blob_offset2
);
729 write_phi_fixup fixup
= {
730 .blob_offset
= blob_offset
,
734 util_dynarray_append(&ctx
->phi_fixups
, write_phi_fixup
, fixup
);
739 write_fixup_phis(write_ctx
*ctx
)
741 util_dynarray_foreach(&ctx
->phi_fixups
, write_phi_fixup
, fixup
) {
742 uint32_t *blob_ptr
= (uint32_t *)(ctx
->blob
->data
+ fixup
->blob_offset
);
743 blob_ptr
[0] = write_lookup_object(ctx
, fixup
->src
);
744 blob_ptr
[1] = write_lookup_object(ctx
, fixup
->block
);
747 util_dynarray_clear(&ctx
->phi_fixups
);
750 static nir_phi_instr
*
751 read_phi(read_ctx
*ctx
, nir_block
*blk
)
753 nir_phi_instr
*phi
= nir_phi_instr_create(ctx
->nir
);
755 read_dest(ctx
, &phi
->dest
, &phi
->instr
);
757 unsigned num_srcs
= blob_read_uint32(ctx
->blob
);
759 /* For similar reasons as before, we just store the index directly into the
760 * pointer, and let a later pass resolve the phi sources.
762 * In order to ensure that the copied sources (which are just the indices
763 * from the blob for now) don't get inserted into the old shader's use-def
764 * lists, we have to add the phi instruction *before* we set up its
767 nir_instr_insert_after_block(blk
, &phi
->instr
);
769 for (unsigned i
= 0; i
< num_srcs
; i
++) {
770 nir_phi_src
*src
= ralloc(phi
, nir_phi_src
);
772 src
->src
.is_ssa
= true;
773 src
->src
.ssa
= (nir_ssa_def
*)(uintptr_t) blob_read_uint32(ctx
->blob
);
774 src
->pred
= (nir_block
*)(uintptr_t) blob_read_uint32(ctx
->blob
);
776 /* Since we're not letting nir_insert_instr handle use/def stuff for us,
777 * we have to set the parent_instr manually. It doesn't really matter
778 * when we do it, so we might as well do it here.
780 src
->src
.parent_instr
= &phi
->instr
;
782 /* Stash it in the list of phi sources. We'll walk this list and fix up
783 * sources at the very end of read_function_impl.
785 list_add(&src
->src
.use_link
, &ctx
->phi_srcs
);
787 exec_list_push_tail(&phi
->srcs
, &src
->node
);
794 read_fixup_phis(read_ctx
*ctx
)
796 list_for_each_entry_safe(nir_phi_src
, src
, &ctx
->phi_srcs
, src
.use_link
) {
797 src
->pred
= read_lookup_object(ctx
, (uintptr_t)src
->pred
);
798 src
->src
.ssa
= read_lookup_object(ctx
, (uintptr_t)src
->src
.ssa
);
800 /* Remove from this list */
801 list_del(&src
->src
.use_link
);
803 list_addtail(&src
->src
.use_link
, &src
->src
.ssa
->uses
);
805 assert(list_is_empty(&ctx
->phi_srcs
));
809 write_jump(write_ctx
*ctx
, const nir_jump_instr
*jmp
)
811 blob_write_uint32(ctx
->blob
, jmp
->type
);
814 static nir_jump_instr
*
815 read_jump(read_ctx
*ctx
)
817 nir_jump_type type
= blob_read_uint32(ctx
->blob
);
818 nir_jump_instr
*jmp
= nir_jump_instr_create(ctx
->nir
, type
);
823 write_call(write_ctx
*ctx
, const nir_call_instr
*call
)
825 blob_write_uint32(ctx
->blob
, write_lookup_object(ctx
, call
->callee
));
827 for (unsigned i
= 0; i
< call
->num_params
; i
++)
828 write_src(ctx
, &call
->params
[i
]);
831 static nir_call_instr
*
832 read_call(read_ctx
*ctx
)
834 nir_function
*callee
= read_object(ctx
);
835 nir_call_instr
*call
= nir_call_instr_create(ctx
->nir
, callee
);
837 for (unsigned i
= 0; i
< call
->num_params
; i
++)
838 read_src(ctx
, &call
->params
[i
], call
);
844 write_instr(write_ctx
*ctx
, const nir_instr
*instr
)
846 blob_write_uint32(ctx
->blob
, instr
->type
);
847 switch (instr
->type
) {
848 case nir_instr_type_alu
:
849 write_alu(ctx
, nir_instr_as_alu(instr
));
851 case nir_instr_type_deref
:
852 write_deref(ctx
, nir_instr_as_deref(instr
));
854 case nir_instr_type_intrinsic
:
855 write_intrinsic(ctx
, nir_instr_as_intrinsic(instr
));
857 case nir_instr_type_load_const
:
858 write_load_const(ctx
, nir_instr_as_load_const(instr
));
860 case nir_instr_type_ssa_undef
:
861 write_ssa_undef(ctx
, nir_instr_as_ssa_undef(instr
));
863 case nir_instr_type_tex
:
864 write_tex(ctx
, nir_instr_as_tex(instr
));
866 case nir_instr_type_phi
:
867 write_phi(ctx
, nir_instr_as_phi(instr
));
869 case nir_instr_type_jump
:
870 write_jump(ctx
, nir_instr_as_jump(instr
));
872 case nir_instr_type_call
:
873 write_call(ctx
, nir_instr_as_call(instr
));
875 case nir_instr_type_parallel_copy
:
876 unreachable("Cannot write parallel copies");
878 unreachable("bad instr type");
883 read_instr(read_ctx
*ctx
, nir_block
*block
)
885 nir_instr_type type
= blob_read_uint32(ctx
->blob
);
888 case nir_instr_type_alu
:
889 instr
= &read_alu(ctx
)->instr
;
891 case nir_instr_type_deref
:
892 instr
= &read_deref(ctx
)->instr
;
894 case nir_instr_type_intrinsic
:
895 instr
= &read_intrinsic(ctx
)->instr
;
897 case nir_instr_type_load_const
:
898 instr
= &read_load_const(ctx
)->instr
;
900 case nir_instr_type_ssa_undef
:
901 instr
= &read_ssa_undef(ctx
)->instr
;
903 case nir_instr_type_tex
:
904 instr
= &read_tex(ctx
)->instr
;
906 case nir_instr_type_phi
:
907 /* Phi instructions are a bit of a special case when reading because we
908 * don't want inserting the instruction to automatically handle use/defs
909 * for us. Instead, we need to wait until all the blocks/instructions
910 * are read so that we can set their sources up.
912 read_phi(ctx
, block
);
914 case nir_instr_type_jump
:
915 instr
= &read_jump(ctx
)->instr
;
917 case nir_instr_type_call
:
918 instr
= &read_call(ctx
)->instr
;
920 case nir_instr_type_parallel_copy
:
921 unreachable("Cannot read parallel copies");
923 unreachable("bad instr type");
926 nir_instr_insert_after_block(block
, instr
);
930 write_block(write_ctx
*ctx
, const nir_block
*block
)
932 write_add_object(ctx
, block
);
933 blob_write_uint32(ctx
->blob
, exec_list_length(&block
->instr_list
));
934 nir_foreach_instr(instr
, block
)
935 write_instr(ctx
, instr
);
939 read_block(read_ctx
*ctx
, struct exec_list
*cf_list
)
941 /* Don't actually create a new block. Just use the one from the tail of
942 * the list. NIR guarantees that the tail of the list is a block and that
943 * no two blocks are side-by-side in the IR; It should be empty.
946 exec_node_data(nir_block
, exec_list_get_tail(cf_list
), cf_node
.node
);
948 read_add_object(ctx
, block
);
949 unsigned num_instrs
= blob_read_uint32(ctx
->blob
);
950 for (unsigned i
= 0; i
< num_instrs
; i
++) {
951 read_instr(ctx
, block
);
956 write_cf_list(write_ctx
*ctx
, const struct exec_list
*cf_list
);
959 read_cf_list(read_ctx
*ctx
, struct exec_list
*cf_list
);
962 write_if(write_ctx
*ctx
, nir_if
*nif
)
964 write_src(ctx
, &nif
->condition
);
966 write_cf_list(ctx
, &nif
->then_list
);
967 write_cf_list(ctx
, &nif
->else_list
);
971 read_if(read_ctx
*ctx
, struct exec_list
*cf_list
)
973 nir_if
*nif
= nir_if_create(ctx
->nir
);
975 read_src(ctx
, &nif
->condition
, nif
);
977 nir_cf_node_insert_end(cf_list
, &nif
->cf_node
);
979 read_cf_list(ctx
, &nif
->then_list
);
980 read_cf_list(ctx
, &nif
->else_list
);
984 write_loop(write_ctx
*ctx
, nir_loop
*loop
)
986 write_cf_list(ctx
, &loop
->body
);
990 read_loop(read_ctx
*ctx
, struct exec_list
*cf_list
)
992 nir_loop
*loop
= nir_loop_create(ctx
->nir
);
994 nir_cf_node_insert_end(cf_list
, &loop
->cf_node
);
996 read_cf_list(ctx
, &loop
->body
);
1000 write_cf_node(write_ctx
*ctx
, nir_cf_node
*cf
)
1002 blob_write_uint32(ctx
->blob
, cf
->type
);
1005 case nir_cf_node_block
:
1006 write_block(ctx
, nir_cf_node_as_block(cf
));
1008 case nir_cf_node_if
:
1009 write_if(ctx
, nir_cf_node_as_if(cf
));
1011 case nir_cf_node_loop
:
1012 write_loop(ctx
, nir_cf_node_as_loop(cf
));
1015 unreachable("bad cf type");
1020 read_cf_node(read_ctx
*ctx
, struct exec_list
*list
)
1022 nir_cf_node_type type
= blob_read_uint32(ctx
->blob
);
1025 case nir_cf_node_block
:
1026 read_block(ctx
, list
);
1028 case nir_cf_node_if
:
1031 case nir_cf_node_loop
:
1032 read_loop(ctx
, list
);
1035 unreachable("bad cf type");
1040 write_cf_list(write_ctx
*ctx
, const struct exec_list
*cf_list
)
1042 blob_write_uint32(ctx
->blob
, exec_list_length(cf_list
));
1043 foreach_list_typed(nir_cf_node
, cf
, node
, cf_list
) {
1044 write_cf_node(ctx
, cf
);
1049 read_cf_list(read_ctx
*ctx
, struct exec_list
*cf_list
)
1051 uint32_t num_cf_nodes
= blob_read_uint32(ctx
->blob
);
1052 for (unsigned i
= 0; i
< num_cf_nodes
; i
++)
1053 read_cf_node(ctx
, cf_list
);
1057 write_function_impl(write_ctx
*ctx
, const nir_function_impl
*fi
)
1059 write_var_list(ctx
, &fi
->locals
);
1060 write_reg_list(ctx
, &fi
->registers
);
1061 blob_write_uint32(ctx
->blob
, fi
->reg_alloc
);
1063 write_cf_list(ctx
, &fi
->body
);
1064 write_fixup_phis(ctx
);
1067 static nir_function_impl
*
1068 read_function_impl(read_ctx
*ctx
, nir_function
*fxn
)
1070 nir_function_impl
*fi
= nir_function_impl_create_bare(ctx
->nir
);
1073 read_var_list(ctx
, &fi
->locals
);
1074 read_reg_list(ctx
, &fi
->registers
);
1075 fi
->reg_alloc
= blob_read_uint32(ctx
->blob
);
1077 read_cf_list(ctx
, &fi
->body
);
1078 read_fixup_phis(ctx
);
1080 fi
->valid_metadata
= 0;
1086 write_function(write_ctx
*ctx
, const nir_function
*fxn
)
1088 uint32_t flags
= fxn
->is_entrypoint
;
1093 blob_write_uint32(ctx
->blob
, flags
);
1095 blob_write_string(ctx
->blob
, fxn
->name
);
1097 write_add_object(ctx
, fxn
);
1099 blob_write_uint32(ctx
->blob
, fxn
->num_params
);
1100 for (unsigned i
= 0; i
< fxn
->num_params
; i
++) {
1102 ((uint32_t)fxn
->params
[i
].num_components
) |
1103 ((uint32_t)fxn
->params
[i
].bit_size
) << 8;
1104 blob_write_uint32(ctx
->blob
, val
);
1107 /* At first glance, it looks like we should write the function_impl here.
1108 * However, call instructions need to be able to reference at least the
1109 * function and those will get processed as we write the function_impls.
1110 * We stop here and write function_impls as a second pass.
1115 read_function(read_ctx
*ctx
)
1117 uint32_t flags
= blob_read_uint32(ctx
->blob
);
1118 bool has_name
= flags
& 0x2;
1119 char *name
= has_name
? blob_read_string(ctx
->blob
) : NULL
;
1121 nir_function
*fxn
= nir_function_create(ctx
->nir
, name
);
1123 read_add_object(ctx
, fxn
);
1125 fxn
->num_params
= blob_read_uint32(ctx
->blob
);
1126 fxn
->params
= ralloc_array(fxn
, nir_parameter
, fxn
->num_params
);
1127 for (unsigned i
= 0; i
< fxn
->num_params
; i
++) {
1128 uint32_t val
= blob_read_uint32(ctx
->blob
);
1129 fxn
->params
[i
].num_components
= val
& 0xff;
1130 fxn
->params
[i
].bit_size
= (val
>> 8) & 0xff;
1133 fxn
->is_entrypoint
= flags
& 0x1;
1135 fxn
->impl
= NIR_SERIALIZE_FUNC_HAS_IMPL
;
1139 * Serialize NIR into a binary blob.
1141 * \param strip Don't serialize information only useful for debugging,
1142 * such as variable names, making cache hits from similar
1143 * shaders more likely.
1146 nir_serialize(struct blob
*blob
, const nir_shader
*nir
, bool strip
)
1149 ctx
.remap_table
= _mesa_pointer_hash_table_create(NULL
);
1154 util_dynarray_init(&ctx
.phi_fixups
, NULL
);
1156 size_t idx_size_offset
= blob_reserve_uint32(blob
);
1158 struct shader_info info
= nir
->info
;
1159 uint32_t strings
= 0;
1160 if (!strip
&& info
.name
)
1162 if (!strip
&& info
.label
)
1164 blob_write_uint32(blob
, strings
);
1165 if (!strip
&& info
.name
)
1166 blob_write_string(blob
, info
.name
);
1167 if (!strip
&& info
.label
)
1168 blob_write_string(blob
, info
.label
);
1169 info
.name
= info
.label
= NULL
;
1170 blob_write_bytes(blob
, (uint8_t *) &info
, sizeof(info
));
1172 write_var_list(&ctx
, &nir
->uniforms
);
1173 write_var_list(&ctx
, &nir
->inputs
);
1174 write_var_list(&ctx
, &nir
->outputs
);
1175 write_var_list(&ctx
, &nir
->shared
);
1176 write_var_list(&ctx
, &nir
->globals
);
1177 write_var_list(&ctx
, &nir
->system_values
);
1179 blob_write_uint32(blob
, nir
->num_inputs
);
1180 blob_write_uint32(blob
, nir
->num_uniforms
);
1181 blob_write_uint32(blob
, nir
->num_outputs
);
1182 blob_write_uint32(blob
, nir
->num_shared
);
1183 blob_write_uint32(blob
, nir
->scratch_size
);
1185 blob_write_uint32(blob
, exec_list_length(&nir
->functions
));
1186 nir_foreach_function(fxn
, nir
) {
1187 write_function(&ctx
, fxn
);
1190 nir_foreach_function(fxn
, nir
) {
1192 write_function_impl(&ctx
, fxn
->impl
);
1195 blob_write_uint32(blob
, nir
->constant_data_size
);
1196 if (nir
->constant_data_size
> 0)
1197 blob_write_bytes(blob
, nir
->constant_data
, nir
->constant_data_size
);
1199 *(uint32_t *)(blob
->data
+ idx_size_offset
) = ctx
.next_idx
;
1201 _mesa_hash_table_destroy(ctx
.remap_table
, NULL
);
1202 util_dynarray_fini(&ctx
.phi_fixups
);
1206 nir_deserialize(void *mem_ctx
,
1207 const struct nir_shader_compiler_options
*options
,
1208 struct blob_reader
*blob
)
1212 list_inithead(&ctx
.phi_srcs
);
1213 ctx
.idx_table_len
= blob_read_uint32(blob
);
1214 ctx
.idx_table
= calloc(ctx
.idx_table_len
, sizeof(uintptr_t));
1217 uint32_t strings
= blob_read_uint32(blob
);
1218 char *name
= (strings
& 0x1) ? blob_read_string(blob
) : NULL
;
1219 char *label
= (strings
& 0x2) ? blob_read_string(blob
) : NULL
;
1221 struct shader_info info
;
1222 blob_copy_bytes(blob
, (uint8_t *) &info
, sizeof(info
));
1224 ctx
.nir
= nir_shader_create(mem_ctx
, info
.stage
, options
, NULL
);
1226 info
.name
= name
? ralloc_strdup(ctx
.nir
, name
) : NULL
;
1227 info
.label
= label
? ralloc_strdup(ctx
.nir
, label
) : NULL
;
1229 ctx
.nir
->info
= info
;
1231 read_var_list(&ctx
, &ctx
.nir
->uniforms
);
1232 read_var_list(&ctx
, &ctx
.nir
->inputs
);
1233 read_var_list(&ctx
, &ctx
.nir
->outputs
);
1234 read_var_list(&ctx
, &ctx
.nir
->shared
);
1235 read_var_list(&ctx
, &ctx
.nir
->globals
);
1236 read_var_list(&ctx
, &ctx
.nir
->system_values
);
1238 ctx
.nir
->num_inputs
= blob_read_uint32(blob
);
1239 ctx
.nir
->num_uniforms
= blob_read_uint32(blob
);
1240 ctx
.nir
->num_outputs
= blob_read_uint32(blob
);
1241 ctx
.nir
->num_shared
= blob_read_uint32(blob
);
1242 ctx
.nir
->scratch_size
= blob_read_uint32(blob
);
1244 unsigned num_functions
= blob_read_uint32(blob
);
1245 for (unsigned i
= 0; i
< num_functions
; i
++)
1246 read_function(&ctx
);
1248 nir_foreach_function(fxn
, ctx
.nir
) {
1249 if (fxn
->impl
== NIR_SERIALIZE_FUNC_HAS_IMPL
)
1250 fxn
->impl
= read_function_impl(&ctx
, fxn
);
1253 ctx
.nir
->constant_data_size
= blob_read_uint32(blob
);
1254 if (ctx
.nir
->constant_data_size
> 0) {
1255 ctx
.nir
->constant_data
=
1256 ralloc_size(ctx
.nir
, ctx
.nir
->constant_data_size
);
1257 blob_copy_bytes(blob
, ctx
.nir
->constant_data
,
1258 ctx
.nir
->constant_data_size
);
1261 free(ctx
.idx_table
);
1267 nir_shader_serialize_deserialize(nir_shader
*shader
)
1269 const struct nir_shader_compiler_options
*options
= shader
->options
;
1273 nir_serialize(&writer
, shader
, false);
1275 /* Delete all of dest's ralloc children but leave dest alone */
1276 void *dead_ctx
= ralloc_context(NULL
);
1277 ralloc_adopt(dead_ctx
, shader
);
1278 ralloc_free(dead_ctx
);
1280 dead_ctx
= ralloc_context(NULL
);
1282 struct blob_reader reader
;
1283 blob_reader_init(&reader
, writer
.data
, writer
.size
);
1284 nir_shader
*copy
= nir_deserialize(dead_ctx
, options
, &reader
);
1286 blob_finish(&writer
);
1288 nir_shader_replace(shader
, copy
);
1289 ralloc_free(dead_ctx
);