#include "nir_control_flow.h"
#include "util/u_dynarray.h"
+#define NIR_SERIALIZE_FUNC_HAS_IMPL ((void *)(intptr_t)1) /* placeholder put in fxn->impl while reading; marks functions whose impl must still be deserialized */
+#define MAX_OBJECT_IDS (1 << 30) /* write_add_object() asserts no index reaches this; source refs shift the index left by 2 to pack flag bits */
+
typedef struct {
size_t blob_offset;
nir_ssa_def *src;
struct hash_table *remap_table;
/* the next index to assign to a NIR in-memory object */
- uintptr_t next_idx;
+ uint32_t next_idx;
/* Array of write_phi_fixup structs representing phi sources that need to
* be resolved in the second pass.
struct blob_reader *blob;
/* the next index to assign to a NIR in-memory object */
- uintptr_t next_idx;
+ uint32_t next_idx;
/* The length of the index -> object table */
- uintptr_t idx_table_len;
+ uint32_t idx_table_len;
/* map from index to deserialized pointer */
void **idx_table;
static void
write_add_object(write_ctx *ctx, const void *obj)
{
- uintptr_t index = ctx->next_idx++;
- _mesa_hash_table_insert(ctx->remap_table, obj, (void *) index);
+ uint32_t index = ctx->next_idx++; /* hand out the next sequential object index */
+ assert(index != MAX_OBJECT_IDS); /* indices must never reach MAX_OBJECT_IDS */
+ _mesa_hash_table_insert(ctx->remap_table, obj, (void *)(uintptr_t) index); /* remember obj -> index; the index is stored in the entry's data pointer */
}
-static uintptr_t
+static uint32_t
write_lookup_object(write_ctx *ctx, const void *obj)
{
struct hash_entry *entry = _mesa_hash_table_search(ctx->remap_table, obj);
assert(entry);
- return (uintptr_t) entry->data;
+ return (uint32_t)(uintptr_t) entry->data; /* index recorded for obj in remap_table; obj must already be registered (asserted above) */
}
static void
write_object(write_ctx *ctx, const void *obj)
{
- blob_write_intptr(ctx->blob, write_lookup_object(ctx, obj));
+ blob_write_uint32(ctx->blob, write_lookup_object(ctx, obj)); /* emit obj's remap-table index as a 32-bit value */
}
static void
}
static void *
-read_lookup_object(read_ctx *ctx, uintptr_t idx)
+read_lookup_object(read_ctx *ctx, uint32_t idx)
{
assert(idx < ctx->idx_table_len);
return ctx->idx_table[idx];
static void *
read_object(read_ctx *ctx)
{
- return read_lookup_object(ctx, blob_read_intptr(ctx->blob));
+ return read_lookup_object(ctx, blob_read_uint32(ctx->blob)); /* read a 32-bit object index and resolve it through the index table */
}
static void
return c;
}
+union packed_var { /* per-variable header packed into a single 32-bit word of the blob */
+ uint32_t u32; /* raw word that is written to / read from the blob */
+ struct {
+ unsigned has_name:1; /* var->name is non-NULL, so a name string is serialized */
+ unsigned has_constant_initializer:1; /* var->constant_initializer is non-NULL and is serialized */
+ unsigned has_interface_type:1; /* var->interface_type is non-NULL and is serialized */
+ unsigned num_state_slots:13; /* var->num_state_slots; the writer asserts it is below 1 << 13 */
+ unsigned num_members:16; /* var->num_members; the writer asserts it is below 1 << 16 */
+ } u;
+};
+
static void
write_variable(write_ctx *ctx, const nir_variable *var)
{
write_add_object(ctx, var);
encode_type_to_blob(ctx->blob, var->type);
- blob_write_uint32(ctx->blob, !!(var->name));
+
+ assert(var->num_state_slots < (1 << 13));
+ assert(var->num_members < (1 << 16));
+
+ STATIC_ASSERT(sizeof(union packed_var) == 4);
+ union packed_var flags;
+ flags.u32 = 0;
+
+ flags.u.has_name = !!(var->name);
+ flags.u.has_constant_initializer = !!(var->constant_initializer);
+ flags.u.has_interface_type = !!(var->interface_type);
+ flags.u.num_state_slots = var->num_state_slots;
+ flags.u.num_members = var->num_members;
+
+ blob_write_uint32(ctx->blob, flags.u32);
+
if (var->name)
blob_write_string(ctx->blob, var->name);
blob_write_bytes(ctx->blob, (uint8_t *) &var->data, sizeof(var->data));
- blob_write_uint32(ctx->blob, var->num_state_slots);
for (unsigned i = 0; i < var->num_state_slots; i++) {
- for (unsigned j = 0; j < STATE_LENGTH; j++)
- blob_write_uint32(ctx->blob, var->state_slots[i].tokens[j]);
- blob_write_uint32(ctx->blob, var->state_slots[i].swizzle);
+ blob_write_bytes(ctx->blob, &var->state_slots[i],
+ sizeof(var->state_slots[i]));
}
- blob_write_uint32(ctx->blob, !!(var->constant_initializer));
if (var->constant_initializer)
write_constant(ctx, var->constant_initializer);
- blob_write_uint32(ctx->blob, !!(var->interface_type));
if (var->interface_type)
encode_type_to_blob(ctx->blob, var->interface_type);
- blob_write_uint32(ctx->blob, var->num_members);
if (var->num_members > 0) {
blob_write_bytes(ctx->blob, (uint8_t *) var->members,
var->num_members * sizeof(*var->members));
read_add_object(ctx, var);
var->type = decode_type_from_blob(ctx->blob);
- bool has_name = blob_read_uint32(ctx->blob);
- if (has_name) {
+
+ union packed_var flags;
+ flags.u32 = blob_read_uint32(ctx->blob);
+
+ if (flags.u.has_name) {
const char *name = blob_read_string(ctx->blob);
var->name = ralloc_strdup(var, name);
} else {
var->name = NULL;
}
blob_copy_bytes(ctx->blob, (uint8_t *) &var->data, sizeof(var->data));
- var->num_state_slots = blob_read_uint32(ctx->blob);
+ var->num_state_slots = flags.u.num_state_slots;
if (var->num_state_slots != 0) {
var->state_slots = ralloc_array(var, nir_state_slot,
var->num_state_slots);
for (unsigned i = 0; i < var->num_state_slots; i++) {
- for (unsigned j = 0; j < STATE_LENGTH; j++)
- var->state_slots[i].tokens[j] = blob_read_uint32(ctx->blob);
- var->state_slots[i].swizzle = blob_read_uint32(ctx->blob);
+ blob_copy_bytes(ctx->blob, &var->state_slots[i],
+ sizeof(var->state_slots[i]));
}
}
- bool has_const_initializer = blob_read_uint32(ctx->blob);
- if (has_const_initializer)
+ if (flags.u.has_constant_initializer)
var->constant_initializer = read_constant(ctx, var);
else
var->constant_initializer = NULL;
- bool has_interface_type = blob_read_uint32(ctx->blob);
- if (has_interface_type)
+ if (flags.u.has_interface_type)
var->interface_type = decode_type_from_blob(ctx->blob);
else
var->interface_type = NULL;
- var->num_members = blob_read_uint32(ctx->blob);
+ var->num_members = flags.u.num_members;
if (var->num_members > 0) {
var->members = ralloc_array(var, struct nir_variable_data,
var->num_members);
blob_write_uint32(ctx->blob, !!(reg->name));
if (reg->name)
blob_write_string(ctx->blob, reg->name);
- blob_write_uint32(ctx->blob, reg->is_global << 1 | reg->is_packed);
}
static nir_register *
} else {
reg->name = NULL;
}
- unsigned flags = blob_read_uint32(ctx->blob);
- reg->is_global = flags & 0x2;
- reg->is_packed = flags & 0x1;
list_inithead(&reg->uses);
list_inithead(&reg->defs);
* address space would've been exhausted allocating the remap table!
*/
if (src->is_ssa) {
- uintptr_t idx = write_lookup_object(ctx, src->ssa) << 2;
+ uint32_t idx = write_lookup_object(ctx, src->ssa) << 2;
idx |= 1;
- blob_write_intptr(ctx->blob, idx);
+ blob_write_uint32(ctx->blob, idx);
} else {
- uintptr_t idx = write_lookup_object(ctx, src->reg.reg) << 2;
+ uint32_t idx = write_lookup_object(ctx, src->reg.reg) << 2;
if (src->reg.indirect)
idx |= 2;
- blob_write_intptr(ctx->blob, idx);
+ blob_write_uint32(ctx->blob, idx);
blob_write_uint32(ctx->blob, src->reg.base_offset);
if (src->reg.indirect) {
write_src(ctx, src->reg.indirect);
static void
read_src(read_ctx *ctx, nir_src *src, void *mem_ctx)
{
- uintptr_t val = blob_read_intptr(ctx->blob);
- uintptr_t idx = val >> 2;
+ uint32_t val = blob_read_uint32(ctx->blob);
+ uint32_t idx = val >> 2;
src->is_ssa = val & 0x1;
if (src->is_ssa) {
src->ssa = read_lookup_object(ctx, idx);
if (dst->ssa.name)
blob_write_string(ctx->blob, dst->ssa.name);
} else {
- blob_write_intptr(ctx->blob, write_lookup_object(ctx, dst->reg.reg));
+ blob_write_uint32(ctx->blob, write_lookup_object(ctx, dst->reg.reg));
blob_write_uint32(ctx->blob, dst->reg.base_offset);
if (dst->reg.indirect)
write_src(ctx, dst->reg.indirect);
{
blob_write_uint32(ctx->blob, alu->op);
uint32_t flags = alu->exact;
- flags |= alu->dest.saturate << 1;
- flags |= alu->dest.write_mask << 2;
+ flags |= alu->no_signed_wrap << 1;
+ flags |= alu->no_unsigned_wrap << 2;
+ flags |= alu->dest.saturate << 3;
+ flags |= alu->dest.write_mask << 4;
blob_write_uint32(ctx->blob, flags);
write_dest(ctx, &alu->dest.dest);
uint32_t flags = blob_read_uint32(ctx->blob);
alu->exact = flags & 1;
- alu->dest.saturate = flags & 2;
- alu->dest.write_mask = flags >> 2;
+ alu->no_signed_wrap = flags & 2;
+ alu->no_unsigned_wrap = flags & 4;
+ alu->dest.saturate = flags & 8;
+ alu->dest.write_mask = flags >> 4;
read_dest(ctx, &alu->dest.dest, &alu->instr);
uint32_t val = lc->def.num_components;
val |= lc->def.bit_size << 3;
blob_write_uint32(ctx->blob, val);
- blob_write_bytes(ctx->blob, (uint8_t *) &lc->value, sizeof(lc->value));
+ blob_write_bytes(ctx->blob, lc->value, sizeof(*lc->value) * lc->def.num_components);
write_add_object(ctx, &lc->def);
}
nir_load_const_instr *lc =
nir_load_const_instr_create(ctx->nir, val & 0x7, val >> 3);
- blob_copy_bytes(ctx->blob, (uint8_t *) &lc->value, sizeof(lc->value));
+ blob_copy_bytes(ctx->blob, lc->value, sizeof(*lc->value) * lc->def.num_components);
read_add_object(ctx, &lc->def);
return lc;
}
blob_write_uint32(ctx->blob, tex->texture_index);
blob_write_uint32(ctx->blob, tex->texture_array_size);
blob_write_uint32(ctx->blob, tex->sampler_index);
+ blob_write_bytes(ctx->blob, tex->tg4_offsets, sizeof(tex->tg4_offsets));
STATIC_ASSERT(sizeof(union packed_tex_data) == sizeof(uint32_t));
union packed_tex_data packed = {
tex->texture_index = blob_read_uint32(ctx->blob);
tex->texture_array_size = blob_read_uint32(ctx->blob);
tex->sampler_index = blob_read_uint32(ctx->blob);
+ blob_copy_bytes(ctx->blob, tex->tg4_offsets, sizeof(tex->tg4_offsets));
union packed_tex_data packed;
packed.u32 = blob_read_uint32(ctx->blob);
write_phi(write_ctx *ctx, const nir_phi_instr *phi)
{
/* Phi nodes are special, since they may reference SSA definitions and
- * basic blocks that don't exist yet. We leave two empty uintptr_t's here,
+ * basic blocks that don't exist yet. We leave two empty uint32_t's here,
* and then store enough information so that a later fixup pass can fill
* them in correctly.
*/
nir_foreach_phi_src(src, phi) {
assert(src->src.is_ssa);
- size_t blob_offset = blob_reserve_intptr(ctx->blob);
- MAYBE_UNUSED size_t blob_offset2 = blob_reserve_intptr(ctx->blob);
- assert(blob_offset + sizeof(uintptr_t) == blob_offset2);
+ size_t blob_offset = blob_reserve_uint32(ctx->blob);
+ ASSERTED size_t blob_offset2 = blob_reserve_uint32(ctx->blob);
+ assert(blob_offset + sizeof(uint32_t) == blob_offset2);
write_phi_fixup fixup = {
.blob_offset = blob_offset,
.src = src->src.ssa,
write_fixup_phis(write_ctx *ctx)
{
util_dynarray_foreach(&ctx->phi_fixups, write_phi_fixup, fixup) {
- uintptr_t *blob_ptr = (uintptr_t *)(ctx->blob->data + fixup->blob_offset);
+ uint32_t *blob_ptr = (uint32_t *)(ctx->blob->data + fixup->blob_offset);
blob_ptr[0] = write_lookup_object(ctx, fixup->src);
blob_ptr[1] = write_lookup_object(ctx, fixup->block);
}
nir_phi_src *src = ralloc(phi, nir_phi_src);
src->src.is_ssa = true;
- src->src.ssa = (nir_ssa_def *) blob_read_intptr(ctx->blob);
- src->pred = (nir_block *) blob_read_intptr(ctx->blob);
+ src->src.ssa = (nir_ssa_def *)(uintptr_t) blob_read_uint32(ctx->blob);
+ src->pred = (nir_block *)(uintptr_t) blob_read_uint32(ctx->blob);
/* Since we're not letting nir_insert_instr handle use/def stuff for us,
* we have to set the parent_instr manually. It doesn't really matter
list_addtail(&src->src.use_link, &src->src.ssa->uses);
}
- assert(list_empty(&ctx->phi_srcs));
+ assert(list_is_empty(&ctx->phi_srcs));
}
static void
static void
write_call(write_ctx *ctx, const nir_call_instr *call)
{
- blob_write_intptr(ctx->blob, write_lookup_object(ctx, call->callee));
+ blob_write_uint32(ctx->blob, write_lookup_object(ctx, call->callee));
for (unsigned i = 0; i < call->num_params; i++)
write_src(ctx, &call->params[i]);
static void
write_function(write_ctx *ctx, const nir_function *fxn)
{
- blob_write_uint32(ctx->blob, !!(fxn->name));
+ uint32_t flags = fxn->is_entrypoint;
+ if (fxn->name)
+ flags |= 0x2;
+ if (fxn->impl)
+ flags |= 0x4;
+ blob_write_uint32(ctx->blob, flags);
if (fxn->name)
blob_write_string(ctx->blob, fxn->name);
blob_write_uint32(ctx->blob, val);
}
- blob_write_uint32(ctx->blob, fxn->is_entrypoint);
-
/* At first glance, it looks like we should write the function_impl here.
* However, call instructions need to be able to reference at least the
* function and those will get processed as we write the function_impls.
static void
read_function(read_ctx *ctx)
{
- bool has_name = blob_read_uint32(ctx->blob);
+ uint32_t flags = blob_read_uint32(ctx->blob);
+ bool has_name = flags & 0x2;
char *name = has_name ? blob_read_string(ctx->blob) : NULL;
nir_function *fxn = nir_function_create(ctx->nir, name);
fxn->params[i].bit_size = (val >> 8) & 0xff;
}
- fxn->is_entrypoint = blob_read_uint32(ctx->blob);
+ fxn->is_entrypoint = flags & 0x1;
+ if (flags & 0x4)
+ fxn->impl = NIR_SERIALIZE_FUNC_HAS_IMPL;
}
void
-nir_serialize(struct blob *blob, const nir_shader *nir)
+nir_serialize(struct blob *blob, const nir_shader *nir, bool strip)
{
+ nir_shader *stripped = NULL;
+
+ if (strip) {
+ /* Drop unnecessary information (like variable names), so the serialized
+ * NIR is smaller, and also to let us detect more isomorphic shaders
+ * when hashing, increasing cache hits.
+ */
+ stripped = nir_shader_clone(NULL, nir);
+ nir_strip(stripped);
+ nir = stripped;
+ }
+
write_ctx ctx;
ctx.remap_table = _mesa_pointer_hash_table_create(NULL);
ctx.next_idx = 0;
ctx.nir = nir;
util_dynarray_init(&ctx.phi_fixups, NULL);
- size_t idx_size_offset = blob_reserve_intptr(blob);
+ size_t idx_size_offset = blob_reserve_uint32(blob);
struct shader_info info = nir->info;
uint32_t strings = 0;
write_var_list(&ctx, &nir->globals);
write_var_list(&ctx, &nir->system_values);
- write_reg_list(&ctx, &nir->registers);
- blob_write_uint32(blob, nir->reg_alloc);
blob_write_uint32(blob, nir->num_inputs);
blob_write_uint32(blob, nir->num_uniforms);
blob_write_uint32(blob, nir->num_outputs);
blob_write_uint32(blob, nir->num_shared);
+ blob_write_uint32(blob, nir->scratch_size);
blob_write_uint32(blob, exec_list_length(&nir->functions));
nir_foreach_function(fxn, nir) {
}
nir_foreach_function(fxn, nir) {
- write_function_impl(&ctx, fxn->impl);
+ if (fxn->impl)
+ write_function_impl(&ctx, fxn->impl);
}
blob_write_uint32(blob, nir->constant_data_size);
if (nir->constant_data_size > 0)
blob_write_bytes(blob, nir->constant_data, nir->constant_data_size);
- *(uintptr_t *)(blob->data + idx_size_offset) = ctx.next_idx;
+ *(uint32_t *)(blob->data + idx_size_offset) = ctx.next_idx;
_mesa_hash_table_destroy(ctx.remap_table, NULL);
util_dynarray_fini(&ctx.phi_fixups);
+
+ if (strip)
+ ralloc_free(stripped);
}
nir_shader *
read_ctx ctx;
ctx.blob = blob;
list_inithead(&ctx.phi_srcs);
- ctx.idx_table_len = blob_read_intptr(blob);
+ ctx.idx_table_len = blob_read_uint32(blob);
ctx.idx_table = calloc(ctx.idx_table_len, sizeof(uintptr_t));
ctx.next_idx = 0;
read_var_list(&ctx, &ctx.nir->globals);
read_var_list(&ctx, &ctx.nir->system_values);
- read_reg_list(&ctx, &ctx.nir->registers);
- ctx.nir->reg_alloc = blob_read_uint32(blob);
ctx.nir->num_inputs = blob_read_uint32(blob);
ctx.nir->num_uniforms = blob_read_uint32(blob);
ctx.nir->num_outputs = blob_read_uint32(blob);
ctx.nir->num_shared = blob_read_uint32(blob);
+ ctx.nir->scratch_size = blob_read_uint32(blob);
unsigned num_functions = blob_read_uint32(blob);
for (unsigned i = 0; i < num_functions; i++)
read_function(&ctx);
- nir_foreach_function(fxn, ctx.nir)
- fxn->impl = read_function_impl(&ctx, fxn);
+ nir_foreach_function(fxn, ctx.nir) {
+ if (fxn->impl == NIR_SERIALIZE_FUNC_HAS_IMPL)
+ fxn->impl = read_function_impl(&ctx, fxn);
+ }
ctx.nir->constant_data_size = blob_read_uint32(blob);
if (ctx.nir->constant_data_size > 0) {
return ctx.nir;
}
-nir_shader *
-nir_shader_serialize_deserialize(void *mem_ctx, nir_shader *s)
+void
+nir_shader_serialize_deserialize(nir_shader *shader) /* serializes shader to a blob, deserializes it again, and passes the copy to nir_shader_replace() */
{
- const struct nir_shader_compiler_options *options = s->options;
+ const struct nir_shader_compiler_options *options = shader->options; /* saved up front; needed later by nir_deserialize() */
struct blob writer;
blob_init(&writer);
- nir_serialize(&writer, s);
- ralloc_free(s);
+ nir_serialize(&writer, shader, false); /* strip = false */
+
+ /* Delete all of shader's ralloc children but leave shader itself alone */
+ void *dead_ctx = ralloc_context(NULL);
+ ralloc_adopt(dead_ctx, shader);
+ ralloc_free(dead_ctx);
+
+ dead_ctx = ralloc_context(NULL); /* fresh temporary context used as the parent of the deserialized copy */
struct blob_reader reader;
blob_reader_init(&reader, writer.data, writer.size);
- nir_shader *ns = nir_deserialize(mem_ctx, options, &reader);
+ nir_shader *copy = nir_deserialize(dead_ctx, options, &reader);
blob_finish(&writer);
- return ns;
+ nir_shader_replace(shader, copy); /* NOTE(review): presumably moves copy's contents into shader -- confirm against nir_shader_replace() */
+ ralloc_free(dead_ctx); /* release the temporary context */
}