#include "nir_serialize.h"
#include "nir_control_flow.h"
#include "util/u_dynarray.h"
+#include "util/u_math.h"
+
+#define NIR_SERIALIZE_FUNC_HAS_IMPL ((void *)(intptr_t)1) /* NOTE(review): sentinel pointer value; its users are outside this view */
+#define MAX_OBJECT_IDS (1 << 20) /* write_add_object() asserts an assigned index never equals this; packed_src.object_idx is 20 bits wide */
typedef struct {
size_t blob_offset;
struct hash_table *remap_table;
/* the next index to assign to a NIR in-memory object */
- uintptr_t next_idx;
+ uint32_t next_idx; /* handed out sequentially by write_add_object(), which asserts the assigned index != MAX_OBJECT_IDS */
/* Array of write_phi_fixup structs representing phi sources that need to
* be resolved in the second pass.
*/
struct util_dynarray phi_fixups;
+
+ /* The last serialized type. write_variable() and cast derefs compare
+ * against these and set a "same as last" flag instead of re-encoding the
+ * type; last_var_data is the reference for the location-diff encoding.
+ */
+ const struct glsl_type *last_type;
+ const struct glsl_type *last_interface_type;
+ struct nir_variable_data last_var_data;
+
+ /* For skipping equal ALU headers (typical after scalarization). */
+ nir_instr_type last_instr_type;
+ uintptr_t last_alu_header_offset; /* byte offset into ctx->blob->data of the most recent ALU header written by write_dest() */
+
+ /* Don't write optional data such as variable names. */
+ bool strip;
} write_ctx;
typedef struct {
struct blob_reader *blob;
/* the next index to assign to a NIR in-memory object */
- uintptr_t next_idx;
+ uint32_t next_idx;
/* The length of the index -> object table */
- uintptr_t idx_table_len;
+ uint32_t idx_table_len; /* read_lookup_object() asserts idx < idx_table_len */
/* map from index to deserialized pointer */
void **idx_table;
/* List of phi sources. */
struct list_head phi_srcs;
+ /* The last deserialized type. These mirror the writer's last_* fields so
+ * that "same as last" flags and location diffs can be resolved on read.
+ */
+ const struct glsl_type *last_type;
+ const struct glsl_type *last_interface_type;
+ struct nir_variable_data last_var_data;
} read_ctx;
static void
write_add_object(write_ctx *ctx, const void *obj)
{
- uintptr_t index = ctx->next_idx++;
- _mesa_hash_table_insert(ctx->remap_table, obj, (void *) index);
+ uint32_t index = ctx->next_idx++; /* Register obj under the next sequential index. */
+ assert(index != MAX_OBJECT_IDS); /* index must stay below the object-ID limit */
+ _mesa_hash_table_insert(ctx->remap_table, obj, (void *)(uintptr_t) index); /* the index is stored directly as the entry's data pointer */
}
-static uintptr_t
+static uint32_t /* Returns the index previously assigned to obj; obj must already be in ctx->remap_table (asserted). */
write_lookup_object(write_ctx *ctx, const void *obj)
{
struct hash_entry *entry = _mesa_hash_table_search(ctx->remap_table, obj);
assert(entry);
- return (uintptr_t) entry->data;
-}
-
-static void
-write_object(write_ctx *ctx, const void *obj)
-{
- blob_write_intptr(ctx->blob, write_lookup_object(ctx, obj));
+ return (uint32_t)(uintptr_t) entry->data; /* the index write_add_object() stored as the data pointer */
}
static void
}
static void *
-read_lookup_object(read_ctx *ctx, uintptr_t idx)
+read_lookup_object(read_ctx *ctx, uint32_t idx)
{
assert(idx < ctx->idx_table_len);
return ctx->idx_table[idx];
static void *
read_object(read_ctx *ctx)
{
- return read_lookup_object(ctx, blob_read_intptr(ctx->blob));
+ return read_lookup_object(ctx, blob_read_uint32(ctx->blob)); /* reads a 32-bit object index from the blob and resolves it via read_lookup_object() */
+}
+
+static uint32_t
+encode_bit_size_3bits(uint8_t bit_size)
+{
+ /* Encode values of 0, 1, 2, 4, 8, 16, 32, 64 in 3 bits.
+ * 0 is encoded as 0; any other accepted value is encoded as
+ * util_logbase2(bit_size) + 1. decode_bit_size_3bits() reverses this with
+ * 1 << (value - 1), which implies 1 -> 1, 2 -> 2, 4 -> 3, ..., 64 -> 7
+ * (assuming util_logbase2() is floor(log2) -- TODO confirm).
+ */
+ assert(bit_size <= 64 && util_is_power_of_two_or_zero(bit_size));
+ if (bit_size)
+ return util_logbase2(bit_size) + 1;
+ return 0;
+}
+
+static uint8_t
+decode_bit_size_3bits(uint8_t bit_size)
+{ /* Despite its name, the parameter is the encoded value: 0 yields 0, any other value v yields 1 << (v - 1). */
+ if (bit_size)
+ return 1 << (bit_size - 1);
+ return 0;
+}
+
+static uint8_t
+encode_num_components_in_3bits(uint8_t num_components)
+{ /* Maps a component count to a 3-bit code: 0..4 are stored as-is, 8 becomes 5, 16 becomes 6; anything else is treated as unreachable. */
+ if (num_components <= 4)
+ return num_components;
+ if (num_components == 8)
+ return 5;
+ if (num_components == 16)
+ return 6;
+
+ unreachable("invalid number in num_components");
+ return 0;
+}
+
+static uint8_t
+decode_num_components_in_3bits(uint8_t value)
+{ /* Inverse of encode_num_components_in_3bits(): 0..4 are returned as-is, 5 yields 8, 6 yields 16; 7 is treated as unreachable. */
+ if (value <= 4)
+ return value;
+ if (value == 5)
+ return 8;
+ if (value == 6)
+ return 16;
+
+ unreachable("invalid num_components encoding");
+ return 0;
}
static void
blob_copy_bytes(ctx->blob, (uint8_t *)c->values, sizeof(c->values));
c->num_elements = blob_read_uint32(ctx->blob);
- c->elements = ralloc_array(ctx->nir, nir_constant *, c->num_elements);
+ c->elements = ralloc_array(nvar, nir_constant *, c->num_elements);
for (unsigned i = 0; i < c->num_elements; i++)
c->elements[i] = read_constant(ctx, nvar);
return c;
}
+enum var_data_encoding { /* How write_variable() stores var->data; kept in the 2-bit packed_var.data_encoding field. */
+ var_encode_full, /* the whole nir_variable_data struct is written */
+ var_encode_shader_temp, /* nothing written; the reader only sets data.mode = nir_var_shader_temp */
+ var_encode_function_temp, /* nothing written; the reader only sets data.mode = nir_var_function_temp */
+ var_encode_location_diff, /* one packed_var_data_diff dword relative to the last written variable data */
+};
+
+union packed_var { /* One-dword header written first for every variable; size is static-asserted to be 4 in write_variable(). */
+ uint32_t u32;
+ struct {
+ unsigned has_name:1;
+ unsigned has_constant_initializer:1;
+ unsigned has_interface_type:1;
+ unsigned num_state_slots:7; /* write_variable() asserts the count is < (1 << 7) */
+ unsigned data_encoding:2; /* enum var_data_encoding */
+ unsigned type_same_as_last:1; /* set: the type is not written, the reader reuses last_type */
+ unsigned interface_type_same_as_last:1; /* set: the interface type is not written, the reader reuses last_interface_type */
+ unsigned _pad:2;
+ unsigned num_members:16; /* write_variable() asserts the count is < (1 << 16) */
+ } u;
+};
+
+union packed_var_data_diff { /* Payload of var_encode_location_diff: signed deltas that the reader adds to last_var_data. */
+ uint32_t u32;
+ struct {
+ int location:13; /* write_variable() only picks this encoding when |delta| < (1 << 12) */
+ int location_frac:3;
+ int driver_location:16; /* write_variable() only picks this encoding when |delta| < (1 << 15) */
+ } u;
+};
+
static void
write_variable(write_ctx *ctx, const nir_variable *var)
{
write_add_object(ctx, var);
- encode_type_to_blob(ctx->blob, var->type);
- blob_write_uint32(ctx->blob, !!(var->name));
- if (var->name)
+
+ assert(var->num_state_slots < (1 << 7)); /* must fit packed_var.num_state_slots */
+ assert(var->num_members < (1 << 16)); /* must fit packed_var.num_members */
+
+ STATIC_ASSERT(sizeof(union packed_var) == 4);
+ union packed_var flags; /* header dword; written before any other data of this variable */
+ flags.u32 = 0;
+
+ flags.u.has_name = !ctx->strip && var->name;
+ flags.u.has_constant_initializer = !!(var->constant_initializer);
+ flags.u.has_interface_type = !!(var->interface_type);
+ flags.u.type_same_as_last = var->type == ctx->last_type;
+ flags.u.interface_type_same_as_last =
+ var->interface_type && var->interface_type == ctx->last_interface_type;
+ flags.u.num_state_slots = var->num_state_slots;
+ flags.u.num_members = var->num_members;
+
+ struct nir_variable_data data = var->data; /* local copy so that stripping can modify it without touching var */
+
+ /* When stripping, we expect that the location is no longer needed,
+ * which is typically after shaders are linked.
+ */
+ if (ctx->strip &&
+ data.mode != nir_var_shader_in &&
+ data.mode != nir_var_shader_out)
+ data.location = 0;
+
+ /* Temporary variables don't serialize var->data. */
+ if (data.mode == nir_var_shader_temp)
+ flags.u.data_encoding = var_encode_shader_temp;
+ else if (data.mode == nir_var_function_temp)
+ flags.u.data_encoding = var_encode_function_temp;
+ else {
+ struct nir_variable_data tmp = data;
+
+ tmp.location = ctx->last_var_data.location;
+ tmp.location_frac = ctx->last_var_data.location_frac;
+ tmp.driver_location = ctx->last_var_data.driver_location;
+
+ /* See if we can encode only the difference in locations from the last
+ * variable. tmp is data with its three location fields replaced by the
+ * last variable's, so the memcmp succeeds only when everything else in
+ * data matches last_var_data; the abs() bounds keep the deltas within
+ * the signed 13-bit and 16-bit fields of packed_var_data_diff.
+ */
+ if (memcmp(&ctx->last_var_data, &tmp, sizeof(tmp)) == 0 &&
+ abs((int)data.location -
+ (int)ctx->last_var_data.location) < (1 << 12) &&
+ abs((int)data.driver_location -
+ (int)ctx->last_var_data.driver_location) < (1 << 15))
+ flags.u.data_encoding = var_encode_location_diff;
+ else
+ flags.u.data_encoding = var_encode_full;
+ }
+
+ blob_write_uint32(ctx->blob, flags.u32);
+
+ if (!flags.u.type_same_as_last) {
+ encode_type_to_blob(ctx->blob, var->type);
+ ctx->last_type = var->type;
+ }
+
+ if (var->interface_type && !flags.u.interface_type_same_as_last) {
+ encode_type_to_blob(ctx->blob, var->interface_type);
+ ctx->last_interface_type = var->interface_type;
+ }
+
+ if (flags.u.has_name)
blob_write_string(ctx->blob, var->name);
- blob_write_bytes(ctx->blob, (uint8_t *) &var->data, sizeof(var->data));
- blob_write_uint32(ctx->blob, var->num_state_slots);
- blob_write_bytes(ctx->blob, (uint8_t *) var->state_slots,
- var->num_state_slots * sizeof(nir_state_slot));
- blob_write_uint32(ctx->blob, !!(var->constant_initializer));
+
+ if (flags.u.data_encoding == var_encode_full ||
+ flags.u.data_encoding == var_encode_location_diff) {
+ if (flags.u.data_encoding == var_encode_full) {
+ blob_write_bytes(ctx->blob, &data, sizeof(data));
+ } else {
+ /* Serialize only the difference in locations from the last variable.
+ */
+ union packed_var_data_diff diff;
+
+ diff.u.location = data.location - ctx->last_var_data.location;
+ diff.u.location_frac = data.location_frac -
+ ctx->last_var_data.location_frac;
+ diff.u.driver_location = data.driver_location -
+ ctx->last_var_data.driver_location;
+
+ blob_write_uint32(ctx->blob, diff.u32);
+ }
+
+ ctx->last_var_data = data; /* temporaries never update the reference data */
+ }
+
+ for (unsigned i = 0; i < var->num_state_slots; i++) {
+ blob_write_bytes(ctx->blob, &var->state_slots[i],
+ sizeof(var->state_slots[i]));
+ }
if (var->constant_initializer)
write_constant(ctx, var->constant_initializer);
- blob_write_uint32(ctx->blob, !!(var->interface_type));
- if (var->interface_type)
- encode_type_to_blob(ctx->blob, var->interface_type);
+ if (var->num_members > 0) { /* members are written last, as one raw byte copy of the array */
+ blob_write_bytes(ctx->blob, (uint8_t *) var->members,
+ var->num_members * sizeof(*var->members));
+ }
}
static nir_variable *
nir_variable *var = rzalloc(ctx->nir, nir_variable);
read_add_object(ctx, var);
- var->type = decode_type_from_blob(ctx->blob);
- bool has_name = blob_read_uint32(ctx->blob);
- if (has_name) {
+ union packed_var flags;
+ flags.u32 = blob_read_uint32(ctx->blob);
+
+ if (flags.u.type_same_as_last) {
+ var->type = ctx->last_type;
+ } else {
+ var->type = decode_type_from_blob(ctx->blob);
+ ctx->last_type = var->type;
+ }
+
+ if (flags.u.has_interface_type) {
+ if (flags.u.interface_type_same_as_last) {
+ var->interface_type = ctx->last_interface_type;
+ } else {
+ var->interface_type = decode_type_from_blob(ctx->blob);
+ ctx->last_interface_type = var->interface_type;
+ }
+ }
+
+ if (flags.u.has_name) {
const char *name = blob_read_string(ctx->blob);
var->name = ralloc_strdup(var, name);
} else {
var->name = NULL;
}
- blob_copy_bytes(ctx->blob, (uint8_t *) &var->data, sizeof(var->data));
- var->num_state_slots = blob_read_uint32(ctx->blob);
- var->state_slots = ralloc_array(var, nir_state_slot, var->num_state_slots);
- blob_copy_bytes(ctx->blob, (uint8_t *) var->state_slots,
- var->num_state_slots * sizeof(nir_state_slot));
- bool has_const_initializer = blob_read_uint32(ctx->blob);
- if (has_const_initializer)
+
+ if (flags.u.data_encoding == var_encode_shader_temp)
+ var->data.mode = nir_var_shader_temp;
+ else if (flags.u.data_encoding == var_encode_function_temp)
+ var->data.mode = nir_var_function_temp;
+ else if (flags.u.data_encoding == var_encode_full) {
+ blob_copy_bytes(ctx->blob, (uint8_t *) &var->data, sizeof(var->data));
+ ctx->last_var_data = var->data;
+ } else { /* var_encode_location_diff */
+ union packed_var_data_diff diff;
+ diff.u32 = blob_read_uint32(ctx->blob);
+
+ var->data = ctx->last_var_data;
+ var->data.location += diff.u.location;
+ var->data.location_frac += diff.u.location_frac;
+ var->data.driver_location += diff.u.driver_location;
+
+ ctx->last_var_data = var->data;
+ }
+
+ var->num_state_slots = flags.u.num_state_slots;
+ if (var->num_state_slots != 0) {
+ var->state_slots = ralloc_array(var, nir_state_slot,
+ var->num_state_slots);
+ for (unsigned i = 0; i < var->num_state_slots; i++) {
+ blob_copy_bytes(ctx->blob, &var->state_slots[i],
+ sizeof(var->state_slots[i]));
+ }
+ }
+ if (flags.u.has_constant_initializer)
var->constant_initializer = read_constant(ctx, var);
else
var->constant_initializer = NULL;
- bool has_interface_type = blob_read_uint32(ctx->blob);
- if (has_interface_type)
- var->interface_type = decode_type_from_blob(ctx->blob);
- else
- var->interface_type = NULL;
+ var->num_members = flags.u.num_members;
+ if (var->num_members > 0) {
+ var->members = ralloc_array(var, struct nir_variable_data,
+ var->num_members);
+ blob_copy_bytes(ctx->blob, (uint8_t *) var->members,
+ var->num_members * sizeof(*var->members));
+ }
return var;
}
blob_write_uint32(ctx->blob, reg->bit_size);
blob_write_uint32(ctx->blob, reg->num_array_elems);
blob_write_uint32(ctx->blob, reg->index);
- blob_write_uint32(ctx->blob, !!(reg->name));
- if (reg->name)
+ blob_write_uint32(ctx->blob, !ctx->strip && reg->name);
+ if (!ctx->strip && reg->name)
blob_write_string(ctx->blob, reg->name);
- blob_write_uint32(ctx->blob, reg->is_global << 1 | reg->is_packed);
}
static nir_register *
} else {
reg->name = NULL;
}
- unsigned flags = blob_read_uint32(ctx->blob);
- reg->is_global = flags & 0x2;
- reg->is_packed = flags & 0x1;
list_inithead(&reg->uses);
list_inithead(&reg->defs);
}
}
+union packed_src { /* One-dword source header: a 22-bit part shared by all views plus a 10-bit footer whose meaning depends on the instruction. Size is static-asserted to be 4 in read_src(). */
+ uint32_t u32;
+ struct {
+ unsigned is_ssa:1; /* <-- Header */
+ unsigned is_indirect:1; /* only set for register sources */
+ unsigned object_idx:20; /* index of the SSA def or register in the object table */
+ unsigned _footer:10; /* <-- Footer */
+ } any;
+ struct {
+ unsigned _header:22; /* <-- Header */
+ unsigned negate:1; /* <-- Footer */
+ unsigned abs:1;
+ unsigned swizzle_x:2;
+ unsigned swizzle_y:2;
+ unsigned swizzle_z:2;
+ unsigned swizzle_w:2;
+ } alu;
+ struct {
+ unsigned _header:22; /* <-- Header */
+ unsigned src_type:5; /* <-- Footer */
+ unsigned _pad:5;
+ } tex;
+};
+
static void
-write_src(write_ctx *ctx, const nir_src *src)
+write_src_full(write_ctx *ctx, const nir_src *src, union packed_src header) /* header arrives with the caller's footer bits set; this function fills in is_ssa, is_indirect and object_idx */
{
/* Since sources are very frequent, we try to save some space when storing
* them. In particular, we store whether the source is a register and
* assume that the high two bits of the index are zero, since otherwise our
* address space would've been exhausted allocating the remap table!
*/
+ header.any.is_ssa = src->is_ssa; /* NOTE(review): the comment above predates this patch; the index now lives in the 20-bit object_idx field */
if (src->is_ssa) {
- uintptr_t idx = write_lookup_object(ctx, src->ssa) << 2;
- idx |= 1;
- blob_write_intptr(ctx->blob, idx);
+ header.any.object_idx = write_lookup_object(ctx, src->ssa);
+ blob_write_uint32(ctx->blob, header.u32);
} else {
- uintptr_t idx = write_lookup_object(ctx, src->reg.reg) << 2;
- if (src->reg.indirect)
- idx |= 2;
- blob_write_intptr(ctx->blob, idx);
+ header.any.object_idx = write_lookup_object(ctx, src->reg.reg);
+ header.any.is_indirect = !!src->reg.indirect;
+ blob_write_uint32(ctx->blob, header.u32);
blob_write_uint32(ctx->blob, src->reg.base_offset);
if (src->reg.indirect) {
- write_src(ctx, src->reg.indirect);
+ union packed_src header = {0}; /* shadows the parameter: the indirect source gets a zeroed footer */
+ write_src_full(ctx, src->reg.indirect, header);
}
}
}
static void
+write_src(write_ctx *ctx, const nir_src *src) /* Writes a source with an all-zero footer, for callers with no per-source extra bits. */
+{
+ union packed_src header = {0};
+ write_src_full(ctx, src, header);
+}
+
+static union packed_src /* Returns the raw header so that callers can decode their own footer bits (read_alu() reads the .alu view). */
read_src(read_ctx *ctx, nir_src *src, void *mem_ctx)
{
- uintptr_t val = blob_read_intptr(ctx->blob);
- uintptr_t idx = val >> 2;
- src->is_ssa = val & 0x1;
+ STATIC_ASSERT(sizeof(union packed_src) == 4);
+ union packed_src header;
+ header.u32 = blob_read_uint32(ctx->blob);
+
+ src->is_ssa = header.any.is_ssa;
if (src->is_ssa) {
- src->ssa = read_lookup_object(ctx, idx);
+ src->ssa = read_lookup_object(ctx, header.any.object_idx);
} else {
- bool is_indirect = val & 0x2;
- src->reg.reg = read_lookup_object(ctx, idx);
+ src->reg.reg = read_lookup_object(ctx, header.any.object_idx);
src->reg.base_offset = blob_read_uint32(ctx->blob);
- if (is_indirect) {
+ if (header.any.is_indirect) { /* the indirect source follows as a nested source; its returned header is ignored */
src->reg.indirect = ralloc(mem_ctx, nir_src);
read_src(ctx, src->reg.indirect, mem_ctx);
} else {
src->reg.indirect = NULL;
}
}
+ return header;
}
+union packed_dest { /* One byte describing a destination; write_dest() stores it in packed_instr.any.dest and static-asserts the size is 1. */
+ uint8_t u8;
+ struct {
+ uint8_t is_ssa:1;
+ uint8_t has_name:1;
+ uint8_t num_components:3; /* encoded with encode_num_components_in_3bits() */
+ uint8_t bit_size:3; /* encoded with encode_bit_size_3bits() */
+ } ssa;
+ struct {
+ uint8_t is_ssa:1;
+ uint8_t is_indirect:1;
+ uint8_t _pad:6;
+ } reg;
+};
+
+enum intrinsic_const_indices_encoding {
+ /* Use the 6 bits of packed_const_indices to store 1-6 indices.
+ * 1 6-bit index, or 2 3-bit indices, or 3 2-bit indices, or
+ * 4-6 1-bit indices.
+ *
+ * The common case for load_ubo is 0, 0, 0, which is trivially represented.
+ * The common cases for load_interpolated_input also fit here, e.g.: 7, 3
+ *
+ * write_intrinsic() picks this when max_bits * num_indices <= 6, where
+ * max_bits is the largest util_last_bit() over the indices, and gives
+ * each index 6 / num_indices bits.
+ */
+ const_indices_6bit_all_combined,
+
+ const_indices_8bit, /* 8 bits per element */
+ const_indices_16bit, /* 16 bits per element */
+ const_indices_32bit, /* 32 bits per element */
+};
+
+enum load_const_packing { /* Stored in the 2-bit packed_instr.load_const.packing field; it selects how packed_value is interpreted. */
+ /* Constants are not packed and are stored in following dwords. */
+ load_const_full,
+
+ /* packed_value contains high 19 bits, low bits are 0,
+ * good for floating-point decimals
+ */
+ load_const_scalar_hi_19bits,
+
+ /* packed_value contains low 19 bits, high bits are sign-extended */
+ load_const_scalar_lo_19bits_sext,
+};
+
+union packed_instr { /* One-dword instruction header; each struct below is the view used for one kind of instruction. */
+ uint32_t u32;
+ struct {
+ unsigned instr_type:4; /* always present */
+ unsigned _pad:20;
+ unsigned dest:8; /* always last */
+ } any;
+ struct {
+ unsigned instr_type:4;
+ unsigned exact:1;
+ unsigned no_signed_wrap:1;
+ unsigned no_unsigned_wrap:1;
+ unsigned saturate:1;
+ unsigned writemask:4;
+ unsigned op:9; /* nir_op; write_alu() static-asserts nir_num_opcodes <= 512 */
+ unsigned packed_src_ssa_16bit:1; /* set: each source is written as a bare 16-bit object index */
+ /* Scalarized ALUs always have the same header. */
+ unsigned num_followup_alu_sharing_header:2; /* incremented in place by write_dest() for each following ALU that reuses this header (at most 3) */
+ unsigned dest:8;
+ } alu;
+ struct {
+ unsigned instr_type:4;
+ unsigned deref_type:3;
+ unsigned cast_type_same_as_last:1;
+ unsigned mode:10; /* deref_var redefines this */
+ unsigned packed_src_ssa_16bit:1; /* deref_var redefines this */
+ unsigned _pad:5; /* deref_var redefines this */
+ unsigned dest:8;
+ } deref;
+ struct {
+ unsigned instr_type:4;
+ unsigned deref_type:3;
+ unsigned _pad:1;
+ unsigned object_idx:16; /* if 0, the object ID is a separate uint32 */
+ unsigned dest:8;
+ } deref_var;
+ struct {
+ unsigned instr_type:4;
+ unsigned intrinsic:9; /* nir_intrinsic_op; write_intrinsic() static-asserts nir_num_intrinsics <= 512 */
+ unsigned num_components:3; /* encoded with encode_num_components_in_3bits() */
+ unsigned const_indices_encoding:2; /* enum intrinsic_const_indices_encoding */
+ unsigned packed_const_indices:6;
+ unsigned dest:8;
+ } intrinsic;
+ struct {
+ unsigned instr_type:4;
+ unsigned last_component:4;
+ unsigned bit_size:3;
+ unsigned packing:2; /* enum load_const_packing */
+ unsigned packed_value:19; /* meaning determined by packing */
+ } load_const;
+ struct {
+ unsigned instr_type:4;
+ unsigned last_component:4;
+ unsigned bit_size:3;
+ unsigned _pad:21;
+ } undef;
+ struct {
+ unsigned instr_type:4;
+ unsigned num_srcs:4;
+ unsigned op:4;
+ unsigned texture_array_size:12;
+ unsigned dest:8;
+ } tex;
+ struct {
+ unsigned instr_type:4;
+ unsigned num_srcs:20;
+ unsigned dest:8;
+ } phi;
+ struct {
+ unsigned instr_type:4;
+ unsigned type:2;
+ unsigned _pad:26;
+ } jump;
+};
+
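+/* Store the packed destination in header.any.dest and write the header
+ * uint32 (omitted for an ALU instruction that shares the previous ALU
+ * header), followed by the destination's own data.
+ */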
static void
-write_dest(write_ctx *ctx, const nir_dest *dst)
+write_dest(write_ctx *ctx, const nir_dest *dst, union packed_instr header,
+ nir_instr_type instr_type)
{
- uint32_t val = dst->is_ssa;
+ STATIC_ASSERT(sizeof(union packed_dest) == 1);
+ union packed_dest dest;
+ dest.u8 = 0;
+
+ dest.ssa.is_ssa = dst->is_ssa;
if (dst->is_ssa) {
- val |= !!(dst->ssa.name) << 1;
- val |= dst->ssa.num_components << 2;
- val |= dst->ssa.bit_size << 5;
+ dest.ssa.has_name = !ctx->strip && dst->ssa.name;
+ dest.ssa.num_components =
+ encode_num_components_in_3bits(dst->ssa.num_components);
+ dest.ssa.bit_size = encode_bit_size_3bits(dst->ssa.bit_size);
+ } else {
+ dest.reg.is_indirect = !!(dst->reg.indirect);
+ }
+ header.any.dest = dest.u8;
+
+ /* Check if the current ALU instruction has the same header as the previous
+ * instruction that is also ALU. If it is, we don't have to write
+ * the current header. This is a typical occurrence after scalarization.
+ */
+ if (instr_type == nir_instr_type_alu) {
+ bool equal_header = false;
+
+ if (ctx->last_instr_type == nir_instr_type_alu) {
+ assert(ctx->last_alu_header_offset);
+ union packed_instr *last_header =
+ (union packed_instr *)(ctx->blob->data +
+ ctx->last_alu_header_offset);
+
+ /* Clear the field that counts ALUs with equal headers. */
+ union packed_instr clean_header;
+ clean_header.u32 = last_header->u32;
+ clean_header.alu.num_followup_alu_sharing_header = 0;
+
+ /* There can be at most 4 consecutive ALU instructions
+ * sharing the same header.
+ */
+ if (last_header->alu.num_followup_alu_sharing_header < 3 &&
+ header.u32 == clean_header.u32) {
+ last_header->alu.num_followup_alu_sharing_header++;
+ equal_header = true;
+ }
+ }
+
+ if (!equal_header) {
+ ctx->last_alu_header_offset = ctx->blob->size;
+ blob_write_uint32(ctx->blob, header.u32);
+ }
} else {
- val |= !!(dst->reg.indirect) << 1;
+ blob_write_uint32(ctx->blob, header.u32);
}
- blob_write_uint32(ctx->blob, val);
+
if (dst->is_ssa) {
write_add_object(ctx, &dst->ssa);
- if (dst->ssa.name)
+ if (dest.ssa.has_name)
blob_write_string(ctx->blob, dst->ssa.name);
} else {
- blob_write_intptr(ctx->blob, write_lookup_object(ctx, dst->reg.reg));
+ blob_write_uint32(ctx->blob, write_lookup_object(ctx, dst->reg.reg));
blob_write_uint32(ctx->blob, dst->reg.base_offset);
if (dst->reg.indirect)
write_src(ctx, dst->reg.indirect);
}
static void
-read_dest(read_ctx *ctx, nir_dest *dst, nir_instr *instr)
-{
- uint32_t val = blob_read_uint32(ctx->blob);
- bool is_ssa = val & 0x1;
- if (is_ssa) {
- bool has_name = val & 0x2;
- unsigned num_components = (val >> 2) & 0x7;
- unsigned bit_size = val >> 5;
- char *name = has_name ? blob_read_string(ctx->blob) : NULL;
+read_dest(read_ctx *ctx, nir_dest *dst, nir_instr *instr,
+ union packed_instr header)
+{ /* The destination byte now comes from the already-read instruction header instead of its own dword. */
+ union packed_dest dest;
+ dest.u8 = header.any.dest;
+
+ if (dest.ssa.is_ssa) {
+ unsigned bit_size = decode_bit_size_3bits(dest.ssa.bit_size);
+ unsigned num_components =
+ decode_num_components_in_3bits(dest.ssa.num_components);
+ char *name = dest.ssa.has_name ? blob_read_string(ctx->blob) : NULL;
nir_ssa_dest_init(instr, dst, num_components, bit_size, name);
read_add_object(ctx, &dst->ssa);
} else {
- bool is_indirect = val & 0x2;
dst->reg.reg = read_object(ctx);
dst->reg.base_offset = blob_read_uint32(ctx->blob);
- if (is_indirect) {
+ if (dest.reg.is_indirect) { /* the indirect source follows the base offset in the blob */
dst->reg.indirect = ralloc(instr, nir_src);
read_src(ctx, dst->reg.indirect, instr);
}
}
}
-static void
-write_deref_chain(write_ctx *ctx, const nir_deref_var *deref_var)
-{
- write_object(ctx, deref_var->var);
-
- uint32_t len = 0;
- for (const nir_deref *d = deref_var->deref.child; d; d = d->child)
- len++;
- blob_write_uint32(ctx->blob, len);
-
- for (const nir_deref *d = deref_var->deref.child; d; d = d->child) {
- blob_write_uint32(ctx->blob, d->deref_type);
- switch (d->deref_type) {
- case nir_deref_type_array: {
- const nir_deref_array *deref_array = nir_deref_as_array(d);
- blob_write_uint32(ctx->blob, deref_array->deref_array_type);
- blob_write_uint32(ctx->blob, deref_array->base_offset);
- if (deref_array->deref_array_type == nir_deref_array_type_indirect)
- write_src(ctx, &deref_array->indirect);
- break;
- }
- case nir_deref_type_struct: {
- const nir_deref_struct *deref_struct = nir_deref_as_struct(d);
- blob_write_uint32(ctx->blob, deref_struct->index);
- break;
- }
- case nir_deref_type_var:
- unreachable("Invalid deref type");
- }
-
- encode_type_to_blob(ctx->blob, d->type);
- }
+static bool
+are_object_ids_16bit(write_ctx *ctx)
+{
+ /* Check the highest object ID, because they are monotonic.
+ * ctx->next_idx is the next index write_add_object() will assign, so
+ * every index assigned so far is below it.
+ */
+ return ctx->next_idx < (1 << 16);
}
-static nir_deref_var *
-read_deref_chain(read_ctx *ctx, void *mem_ctx)
+static bool
+is_alu_src_ssa_16bit(write_ctx *ctx, const nir_alu_instr *alu) /* True when every source can be written as a bare 16-bit object index: SSA, no abs/negate, identity swizzle, and all object IDs so far below 1 << 16. */
{
- nir_variable *var = read_object(ctx);
- nir_deref_var *deref_var = nir_deref_var_create(mem_ctx, var);
+ unsigned num_srcs = nir_op_infos[alu->op].num_inputs;
- uint32_t len = blob_read_uint32(ctx->blob);
-
- nir_deref *tail = &deref_var->deref;
- for (uint32_t i = 0; i < len; i++) {
- nir_deref_type deref_type = blob_read_uint32(ctx->blob);
- nir_deref *deref = NULL;
- switch (deref_type) {
- case nir_deref_type_array: {
- nir_deref_array *deref_array = nir_deref_array_create(tail);
- deref_array->deref_array_type = blob_read_uint32(ctx->blob);
- deref_array->base_offset = blob_read_uint32(ctx->blob);
- if (deref_array->deref_array_type == nir_deref_array_type_indirect)
- read_src(ctx, &deref_array->indirect, mem_ctx);
- deref = &deref_array->deref;
- break;
- }
- case nir_deref_type_struct: {
- uint32_t index = blob_read_uint32(ctx->blob);
- nir_deref_struct *deref_struct = nir_deref_struct_create(tail, index);
- deref = &deref_struct->deref;
- break;
- }
- case nir_deref_type_var:
- unreachable("Invalid deref type");
- }
+ for (unsigned i = 0; i < num_srcs; i++) {
+ if (!alu->src[i].src.is_ssa || alu->src[i].abs || alu->src[i].negate)
+ return false;
- deref->type = decode_type_from_blob(ctx->blob);
+ unsigned src_components = nir_ssa_alu_instr_src_components(alu, i);
- tail->child = deref;
- tail = deref;
+ for (unsigned chan = 0; chan < src_components; chan++) {
+ if (alu->src[i].swizzle[chan] != chan) /* only the identity swizzle is accepted; read_alu() rebuilds exactly that */
+ return false;
+ }
}
- return deref_var;
+ return are_object_ids_16bit(ctx);
}
static void
write_alu(write_ctx *ctx, const nir_alu_instr *alu)
{
- blob_write_uint32(ctx->blob, alu->op);
- uint32_t flags = alu->exact;
- flags |= alu->dest.saturate << 1;
- flags |= alu->dest.write_mask << 2;
- blob_write_uint32(ctx->blob, flags);
-
- write_dest(ctx, &alu->dest.dest);
-
- for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
- write_src(ctx, &alu->src[i].src);
- flags = alu->src[i].negate;
- flags |= alu->src[i].abs << 1;
- for (unsigned j = 0; j < 4; j++)
- flags |= alu->src[i].swizzle[j] << (2 + 2 * j);
- blob_write_uint32(ctx->blob, flags);
+ unsigned num_srcs = nir_op_infos[alu->op].num_inputs;
+ /* 9 bits for nir_op */
+ STATIC_ASSERT(nir_num_opcodes <= 512);
+ union packed_instr header; /* opcode, flags and write mask are all packed into this one dword */
+ header.u32 = 0;
+
+ header.alu.instr_type = alu->instr.type;
+ header.alu.exact = alu->exact;
+ header.alu.no_signed_wrap = alu->no_signed_wrap;
+ header.alu.no_unsigned_wrap = alu->no_unsigned_wrap;
+ header.alu.saturate = alu->dest.saturate;
+ header.alu.writemask = alu->dest.write_mask;
+ header.alu.op = alu->op;
+ header.alu.packed_src_ssa_16bit = is_alu_src_ssa_16bit(ctx, alu);
+
+ write_dest(ctx, &alu->dest.dest, header, alu->instr.type); /* adds the destination byte and may skip the header when it equals the previous ALU header */
+
+ if (header.alu.packed_src_ssa_16bit) {
+ for (unsigned i = 0; i < num_srcs; i++) {
+ assert(alu->src[i].src.is_ssa);
+ unsigned idx = write_lookup_object(ctx, alu->src[i].src.ssa);
+ assert(idx < (1 << 16));
+ blob_write_uint16(ctx->blob, idx);
+ }
+ } else {
+ for (unsigned i = 0; i < num_srcs; i++) {
+ union packed_src src; /* negate, abs and swizzle go in the footer bits of the source header */
+ src.u32 = 0;
+
+ src.alu.negate = alu->src[i].negate;
+ src.alu.abs = alu->src[i].abs;
+ src.alu.swizzle_x = alu->src[i].swizzle[0];
+ src.alu.swizzle_y = alu->src[i].swizzle[1];
+ src.alu.swizzle_z = alu->src[i].swizzle[2];
+ src.alu.swizzle_w = alu->src[i].swizzle[3];
+
+ write_src_full(ctx, &alu->src[i].src, src);
+ }
}
}
static nir_alu_instr *
-read_alu(read_ctx *ctx)
+read_alu(read_ctx *ctx, union packed_instr header)
{
- nir_op op = blob_read_uint32(ctx->blob);
- nir_alu_instr *alu = nir_alu_instr_create(ctx->nir, op);
+ unsigned num_srcs = nir_op_infos[header.alu.op].num_inputs;
+ nir_alu_instr *alu = nir_alu_instr_create(ctx->nir, header.alu.op);
- uint32_t flags = blob_read_uint32(ctx->blob);
- alu->exact = flags & 1;
- alu->dest.saturate = flags & 2;
- alu->dest.write_mask = flags >> 2;
-
- read_dest(ctx, &alu->dest.dest, &alu->instr);
-
- for (unsigned i = 0; i < nir_op_infos[op].num_inputs; i++) {
- read_src(ctx, &alu->src[i].src, &alu->instr);
- flags = blob_read_uint32(ctx->blob);
- alu->src[i].negate = flags & 1;
- alu->src[i].abs = flags & 2;
- for (unsigned j = 0; j < 4; j++)
- alu->src[i].swizzle[j] = (flags >> (2 * j + 2)) & 3;
+ alu->exact = header.alu.exact;
+ alu->no_signed_wrap = header.alu.no_signed_wrap;
+ alu->no_unsigned_wrap = header.alu.no_unsigned_wrap;
+ alu->dest.saturate = header.alu.saturate;
+ alu->dest.write_mask = header.alu.writemask;
+
+ read_dest(ctx, &alu->dest.dest, &alu->instr, header);
+
+ if (header.alu.packed_src_ssa_16bit) {
+ for (unsigned i = 0; i < num_srcs; i++) {
+ nir_alu_src *src = &alu->src[i];
+ src->src.is_ssa = true;
+ src->src.ssa = read_lookup_object(ctx, blob_read_uint16(ctx->blob));
+
+ memset(&src->swizzle, 0, sizeof(src->swizzle));
+
+ unsigned src_components = nir_ssa_alu_instr_src_components(alu, i);
+
+ for (unsigned chan = 0; chan < src_components; chan++)
+ src->swizzle[chan] = chan;
+ }
+ } else {
+ for (unsigned i = 0; i < num_srcs; i++) {
+ union packed_src src = read_src(ctx, &alu->src[i].src, &alu->instr);
+
+ alu->src[i].negate = src.alu.negate;
+ alu->src[i].abs = src.alu.abs;
+ alu->src[i].swizzle[0] = src.alu.swizzle_x;
+ alu->src[i].swizzle[1] = src.alu.swizzle_y;
+ alu->src[i].swizzle[2] = src.alu.swizzle_z;
+ alu->src[i].swizzle[3] = src.alu.swizzle_w;
+ }
}
return alu;
static void
write_deref(write_ctx *ctx, const nir_deref_instr *deref)
{
- blob_write_uint32(ctx->blob, deref->deref_type);
+ assert(deref->deref_type < 8);
+ assert(deref->mode < (1 << 10));
- blob_write_uint32(ctx->blob, deref->mode);
- encode_type_to_blob(ctx->blob, deref->type);
+ union packed_instr header;
+ header.u32 = 0;
- write_dest(ctx, &deref->dest);
+ header.deref.instr_type = deref->instr.type;
+ header.deref.deref_type = deref->deref_type;
+ if (deref->deref_type == nir_deref_type_cast) {
+ header.deref.mode = deref->mode;
+ header.deref.cast_type_same_as_last = deref->type == ctx->last_type;
+ }
+
+ unsigned var_idx = 0;
if (deref->deref_type == nir_deref_type_var) {
- write_object(ctx, deref->var);
- return;
+ var_idx = write_lookup_object(ctx, deref->var);
+ if (var_idx && var_idx < (1 << 16))
+ header.deref_var.object_idx = var_idx;
}
- write_src(ctx, &deref->parent);
+ if (deref->deref_type == nir_deref_type_array ||
+ deref->deref_type == nir_deref_type_ptr_as_array) {
+ header.deref.packed_src_ssa_16bit =
+ deref->parent.is_ssa && deref->arr.index.is_ssa &&
+ are_object_ids_16bit(ctx);
+ }
+
+ write_dest(ctx, &deref->dest, header, deref->instr.type);
switch (deref->deref_type) {
+ case nir_deref_type_var:
+ if (!header.deref_var.object_idx)
+ blob_write_uint32(ctx->blob, var_idx);
+ break;
+
case nir_deref_type_struct:
+ write_src(ctx, &deref->parent);
blob_write_uint32(ctx->blob, deref->strct.index);
break;
case nir_deref_type_array:
- write_src(ctx, &deref->arr.index);
+ case nir_deref_type_ptr_as_array:
+ if (header.deref.packed_src_ssa_16bit) {
+ blob_write_uint16(ctx->blob,
+ write_lookup_object(ctx, deref->parent.ssa));
+ blob_write_uint16(ctx->blob,
+ write_lookup_object(ctx, deref->arr.index.ssa));
+ } else {
+ write_src(ctx, &deref->parent);
+ write_src(ctx, &deref->arr.index);
+ }
break;
- case nir_deref_type_array_wildcard:
case nir_deref_type_cast:
- /* Nothing to do */
+ write_src(ctx, &deref->parent);
+ blob_write_uint32(ctx->blob, deref->cast.ptr_stride);
+ if (!header.deref.cast_type_same_as_last) {
+ encode_type_to_blob(ctx->blob, deref->type);
+ ctx->last_type = deref->type;
+ }
+ break;
+
+ case nir_deref_type_array_wildcard:
+ write_src(ctx, &deref->parent);
break;
default:
}
static nir_deref_instr *
-read_deref(read_ctx *ctx)
+read_deref(read_ctx *ctx, union packed_instr header) /* Only cast derefs read their type and mode from the stream; every other kind derives them from its variable or parent deref. */
{
- nir_deref_type deref_type = blob_read_uint32(ctx->blob);
+ nir_deref_type deref_type = header.deref.deref_type;
nir_deref_instr *deref = nir_deref_instr_create(ctx->nir, deref_type);
- deref->mode = blob_read_uint32(ctx->blob);
- deref->type = decode_type_from_blob(ctx->blob);
+ read_dest(ctx, &deref->dest, &deref->instr, header);
- read_dest(ctx, &deref->dest, &deref->instr);
+ nir_deref_instr *parent;
- if (deref_type == nir_deref_type_var) {
- deref->var = read_object(ctx);
- return deref;
- }
+ switch (deref->deref_type) {
+ case nir_deref_type_var:
+ if (header.deref_var.object_idx) /* non-zero: the variable's object index was stored inline in the header */
+ deref->var = read_lookup_object(ctx, header.deref_var.object_idx);
+ else
+ deref->var = read_object(ctx);
- read_src(ctx, &deref->parent, &deref->instr);
+ deref->type = deref->var->type;
+ break;
- switch (deref->deref_type) {
case nir_deref_type_struct:
+ read_src(ctx, &deref->parent, &deref->instr);
+ parent = nir_src_as_deref(deref->parent);
deref->strct.index = blob_read_uint32(ctx->blob);
+ deref->type = glsl_get_struct_field(parent->type, deref->strct.index);
break;
case nir_deref_type_array:
- read_src(ctx, &deref->arr.index, &deref->instr);
+ case nir_deref_type_ptr_as_array:
+ if (header.deref.packed_src_ssa_16bit) { /* parent and index were written as two bare 16-bit SSA object indices */
+ deref->parent.is_ssa = true;
+ deref->parent.ssa = read_lookup_object(ctx, blob_read_uint16(ctx->blob));
+ deref->arr.index.is_ssa = true;
+ deref->arr.index.ssa = read_lookup_object(ctx, blob_read_uint16(ctx->blob));
+ } else {
+ read_src(ctx, &deref->parent, &deref->instr);
+ read_src(ctx, &deref->arr.index, &deref->instr);
+ }
+
+ parent = nir_src_as_deref(deref->parent);
+ if (deref->deref_type == nir_deref_type_array)
+ deref->type = glsl_get_array_element(parent->type);
+ else
+ deref->type = parent->type;
break;
- case nir_deref_type_array_wildcard:
case nir_deref_type_cast:
- /* Nothing to do */
+ read_src(ctx, &deref->parent, &deref->instr);
+ deref->cast.ptr_stride = blob_read_uint32(ctx->blob);
+ if (header.deref.cast_type_same_as_last) {
+ deref->type = ctx->last_type;
+ } else {
+ deref->type = decode_type_from_blob(ctx->blob);
+ ctx->last_type = deref->type;
+ }
+ break;
+
+ case nir_deref_type_array_wildcard:
+ read_src(ctx, &deref->parent, &deref->instr);
+ parent = nir_src_as_deref(deref->parent);
+ deref->type = glsl_get_array_element(parent->type);
break;
default:
unreachable("Invalid deref type");
}
+ if (deref_type == nir_deref_type_var) {
+ deref->mode = deref->var->data.mode;
+ } else if (deref->deref_type == nir_deref_type_cast) {
+ deref->mode = header.deref.mode;
+ } else {
+ assert(deref->parent.is_ssa);
+ deref->mode = nir_instr_as_deref(deref->parent.ssa->parent_instr)->mode; /* inherits the mode of the parent deref instruction */
+ }
+
return deref;
}
static void
write_intrinsic(write_ctx *ctx, const nir_intrinsic_instr *intrin)
{
- blob_write_uint32(ctx->blob, intrin->intrinsic);
-
- unsigned num_variables = nir_intrinsic_infos[intrin->intrinsic].num_variables;
+ /* Writes one intrinsic: a packed 32-bit header, then its sources, then any
+  * const indices that were not folded into the header.
+  *
+  * 9 bits for nir_intrinsic_op */
+ STATIC_ASSERT(nir_num_intrinsics <= 512);
unsigned num_srcs = nir_intrinsic_infos[intrin->intrinsic].num_srcs;
unsigned num_indices = nir_intrinsic_infos[intrin->intrinsic].num_indices;
+ assert(intrin->intrinsic < 512);
+
+ union packed_instr header;
+ header.u32 = 0; /* start from all-zero bits so fields not set below are 0 */
+
+ header.intrinsic.instr_type = intrin->instr.type;
+ header.intrinsic.intrinsic = intrin->intrinsic;
+ header.intrinsic.num_components =
+ encode_num_components_in_3bits(intrin->num_components);
+
+ /* Analyze constant indices to decide how to encode them: track the widest
+  * index (in bits) and pick the narrowest encoding that fits every index. */
+ if (num_indices) {
+ unsigned max_bits = 0;
+ for (unsigned i = 0; i < num_indices; i++) {
+ unsigned max = util_last_bit(intrin->const_index[i]); /* presumably the number of bits needed for this index; confirm against util_last_bit */
+ max_bits = MAX2(max_bits, max);
+ }
- blob_write_uint32(ctx->blob, intrin->num_components);
+ if (max_bits * num_indices <= 6) { /* all indices together fit the 6-bit header field */
+ header.intrinsic.const_indices_encoding = const_indices_6bit_all_combined;
+
+ /* Pack all const indices into 6 bits, each index getting an equal
+  * 6 / num_indices bit slot, index 0 in the lowest bits. */
+ unsigned bit_size = 6 / num_indices;
+ for (unsigned i = 0; i < num_indices; i++) {
+ header.intrinsic.packed_const_indices |=
+ intrin->const_index[i] << (i * bit_size);
+ }
+ } else if (max_bits <= 8)
+ header.intrinsic.const_indices_encoding = const_indices_8bit;
+ else if (max_bits <= 16)
+ header.intrinsic.const_indices_encoding = const_indices_16bit;
+ else
+ header.intrinsic.const_indices_encoding = const_indices_32bit;
+ }
if (nir_intrinsic_infos[intrin->intrinsic].has_dest)
- write_dest(ctx, &intrin->dest);
-
- for (unsigned i = 0; i < num_variables; i++)
- write_deref_chain(ctx, intrin->variables[i]);
+ write_dest(ctx, &intrin->dest, header, intrin->instr.type); /* write_dest is handed the header; presumably it emits it together with the destination */
+ else
+ blob_write_uint32(ctx->blob, header.u32); /* no destination: the header is written by itself */
for (unsigned i = 0; i < num_srcs; i++)
write_src(ctx, &intrin->src[i]);
- for (unsigned i = 0; i < num_indices; i++)
- blob_write_uint32(ctx->blob, intrin->const_index[i]);
+ if (num_indices) {
+ switch (header.intrinsic.const_indices_encoding) { /* no case for the 6-bit combined encoding: those indices already live in the header */
+ case const_indices_8bit:
+ for (unsigned i = 0; i < num_indices; i++)
+ blob_write_uint8(ctx->blob, intrin->const_index[i]);
+ break;
+ case const_indices_16bit:
+ for (unsigned i = 0; i < num_indices; i++)
+ blob_write_uint16(ctx->blob, intrin->const_index[i]);
+ break;
+ case const_indices_32bit:
+ for (unsigned i = 0; i < num_indices; i++)
+ blob_write_uint32(ctx->blob, intrin->const_index[i]);
+ break;
+ }
+ }
}
static nir_intrinsic_instr *
-read_intrinsic(read_ctx *ctx)
+read_intrinsic(read_ctx *ctx, union packed_instr header)
{
- nir_intrinsic_op op = blob_read_uint32(ctx->blob);
-
+ nir_intrinsic_op op = header.intrinsic.intrinsic; /* opcode comes from the packed header passed in by the caller, not from the blob */
nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(ctx->nir, op);
- unsigned num_variables = nir_intrinsic_infos[op].num_variables;
unsigned num_srcs = nir_intrinsic_infos[op].num_srcs;
unsigned num_indices = nir_intrinsic_infos[op].num_indices;
- intrin->num_components = blob_read_uint32(ctx->blob);
+ intrin->num_components =
+ decode_num_components_in_3bits(header.intrinsic.num_components); /* inverse of the 3-bit encoding used by write_intrinsic */
if (nir_intrinsic_infos[op].has_dest)
- read_dest(ctx, &intrin->dest, &intrin->instr);
-
- for (unsigned i = 0; i < num_variables; i++)
- intrin->variables[i] = read_deref_chain(ctx, &intrin->instr);
+ read_dest(ctx, &intrin->dest, &intrin->instr, header);
for (unsigned i = 0; i < num_srcs; i++)
read_src(ctx, &intrin->src[i], &intrin->instr);
- for (unsigned i = 0; i < num_indices; i++)
- intrin->const_index[i] = blob_read_uint32(ctx->blob);
+ if (num_indices) {
+ switch (header.intrinsic.const_indices_encoding) { /* how the const indices were stored is recorded in the header */
+ case const_indices_6bit_all_combined: {
+ unsigned bit_size = 6 / num_indices; /* each index has an equal slot of the 6-bit field */
+ unsigned bit_mask = u_bit_consecutive(0, bit_size); /* presumably a mask of the low bit_size bits; confirm against u_bit_consecutive */
+ for (unsigned i = 0; i < num_indices; i++) {
+ intrin->const_index[i] =
+ (header.intrinsic.packed_const_indices >> (i * bit_size)) &
+ bit_mask;
+ }
+ break;
+ }
+ case const_indices_8bit:
+ for (unsigned i = 0; i < num_indices; i++)
+ intrin->const_index[i] = blob_read_uint8(ctx->blob);
+ break;
+ case const_indices_16bit:
+ for (unsigned i = 0; i < num_indices; i++)
+ intrin->const_index[i] = blob_read_uint16(ctx->blob);
+ break;
+ case const_indices_32bit:
+ for (unsigned i = 0; i < num_indices; i++)
+ intrin->const_index[i] = blob_read_uint32(ctx->blob);
+ break;
+ }
+ }
return intrin;
}
static void
write_load_const(write_ctx *ctx, const nir_load_const_instr *lc)
{
- uint32_t val = lc->def.num_components;
- val |= lc->def.bit_size << 3;
- blob_write_uint32(ctx->blob, val);
- blob_write_bytes(ctx->blob, (uint8_t *) &lc->value, sizeof(lc->value));
+ /* Serialize a load_const as a packed 32-bit header, followed by the
+  * component values only when they could not be folded into the header.
+  */
+ assert(lc->def.num_components >= 1 && lc->def.num_components <= 16);
+ union packed_instr header;
+ header.u32 = 0;
+
+ header.load_const.instr_type = lc->instr.type;
+ header.load_const.last_component = lc->def.num_components - 1; /* stored as count - 1 */
+ header.load_const.bit_size = encode_bit_size_3bits(lc->def.bit_size);
+ header.load_const.packing = load_const_full; /* default: values follow the header */
+
+ /* Try to pack 1-component constants into the 19 free bits in the header. */
+ if (lc->def.num_components == 1) {
+ switch (lc->def.bit_size) {
+ case 64:
+ if ((lc->value[0].u64 & 0x1fffffffffffull) == 0) {
+ /* packed_value contains high 19 bits, low bits are 0 */
+ header.load_const.packing = load_const_scalar_hi_19bits;
+ header.load_const.packed_value = lc->value[0].u64 >> 45;
+ } else if (((int64_t)(lc->value[0].u64 << 45) >> 45) == lc->value[0].i64) {
+ /* packed_value contains low 19 bits, high bits are sign-extended.
+  * The test shifts the unsigned view and converts afterwards:
+  * left-shifting a signed value out of range is undefined in C.
+  */
+ header.load_const.packing = load_const_scalar_lo_19bits_sext;
+ header.load_const.packed_value = lc->value[0].u64;
+ }
+ break;
+
+ case 32:
+ if ((lc->value[0].u32 & 0x1fff) == 0) {
+ /* packed_value contains high 19 bits, low 13 bits are 0 */
+ header.load_const.packing = load_const_scalar_hi_19bits;
+ header.load_const.packed_value = lc->value[0].u32 >> 13;
+ } else if (((int32_t)(lc->value[0].u32 << 13) >> 13) == lc->value[0].i32) {
+ /* low 19 bits, sign-extended; unsigned shift for the same reason
+  * as in the 64-bit case above.
+  */
+ header.load_const.packing = load_const_scalar_lo_19bits_sext;
+ header.load_const.packed_value = lc->value[0].u32;
+ }
+ break;
+
+ case 16:
+ /* 16 bits and narrower always fit in the 19-bit field */
+ header.load_const.packing = load_const_scalar_lo_19bits_sext;
+ header.load_const.packed_value = lc->value[0].u16;
+ break;
+ case 8:
+ header.load_const.packing = load_const_scalar_lo_19bits_sext;
+ header.load_const.packed_value = lc->value[0].u8;
+ break;
+ case 1:
+ header.load_const.packing = load_const_scalar_lo_19bits_sext;
+ header.load_const.packed_value = lc->value[0].b;
+ break;
+ default:
+ unreachable("invalid bit_size");
+ }
+ }
+
+ blob_write_uint32(ctx->blob, header.u32);
+
+ /* Only unpacked constants carry a payload, sized by the bit size. */
+ if (header.load_const.packing == load_const_full) {
+ switch (lc->def.bit_size) {
+ case 64:
+ blob_write_bytes(ctx->blob, lc->value,
+ sizeof(*lc->value) * lc->def.num_components);
+ break;
+
+ case 32:
+ for (unsigned i = 0; i < lc->def.num_components; i++)
+ blob_write_uint32(ctx->blob, lc->value[i].u32);
+ break;
+
+ case 16:
+ for (unsigned i = 0; i < lc->def.num_components; i++)
+ blob_write_uint16(ctx->blob, lc->value[i].u16);
+ break;
+
+ default:
+ assert(lc->def.bit_size <= 8);
+ for (unsigned i = 0; i < lc->def.num_components; i++)
+ blob_write_uint8(ctx->blob, lc->value[i].u8);
+ break;
+ }
+ }
+
write_add_object(ctx, &lc->def);
}
static nir_load_const_instr *
-read_load_const(read_ctx *ctx)
+read_load_const(read_ctx *ctx, union packed_instr header)
{
- uint32_t val = blob_read_uint32(ctx->blob);
-
+ /* Rebuild a load_const from the packed header passed in by the caller;
+  * component count and bit size always come from the header, the value
+  * comes from the header or the blob depending on header.load_const.packing.
+  */
nir_load_const_instr *lc =
- nir_load_const_instr_create(ctx->nir, val & 0x7, val >> 3);
+ nir_load_const_instr_create(ctx->nir, header.load_const.last_component + 1,
+ decode_bit_size_3bits(header.load_const.bit_size));
+
+ switch (header.load_const.packing) {
+ case load_const_scalar_hi_19bits:
+ /* packed_value holds the top 19 bits; the remaining low bits are 0 */
+ switch (lc->def.bit_size) {
+ case 64:
+ lc->value[0].u64 = (uint64_t)header.load_const.packed_value << 45;
+ break;
+ case 32:
+ lc->value[0].u32 = (uint64_t)header.load_const.packed_value << 13;
+ break;
+ default:
+ unreachable("invalid bit_size");
+ }
+ break;
+
+ case load_const_scalar_lo_19bits_sext:
+ /* packed_value holds the low 19 bits. For 32/64-bit values, move bit 18
+  * to the sign position with an unsigned shift (a signed left shift
+  * would overflow, which is undefined in C), then shift back
+  * arithmetically to sign-extend.
+  */
+ switch (lc->def.bit_size) {
+ case 64:
+ lc->value[0].i64 = (int64_t)((uint64_t)header.load_const.packed_value << 45) >> 45;
+ break;
+ case 32:
+ lc->value[0].i32 = (int32_t)((uint32_t)header.load_const.packed_value << 13) >> 13;
+ break;
+ case 16:
+ lc->value[0].u16 = header.load_const.packed_value;
+ break;
+ case 8:
+ lc->value[0].u8 = header.load_const.packed_value;
+ break;
+ case 1:
+ lc->value[0].b = header.load_const.packed_value;
+ break;
+ default:
+ unreachable("invalid bit_size");
+ }
+ break;
+
+ case load_const_full:
+ /* values follow the header, one element per component */
+ switch (lc->def.bit_size) {
+ case 64:
+ blob_copy_bytes(ctx->blob, lc->value, sizeof(*lc->value) * lc->def.num_components);
+ break;
+
+ case 32:
+ for (unsigned i = 0; i < lc->def.num_components; i++)
+ lc->value[i].u32 = blob_read_uint32(ctx->blob);
+ break;
+
+ case 16:
+ for (unsigned i = 0; i < lc->def.num_components; i++)
+ lc->value[i].u16 = blob_read_uint16(ctx->blob);
+ break;
+
+ default:
+ assert(lc->def.bit_size <= 8);
+ for (unsigned i = 0; i < lc->def.num_components; i++)
+ lc->value[i].u8 = blob_read_uint8(ctx->blob);
+ break;
+ }
+ break;
+ }
- blob_copy_bytes(ctx->blob, (uint8_t *) &lc->value, sizeof(lc->value));
read_add_object(ctx, &lc->def);
return lc;
}
static void
write_ssa_undef(write_ctx *ctx, const nir_ssa_undef_instr *undef)
{
- uint32_t val = undef->def.num_components;
- val |= undef->def.bit_size << 3;
- blob_write_uint32(ctx->blob, val);
+ assert(undef->def.num_components >= 1 && undef->def.num_components <= 16); /* the header stores the count as count - 1 */
+
+ union packed_instr header;
+ header.u32 = 0; /* start from all-zero bits so fields not set below are 0 */
+
+ header.undef.instr_type = undef->instr.type;
+ header.undef.last_component = undef->def.num_components - 1;
+ header.undef.bit_size = encode_bit_size_3bits(undef->def.bit_size);
+
+ blob_write_uint32(ctx->blob, header.u32); /* an undef is fully described by its header; nothing else is written */
write_add_object(ctx, &undef->def);
}
static nir_ssa_undef_instr *
-read_ssa_undef(read_ctx *ctx)
+read_ssa_undef(read_ctx *ctx, union packed_instr header)
{
- uint32_t val = blob_read_uint32(ctx->blob);
-
nir_ssa_undef_instr *undef =
- nir_ssa_undef_instr_create(ctx->nir, val & 0x7, val >> 3);
+ nir_ssa_undef_instr_create(ctx->nir, header.undef.last_component + 1,
+ decode_bit_size_3bits(header.undef.bit_size));
read_add_object(ctx, &undef->def);
return undef;
unsigned is_shadow:1;
unsigned is_new_style_shadow:1;
unsigned component:2;
- unsigned has_texture_deref:1;
- unsigned has_sampler_deref:1;
unsigned unused:10; /* Mark unused for valgrind. */
} u;
};
static void
write_tex(write_ctx *ctx, const nir_tex_instr *tex)
{
- blob_write_uint32(ctx->blob, tex->num_srcs);
- blob_write_uint32(ctx->blob, tex->op);
+ assert(tex->num_srcs < 16); /* these three values are stored in header.tex bitfields; the bounds presumably mirror the field widths in union packed_instr */
+ assert(tex->op < 16);
+ assert(tex->texture_array_size < 1024);
+
+ union packed_instr header;
+ header.u32 = 0; /* start from all-zero bits so fields not set below are 0 */
+
+ header.tex.instr_type = tex->instr.type;
+ header.tex.num_srcs = tex->num_srcs;
+ header.tex.op = tex->op;
+ header.tex.texture_array_size = tex->texture_array_size;
+
+ write_dest(ctx, &tex->dest, header, tex->instr.type); /* destination now goes first and is handed the header; presumably write_dest emits the header as well */
+
blob_write_uint32(ctx->blob, tex->texture_index);
- blob_write_uint32(ctx->blob, tex->texture_array_size);
blob_write_uint32(ctx->blob, tex->sampler_index);
+ if (tex->op == nir_texop_tg4) /* tg4_offsets are only serialized for tg4 */
+ blob_write_bytes(ctx->blob, tex->tg4_offsets, sizeof(tex->tg4_offsets));
STATIC_ASSERT(sizeof(union packed_tex_data) == sizeof(uint32_t));
union packed_tex_data packed = {
.u.is_shadow = tex->is_shadow,
.u.is_new_style_shadow = tex->is_new_style_shadow,
.u.component = tex->component,
- .u.has_texture_deref = tex->texture != NULL,
- .u.has_sampler_deref = tex->sampler != NULL,
};
blob_write_uint32(ctx->blob, packed.u32);
- write_dest(ctx, &tex->dest);
for (unsigned i = 0; i < tex->num_srcs; i++) {
- blob_write_uint32(ctx->blob, tex->src[i].src_type);
- write_src(ctx, &tex->src[i].src);
+ union packed_src src;
+ src.u32 = 0;
+ src.tex.src_type = tex->src[i].src_type; /* the source type travels inside the packed source word instead of a separate uint32 */
+ write_src_full(ctx, &tex->src[i].src, src);
}
-
- if (tex->texture)
- write_deref_chain(ctx, tex->texture);
- if (tex->sampler)
- write_deref_chain(ctx, tex->sampler);
}
static nir_tex_instr *
-read_tex(read_ctx *ctx)
+read_tex(read_ctx *ctx, union packed_instr header)
{
- unsigned num_srcs = blob_read_uint32(ctx->blob);
- nir_tex_instr *tex = nir_tex_instr_create(ctx->nir, num_srcs);
+ nir_tex_instr *tex = nir_tex_instr_create(ctx->nir, header.tex.num_srcs); /* source count comes from the packed header passed in by the caller */
+
+ read_dest(ctx, &tex->dest, &tex->instr, header); /* destination is read first, matching the order used in write_tex */
- tex->op = blob_read_uint32(ctx->blob);
+ tex->op = header.tex.op;
tex->texture_index = blob_read_uint32(ctx->blob);
- tex->texture_array_size = blob_read_uint32(ctx->blob);
+ tex->texture_array_size = header.tex.texture_array_size;
tex->sampler_index = blob_read_uint32(ctx->blob);
+ if (tex->op == nir_texop_tg4) /* tg4_offsets are only present in the blob for tg4 */
+ blob_copy_bytes(ctx->blob, tex->tg4_offsets, sizeof(tex->tg4_offsets));
union packed_tex_data packed;
packed.u32 = blob_read_uint32(ctx->blob);
tex->is_new_style_shadow = packed.u.is_new_style_shadow;
tex->component = packed.u.component;
- read_dest(ctx, &tex->dest, &tex->instr);
for (unsigned i = 0; i < tex->num_srcs; i++) {
- tex->src[i].src_type = blob_read_uint32(ctx->blob);
- read_src(ctx, &tex->src[i].src, &tex->instr);
+ union packed_src src = read_src(ctx, &tex->src[i].src, &tex->instr); /* read_src returns the packed source word */
+ tex->src[i].src_type = src.tex.src_type; /* the source type is carried inside that word */
}
- tex->texture = packed.u.has_texture_deref ?
- read_deref_chain(ctx, &tex->instr) : NULL;
- tex->sampler = packed.u.has_sampler_deref ?
- read_deref_chain(ctx, &tex->instr) : NULL;
-
return tex;
}
static void
write_phi(write_ctx *ctx, const nir_phi_instr *phi)
{
+ union packed_instr header;
+ header.u32 = 0;
+
+ header.phi.instr_type = phi->instr.type;
+ header.phi.num_srcs = exec_list_length(&phi->srcs);
+
/* Phi nodes are special, since they may reference SSA definitions and
- * basic blocks that don't exist yet. We leave two empty uintptr_t's here,
+ * basic blocks that don't exist yet. We leave two empty uint32_t's here,
* and then store enough information so that a later fixup pass can fill
* them in correctly.
*/
- write_dest(ctx, &phi->dest);
-
- blob_write_uint32(ctx->blob, exec_list_length(&phi->srcs));
+ write_dest(ctx, &phi->dest, header, phi->instr.type);
nir_foreach_phi_src(src, phi) {
assert(src->src.is_ssa);
- size_t blob_offset = blob_reserve_intptr(ctx->blob);
- MAYBE_UNUSED size_t blob_offset2 = blob_reserve_intptr(ctx->blob);
- assert(blob_offset + sizeof(uintptr_t) == blob_offset2);
+ size_t blob_offset = blob_reserve_uint32(ctx->blob);
+ ASSERTED size_t blob_offset2 = blob_reserve_uint32(ctx->blob);
+ assert(blob_offset + sizeof(uint32_t) == blob_offset2);
write_phi_fixup fixup = {
.blob_offset = blob_offset,
.src = src->src.ssa,
write_fixup_phis(write_ctx *ctx)
{
util_dynarray_foreach(&ctx->phi_fixups, write_phi_fixup, fixup) {
- uintptr_t *blob_ptr = (uintptr_t *)(ctx->blob->data + fixup->blob_offset);
+ uint32_t *blob_ptr = (uint32_t *)(ctx->blob->data + fixup->blob_offset);
blob_ptr[0] = write_lookup_object(ctx, fixup->src);
blob_ptr[1] = write_lookup_object(ctx, fixup->block);
}
}
static nir_phi_instr *
-read_phi(read_ctx *ctx, nir_block *blk)
+read_phi(read_ctx *ctx, nir_block *blk, union packed_instr header)
{
nir_phi_instr *phi = nir_phi_instr_create(ctx->nir);
- read_dest(ctx, &phi->dest, &phi->instr);
-
- unsigned num_srcs = blob_read_uint32(ctx->blob);
+ read_dest(ctx, &phi->dest, &phi->instr, header);
/* For similar reasons as before, we just store the index directly into the
* pointer, and let a later pass resolve the phi sources.
*/
nir_instr_insert_after_block(blk, &phi->instr);
- for (unsigned i = 0; i < num_srcs; i++) {
+ for (unsigned i = 0; i < header.phi.num_srcs; i++) {
nir_phi_src *src = ralloc(phi, nir_phi_src);
src->src.is_ssa = true;
- src->src.ssa = (nir_ssa_def *) blob_read_intptr(ctx->blob);
- src->pred = (nir_block *) blob_read_intptr(ctx->blob);
+ src->src.ssa = (nir_ssa_def *)(uintptr_t) blob_read_uint32(ctx->blob);
+ src->pred = (nir_block *)(uintptr_t) blob_read_uint32(ctx->blob);
/* Since we're not letting nir_insert_instr handle use/def stuff for us,
* we have to set the parent_instr manually. It doesn't really matter
list_addtail(&src->src.use_link, &src->src.ssa->uses);
}
- assert(list_empty(&ctx->phi_srcs));
+ assert(list_is_empty(&ctx->phi_srcs));
}
static void
write_jump(write_ctx *ctx, const nir_jump_instr *jmp)
{
- blob_write_uint32(ctx->blob, jmp->type);
+ assert(jmp->type < 4); /* the jump type is stored in a header bitfield; the bound presumably mirrors a 2-bit field in union packed_instr */
+
+ union packed_instr header;
+ header.u32 = 0; /* start from all-zero bits so fields not set below are 0 */
+
+ header.jump.instr_type = jmp->instr.type;
+ header.jump.type = jmp->type;
+
+ blob_write_uint32(ctx->blob, header.u32); /* a jump is fully described by its header */
}
static nir_jump_instr *
-read_jump(read_ctx *ctx)
+read_jump(read_ctx *ctx, union packed_instr header)
{
- nir_jump_type type = blob_read_uint32(ctx->blob);
- nir_jump_instr *jmp = nir_jump_instr_create(ctx->nir, type);
+ nir_jump_instr *jmp = nir_jump_instr_create(ctx->nir, header.jump.type); /* jump type comes from the packed header; nothing is read from the blob here */
return jmp;
}
static void
write_call(write_ctx *ctx, const nir_call_instr *call)
{
- blob_write_intptr(ctx->blob, write_lookup_object(ctx, call->callee));
+ blob_write_uint32(ctx->blob, write_lookup_object(ctx, call->callee)); /* the callee is referenced by its 32-bit object index */
for (unsigned i = 0; i < call->num_params; i++)
- write_deref_chain(ctx, call->params[i]);
-
- write_deref_chain(ctx, call->return_deref);
+ write_src(ctx, &call->params[i]); /* parameters are written as sources; no separate return deref is written */
}
static nir_call_instr *
nir_call_instr *call = nir_call_instr_create(ctx->nir, callee);
for (unsigned i = 0; i < call->num_params; i++)
- call->params[i] = read_deref_chain(ctx, &call->instr);
-
- call->return_deref = read_deref_chain(ctx, &call->instr);
+ read_src(ctx, &call->params[i], call);
return call;
}
static void
write_instr(write_ctx *ctx, const nir_instr *instr)
{
- blob_write_uint32(ctx->blob, instr->type);
+ /* We have only 4 bits for the instruction type. */
+ assert(instr->type < 16);
+
switch (instr->type) {
case nir_instr_type_alu:
write_alu(ctx, nir_instr_as_alu(instr));
write_jump(ctx, nir_instr_as_jump(instr));
break;
case nir_instr_type_call:
+ blob_write_uint32(ctx->blob, instr->type);
write_call(ctx, nir_instr_as_call(instr));
break;
case nir_instr_type_parallel_copy:
}
}
-static void
+/* Return the number of instructions read. */
+static unsigned
read_instr(read_ctx *ctx, nir_block *block)
{
- nir_instr_type type = blob_read_uint32(ctx->blob);
+ STATIC_ASSERT(sizeof(union packed_instr) == 4);
+ union packed_instr header;
+ header.u32 = blob_read_uint32(ctx->blob);
nir_instr *instr;
- switch (type) {
+
+ switch (header.any.instr_type) {
case nir_instr_type_alu:
- instr = &read_alu(ctx)->instr;
- break;
+ for (unsigned i = 0; i <= header.alu.num_followup_alu_sharing_header; i++)
+ nir_instr_insert_after_block(block, &read_alu(ctx, header)->instr);
+ return header.alu.num_followup_alu_sharing_header + 1;
case nir_instr_type_deref:
- instr = &read_deref(ctx)->instr;
+ instr = &read_deref(ctx, header)->instr;
break;
case nir_instr_type_intrinsic:
- instr = &read_intrinsic(ctx)->instr;
+ instr = &read_intrinsic(ctx, header)->instr;
break;
case nir_instr_type_load_const:
- instr = &read_load_const(ctx)->instr;
+ instr = &read_load_const(ctx, header)->instr;
break;
case nir_instr_type_ssa_undef:
- instr = &read_ssa_undef(ctx)->instr;
+ instr = &read_ssa_undef(ctx, header)->instr;
break;
case nir_instr_type_tex:
- instr = &read_tex(ctx)->instr;
+ instr = &read_tex(ctx, header)->instr;
break;
case nir_instr_type_phi:
/* Phi instructions are a bit of a special case when reading because we
* for us. Instead, we need to wait until all the blocks/instructions
* are read so that we can set their sources up.
*/
- read_phi(ctx, block);
- return;
+ read_phi(ctx, block, header);
+ return 1;
case nir_instr_type_jump:
- instr = &read_jump(ctx)->instr;
+ instr = &read_jump(ctx, header)->instr;
break;
case nir_instr_type_call:
instr = &read_call(ctx)->instr;
}
nir_instr_insert_after_block(block, instr);
+ return 1;
}
static void
{
write_add_object(ctx, block);
blob_write_uint32(ctx->blob, exec_list_length(&block->instr_list));
- nir_foreach_instr(instr, block)
+
+ ctx->last_instr_type = ~0;
+ ctx->last_alu_header_offset = 0;
+
+ nir_foreach_instr(instr, block) {
write_instr(ctx, instr);
+ ctx->last_instr_type = instr->type;
+ }
}
static void
read_add_object(ctx, block);
unsigned num_instrs = blob_read_uint32(ctx->blob);
- for (unsigned i = 0; i < num_instrs; i++) {
- read_instr(ctx, block);
+ for (unsigned i = 0; i < num_instrs;) {
+ i += read_instr(ctx, block);
}
}
write_reg_list(ctx, &fi->registers);
blob_write_uint32(ctx->blob, fi->reg_alloc);
- blob_write_uint32(ctx->blob, fi->num_params);
- for (unsigned i = 0; i < fi->num_params; i++) {
- write_variable(ctx, fi->params[i]);
- }
-
- blob_write_uint32(ctx->blob, !!(fi->return_var));
- if (fi->return_var)
- write_variable(ctx, fi->return_var);
-
write_cf_list(ctx, &fi->body);
write_fixup_phis(ctx);
}
read_reg_list(ctx, &fi->registers);
fi->reg_alloc = blob_read_uint32(ctx->blob);
- fi->num_params = blob_read_uint32(ctx->blob);
- for (unsigned i = 0; i < fi->num_params; i++) {
- fi->params[i] = read_variable(ctx);
- }
-
- bool has_return = blob_read_uint32(ctx->blob);
- if (has_return)
- fi->return_var = read_variable(ctx);
- else
- fi->return_var = NULL;
-
read_cf_list(ctx, &fi->body);
read_fixup_phis(ctx);
static void
write_function(write_ctx *ctx, const nir_function *fxn)
{
- blob_write_uint32(ctx->blob, !!(fxn->name));
+ uint32_t flags = fxn->is_entrypoint;
+ if (fxn->name)
+ flags |= 0x2;
+ if (fxn->impl)
+ flags |= 0x4;
+ blob_write_uint32(ctx->blob, flags);
if (fxn->name)
blob_write_string(ctx->blob, fxn->name);
blob_write_uint32(ctx->blob, fxn->num_params);
for (unsigned i = 0; i < fxn->num_params; i++) {
- blob_write_uint32(ctx->blob, fxn->params[i].param_type);
- encode_type_to_blob(ctx->blob, fxn->params[i].type);
+ uint32_t val =
+ ((uint32_t)fxn->params[i].num_components) |
+ ((uint32_t)fxn->params[i].bit_size) << 8;
+ blob_write_uint32(ctx->blob, val);
}
- encode_type_to_blob(ctx->blob, fxn->return_type);
-
/* At first glance, it looks like we should write the function_impl here.
* However, call instructions need to be able to reference at least the
* function and those will get processed as we write the function_impls.
static void
read_function(read_ctx *ctx)
{
- bool has_name = blob_read_uint32(ctx->blob);
+ uint32_t flags = blob_read_uint32(ctx->blob); /* bit 0: is_entrypoint, bit 1: a name string follows, bit 2: the function has an impl */
+ bool has_name = flags & 0x2;
char *name = has_name ? blob_read_string(ctx->blob) : NULL;
nir_function *fxn = nir_function_create(ctx->nir, name);
read_add_object(ctx, fxn);
fxn->num_params = blob_read_uint32(ctx->blob);
+ fxn->params = ralloc_array(fxn, nir_parameter, fxn->num_params); /* parameter array is allocated with fxn as its ralloc parent */
for (unsigned i = 0; i < fxn->num_params; i++) {
- fxn->params[i].param_type = blob_read_uint32(ctx->blob);
- fxn->params[i].type = decode_type_from_blob(ctx->blob);
+ uint32_t val = blob_read_uint32(ctx->blob); /* one packed word per parameter */
+ fxn->params[i].num_components = val & 0xff; /* low byte */
+ fxn->params[i].bit_size = (val >> 8) & 0xff; /* second byte */
}
- fxn->return_type = decode_type_from_blob(ctx->blob);
+ fxn->is_entrypoint = flags & 0x1;
+ if (flags & 0x4)
+ fxn->impl = NIR_SERIALIZE_FUNC_HAS_IMPL; /* placeholder marker only; nir_deserialize checks for it and reads the real impl afterwards */
}
+/**
+ * Serialize NIR into a binary blob.
+ *
+ * \param strip Don't serialize information only useful for debugging,
+ * such as variable names, making cache hits from similar
+ * shaders more likely.
+ */
void
-nir_serialize(struct blob *blob, const nir_shader *nir)
+nir_serialize(struct blob *blob, const nir_shader *nir, bool strip)
{
- write_ctx ctx;
- ctx.remap_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
- _mesa_key_pointer_equal);
- ctx.next_idx = 0;
+ write_ctx ctx = {0};
+ ctx.remap_table = _mesa_pointer_hash_table_create(NULL);
ctx.blob = blob;
ctx.nir = nir;
+ ctx.strip = strip;
util_dynarray_init(&ctx.phi_fixups, NULL);
- size_t idx_size_offset = blob_reserve_intptr(blob);
+ size_t idx_size_offset = blob_reserve_uint32(blob);
struct shader_info info = nir->info;
uint32_t strings = 0;
- if (info.name)
+ if (!strip && info.name)
strings |= 0x1;
- if (info.label)
+ if (!strip && info.label)
strings |= 0x2;
blob_write_uint32(blob, strings);
- if (info.name)
+ if (!strip && info.name)
blob_write_string(blob, info.name);
- if (info.label)
+ if (!strip && info.label)
blob_write_string(blob, info.label);
info.name = info.label = NULL;
blob_write_bytes(blob, (uint8_t *) &info, sizeof(info));
write_var_list(&ctx, &nir->globals);
write_var_list(&ctx, &nir->system_values);
- write_reg_list(&ctx, &nir->registers);
- blob_write_uint32(blob, nir->reg_alloc);
blob_write_uint32(blob, nir->num_inputs);
blob_write_uint32(blob, nir->num_uniforms);
blob_write_uint32(blob, nir->num_outputs);
blob_write_uint32(blob, nir->num_shared);
- blob_write_uint32(blob, nir->lowered_derefs);
+ blob_write_uint32(blob, nir->scratch_size);
blob_write_uint32(blob, exec_list_length(&nir->functions));
nir_foreach_function(fxn, nir) {
}
nir_foreach_function(fxn, nir) {
- write_function_impl(&ctx, fxn->impl);
+ if (fxn->impl)
+ write_function_impl(&ctx, fxn->impl);
}
- *(uintptr_t *)(blob->data + idx_size_offset) = ctx.next_idx;
+ blob_write_uint32(blob, nir->constant_data_size);
+ if (nir->constant_data_size > 0)
+ blob_write_bytes(blob, nir->constant_data, nir->constant_data_size);
+
+ *(uint32_t *)(blob->data + idx_size_offset) = ctx.next_idx;
_mesa_hash_table_destroy(ctx.remap_table, NULL);
util_dynarray_fini(&ctx.phi_fixups);
const struct nir_shader_compiler_options *options,
struct blob_reader *blob)
{
- read_ctx ctx;
+ read_ctx ctx = {0};
ctx.blob = blob;
list_inithead(&ctx.phi_srcs);
- ctx.idx_table_len = blob_read_intptr(blob);
+ ctx.idx_table_len = blob_read_uint32(blob);
ctx.idx_table = calloc(ctx.idx_table_len, sizeof(uintptr_t));
- ctx.next_idx = 0;
uint32_t strings = blob_read_uint32(blob);
char *name = (strings & 0x1) ? blob_read_string(blob) : NULL;
read_var_list(&ctx, &ctx.nir->globals);
read_var_list(&ctx, &ctx.nir->system_values);
- read_reg_list(&ctx, &ctx.nir->registers);
- ctx.nir->reg_alloc = blob_read_uint32(blob);
ctx.nir->num_inputs = blob_read_uint32(blob);
ctx.nir->num_uniforms = blob_read_uint32(blob);
ctx.nir->num_outputs = blob_read_uint32(blob);
ctx.nir->num_shared = blob_read_uint32(blob);
- ctx.nir->lowered_derefs = blob_read_uint32(blob);
+ ctx.nir->scratch_size = blob_read_uint32(blob);
unsigned num_functions = blob_read_uint32(blob);
for (unsigned i = 0; i < num_functions; i++)
read_function(&ctx);
- nir_foreach_function(fxn, ctx.nir)
- fxn->impl = read_function_impl(&ctx, fxn);
+ nir_foreach_function(fxn, ctx.nir) {
+ if (fxn->impl == NIR_SERIALIZE_FUNC_HAS_IMPL)
+ fxn->impl = read_function_impl(&ctx, fxn);
+ }
+
+ ctx.nir->constant_data_size = blob_read_uint32(blob);
+ if (ctx.nir->constant_data_size > 0) {
+ ctx.nir->constant_data =
+ ralloc_size(ctx.nir, ctx.nir->constant_data_size);
+ blob_copy_bytes(blob, ctx.nir->constant_data,
+ ctx.nir->constant_data_size);
+ }
free(ctx.idx_table);
return ctx.nir;
}
-nir_shader *
-nir_shader_serialize_deserialize(void *mem_ctx, nir_shader *s)
+void
+nir_shader_serialize_deserialize(nir_shader *shader)
{
- const struct nir_shader_compiler_options *options = s->options;
+ const struct nir_shader_compiler_options *options = shader->options; /* saved before the shader's contents are discarded below */
struct blob writer;
blob_init(&writer);
- nir_serialize(&writer, s);
- ralloc_free(s);
+ nir_serialize(&writer, shader, false); /* strip = false: keep names and other debug-only data */
+
+ /* Delete all of shader's ralloc children but leave shader itself alone */
+ void *dead_ctx = ralloc_context(NULL);
+ ralloc_adopt(dead_ctx, shader);
+ ralloc_free(dead_ctx);
+
+ dead_ctx = ralloc_context(NULL); /* fresh temporary context that parents the deserialized copy */
struct blob_reader reader;
blob_reader_init(&reader, writer.data, writer.size);
- nir_shader *ns = nir_deserialize(mem_ctx, options, &reader);
+ nir_shader *copy = nir_deserialize(dead_ctx, options, &reader);
blob_finish(&writer);
- return ns;
+ nir_shader_replace(shader, copy); /* presumably moves the copy's contents into shader so the caller's pointer stays valid; confirm against nir_shader_replace */
+ ralloc_free(dead_ctx); /* releases the temporary context */
}