return 0;
}
+#define NUM_COMPONENTS_IS_SEPARATE_7 7
+
static uint8_t
encode_num_components_in_3bits(uint8_t num_components)
{
if (num_components == 16)
return 6;
- unreachable("invalid number in num_components");
- return 0;
+ /* special value indicating that num_components is in the next uint32 */
+ return NUM_COMPONENTS_IS_SEPARATE_7;
}
static uint8_t
struct {
unsigned has_name:1;
unsigned has_constant_initializer:1;
+ unsigned has_pointer_initializer:1;
unsigned has_interface_type:1;
unsigned num_state_slots:7;
unsigned data_encoding:2;
unsigned type_same_as_last:1;
unsigned interface_type_same_as_last:1;
- unsigned _pad:2;
+ unsigned _pad:1;
unsigned num_members:16;
} u;
};
write_add_object(ctx, var);
assert(var->num_state_slots < (1 << 7));
- assert(var->num_members < (1 << 16));
STATIC_ASSERT(sizeof(union packed_var) == 4);
union packed_var flags;
flags.u.has_name = !ctx->strip && var->name;
flags.u.has_constant_initializer = !!(var->constant_initializer);
+ flags.u.has_pointer_initializer = !!(var->pointer_initializer);
flags.u.has_interface_type = !!(var->interface_type);
flags.u.type_same_as_last = var->type == ctx->last_type;
flags.u.interface_type_same_as_last =
}
if (var->constant_initializer)
write_constant(ctx, var->constant_initializer);
+ if (var->pointer_initializer)
+ write_lookup_object(ctx, var->pointer_initializer);
if (var->num_members > 0) {
blob_write_bytes(ctx->blob, (uint8_t *) var->members,
var->num_members * sizeof(*var->members));
var->constant_initializer = read_constant(ctx, var);
else
var->constant_initializer = NULL;
+
+ if (flags.u.has_pointer_initializer)
+ var->pointer_initializer = read_object(ctx);
+ else
+ var->pointer_initializer = NULL;
+
var->num_members = flags.u.num_members;
if (var->num_members > 0) {
var->members = ralloc_array(var, struct nir_variable_data,
};
enum intrinsic_const_indices_encoding {
- /* Use the 6 bits of packed_const_indices to store 1-6 indices.
- * 1 6-bit index, or 2 3-bit indices, or 3 2-bit indices, or
- * 4-6 1-bit indices.
+ /* Use the 9 bits of packed_const_indices to store 1-9 indices.
+ * 1 9-bit index, or 2 4-bit indices, or 3 3-bit indices, or
+ * 4 2-bit indices, or 5-9 1-bit indices.
*
* The common case for load_ubo is 0, 0, 0, which is trivially represented.
* The common cases for load_interpolated_input also fit here, e.g.: 7, 3
*/
- const_indices_6bit_all_combined,
+ const_indices_9bit_all_combined,
const_indices_8bit, /* 8 bits per element */
const_indices_16bit, /* 16 bits per element */
struct {
unsigned instr_type:4;
unsigned intrinsic:9;
- unsigned num_components:3;
unsigned const_indices_encoding:2;
- unsigned packed_const_indices:6;
+ unsigned packed_const_indices:9;
unsigned dest:8;
} intrinsic;
struct {
unsigned instr_type:4;
unsigned num_srcs:4;
unsigned op:4;
- unsigned texture_array_size:12;
unsigned dest:8;
+ unsigned _pad:12;
} tex;
struct {
unsigned instr_type:4;
blob_write_uint32(ctx->blob, header.u32);
}
+ if (dest.ssa.is_ssa &&
+ dest.ssa.num_components == NUM_COMPONENTS_IS_SEPARATE_7)
+ blob_write_uint32(ctx->blob, dst->ssa.num_components);
+
if (dst->is_ssa) {
write_add_object(ctx, &dst->ssa);
if (dest.ssa.has_name)
if (dest.ssa.is_ssa) {
unsigned bit_size = decode_bit_size_3bits(dest.ssa.bit_size);
- unsigned num_components =
- decode_num_components_in_3bits(dest.ssa.num_components);
+ unsigned num_components;
+ if (dest.ssa.num_components == NUM_COMPONENTS_IS_SEPARATE_7)
+ num_components = blob_read_uint32(ctx->blob);
+ else
+ num_components = decode_num_components_in_3bits(dest.ssa.num_components);
char *name = dest.ssa.has_name ? blob_read_string(ctx->blob) : NULL;
nir_ssa_dest_init(instr, dst, num_components, bit_size, name);
read_add_object(ctx, &dst->ssa);
/* The swizzles for src0.x and src1.x are stored
* in writemask_or_two_swizzles for SSA ALUs.
*/
- if (alu->dest.dest.is_ssa && i < 2 && chan == 0)
+ if (alu->dest.dest.is_ssa && i < 2 && chan == 0 &&
+ alu->src[i].swizzle[chan] < 4)
continue;
if (alu->src[i].swizzle[chan] != chan)
write_alu(write_ctx *ctx, const nir_alu_instr *alu)
{
unsigned num_srcs = nir_op_infos[alu->op].num_inputs;
+ unsigned dst_components = nir_dest_num_components(alu->dest.dest);
/* 9 bits for nir_op */
STATIC_ASSERT(nir_num_opcodes <= 512);
header.alu.writemask_or_two_swizzles = alu->src[0].swizzle[0];
if (num_srcs > 1)
header.alu.writemask_or_two_swizzles |= alu->src[1].swizzle[0] << 2;
- } else if (!alu->dest.dest.is_ssa) {
- /* For registers, this field is a writemask. */
+ } else if (!alu->dest.dest.is_ssa && dst_components <= 4) {
+ /* For vec4 registers, this field is a writemask. */
header.alu.writemask_or_two_swizzles = alu->dest.write_mask;
}
write_dest(ctx, &alu->dest.dest, header, alu->instr.type);
+ if (!alu->dest.dest.is_ssa && dst_components > 4)
+ blob_write_uint32(ctx->blob, alu->dest.write_mask);
+
if (header.alu.packed_src_ssa_16bit) {
for (unsigned i = 0; i < num_srcs; i++) {
assert(alu->src[i].src.is_ssa);
}
} else {
for (unsigned i = 0; i < num_srcs; i++) {
- unsigned src_components = nir_ssa_alu_instr_src_components(alu, i);
+ unsigned src_channels = nir_ssa_alu_instr_src_components(alu, i);
+ unsigned src_components = nir_src_num_components(alu->src[i].src);
union packed_src src;
+ bool packed = src_components <= 4 && src_channels <= 4;
src.u32 = 0;
src.alu.negate = alu->src[i].negate;
src.alu.abs = alu->src[i].abs;
- if (src_components <= 4) {
+ if (packed) {
src.alu.swizzle_x = alu->src[i].swizzle[0];
src.alu.swizzle_y = alu->src[i].swizzle[1];
src.alu.swizzle_z = alu->src[i].swizzle[2];
write_src_full(ctx, &alu->src[i].src, src);
/* Store swizzles for vec8 and vec16. */
- if (src_components > 4) {
- for (unsigned i = 0; i < src_components; i += 8) {
+ if (!packed) {
+ for (unsigned o = 0; o < src_channels; o += 8) {
unsigned value = 0;
- for (unsigned j = 0; j < 8 && i + j < src_components; j++) {
- value |= alu->src[i].swizzle[i + j] <<
+ for (unsigned j = 0; j < 8 && o + j < src_channels; j++) {
+ value |= (uint32_t)alu->src[i].swizzle[o + j] <<
(4 * j); /* 4 bits per swizzle */
}
read_dest(ctx, &alu->dest.dest, &alu->instr, header);
+ unsigned dst_components = nir_dest_num_components(alu->dest.dest);
+
+ if (alu->dest.dest.is_ssa) {
+ alu->dest.write_mask = u_bit_consecutive(0, dst_components);
+ } else if (dst_components <= 4) {
+ alu->dest.write_mask = header.alu.writemask_or_two_swizzles;
+ } else {
+ alu->dest.write_mask = blob_read_uint32(ctx->blob);
+ }
+
if (header.alu.packed_src_ssa_16bit) {
for (unsigned i = 0; i < num_srcs; i++) {
nir_alu_src *src = &alu->src[i];
} else {
for (unsigned i = 0; i < num_srcs; i++) {
union packed_src src = read_src(ctx, &alu->src[i].src, &alu->instr);
- unsigned src_components = nir_ssa_alu_instr_src_components(alu, i);
+ unsigned src_channels = nir_ssa_alu_instr_src_components(alu, i);
+ unsigned src_components = nir_src_num_components(alu->src[i].src);
+ bool packed = src_components <= 4 && src_channels <= 4;
alu->src[i].negate = src.alu.negate;
alu->src[i].abs = src.alu.abs;
memset(&alu->src[i].swizzle, 0, sizeof(alu->src[i].swizzle));
- if (src_components <= 4) {
+ if (packed) {
alu->src[i].swizzle[0] = src.alu.swizzle_x;
alu->src[i].swizzle[1] = src.alu.swizzle_y;
alu->src[i].swizzle[2] = src.alu.swizzle_z;
alu->src[i].swizzle[3] = src.alu.swizzle_w;
} else {
/* Load swizzles for vec8 and vec16. */
- for (unsigned i = 0; i < src_components; i += 8) {
+ for (unsigned o = 0; o < src_channels; o += 8) {
unsigned value = blob_read_uint32(ctx->blob);
- for (unsigned j = 0; j < 8 && i + j < src_components; j++) {
- alu->src[i].swizzle[i + j] =
+ for (unsigned j = 0; j < 8 && o + j < src_channels; j++) {
+ alu->src[i].swizzle[o + j] =
(value >> (4 * j)) & 0xf; /* 4 bits per swizzle */
}
}
}
}
- if (alu->dest.dest.is_ssa) {
- alu->dest.write_mask =
- u_bit_consecutive(0, alu->dest.dest.ssa.num_components);
- } else {
- alu->dest.write_mask = header.alu.writemask_or_two_swizzles;
- }
-
if (header.alu.packed_src_ssa_16bit &&
alu->dest.dest.is_ssa) {
alu->src[0].swizzle[0] = header.alu.writemask_or_two_swizzles & 0x3;
header.intrinsic.instr_type = intrin->instr.type;
header.intrinsic.intrinsic = intrin->intrinsic;
- header.intrinsic.num_components =
- encode_num_components_in_3bits(intrin->num_components);
/* Analyze constant indices to decide how to encode them. */
if (num_indices) {
max_bits = MAX2(max_bits, max);
}
- if (max_bits * num_indices <= 6) {
- header.intrinsic.const_indices_encoding = const_indices_6bit_all_combined;
+ if (max_bits * num_indices <= 9) {
+ header.intrinsic.const_indices_encoding = const_indices_9bit_all_combined;
- /* Pack all const indices into 6 bits. */
+ /* Pack all const indices into 9 bits. */
- unsigned bit_size = 6 / num_indices;
+ unsigned bit_size = 9 / num_indices;
for (unsigned i = 0; i < num_indices; i++) {
header.intrinsic.packed_const_indices |=
intrin->const_index[i] << (i * bit_size);
unsigned num_srcs = nir_intrinsic_infos[op].num_srcs;
unsigned num_indices = nir_intrinsic_infos[op].num_indices;
- intrin->num_components =
- decode_num_components_in_3bits(header.intrinsic.num_components);
-
if (nir_intrinsic_infos[op].has_dest)
read_dest(ctx, &intrin->dest, &intrin->instr, header);
for (unsigned i = 0; i < num_srcs; i++)
read_src(ctx, &intrin->src[i], &intrin->instr);
+ /* Vectorized intrinsics have num_components same as dst or src that has
+ * 0 components in the info. Find it.
+ */
+ if (nir_intrinsic_infos[op].has_dest &&
+ nir_intrinsic_infos[op].dest_components == 0) {
+ intrin->num_components = nir_dest_num_components(intrin->dest);
+ } else {
+ for (unsigned i = 0; i < num_srcs; i++) {
+ if (nir_intrinsic_infos[op].src_components[i] == 0) {
+ intrin->num_components = nir_src_num_components(intrin->src[i]);
+ break;
+ }
+ }
+ }
+
if (num_indices) {
switch (header.intrinsic.const_indices_encoding) {
- case const_indices_6bit_all_combined: {
- unsigned bit_size = 6 / num_indices;
+ case const_indices_9bit_all_combined: {
+ unsigned bit_size = 9 / num_indices;
unsigned bit_mask = u_bit_consecutive(0, bit_size);
for (unsigned i = 0; i < num_indices; i++) {
intrin->const_index[i] =
union packed_tex_data {
uint32_t u32;
struct {
- enum glsl_sampler_dim sampler_dim:4;
- nir_alu_type dest_type:8;
+ unsigned sampler_dim:4;
+ unsigned dest_type:8;
unsigned coord_components:3;
unsigned is_array:1;
unsigned is_shadow:1;
unsigned is_new_style_shadow:1;
unsigned component:2;
- unsigned unused:10; /* Mark unused for valgrind. */
+ unsigned texture_non_uniform:1;
+ unsigned sampler_non_uniform:1;
+ unsigned unused:8; /* Mark unused for valgrind. */
} u;
};
{
assert(tex->num_srcs < 16);
assert(tex->op < 16);
- assert(tex->texture_array_size < 1024);
union packed_instr header;
header.u32 = 0;
header.tex.instr_type = tex->instr.type;
header.tex.num_srcs = tex->num_srcs;
header.tex.op = tex->op;
- header.tex.texture_array_size = tex->texture_array_size;
write_dest(ctx, &tex->dest, header, tex->instr.type);
.u.is_shadow = tex->is_shadow,
.u.is_new_style_shadow = tex->is_new_style_shadow,
.u.component = tex->component,
+ .u.texture_non_uniform = tex->texture_non_uniform,
+ .u.sampler_non_uniform = tex->sampler_non_uniform,
};
blob_write_uint32(ctx->blob, packed.u32);
tex->op = header.tex.op;
tex->texture_index = blob_read_uint32(ctx->blob);
- tex->texture_array_size = header.tex.texture_array_size;
tex->sampler_index = blob_read_uint32(ctx->blob);
if (tex->op == nir_texop_tg4)
blob_copy_bytes(ctx->blob, tex->tg4_offsets, sizeof(tex->tg4_offsets));
tex->is_shadow = packed.u.is_shadow;
tex->is_new_style_shadow = packed.u.is_new_style_shadow;
tex->component = packed.u.component;
+ tex->texture_non_uniform = packed.u.texture_non_uniform;
+ tex->sampler_non_uniform = packed.u.sampler_non_uniform;
for (unsigned i = 0; i < tex->num_srcs; i++) {
union packed_src src = read_src(ctx, &tex->src[i].src, &tex->instr);
static void
write_function_impl(write_ctx *ctx, const nir_function_impl *fi)
{
+ blob_write_uint8(ctx->blob, fi->structured);
+
write_var_list(ctx, &fi->locals);
write_reg_list(ctx, &fi->registers);
blob_write_uint32(ctx->blob, fi->reg_alloc);
nir_function_impl *fi = nir_function_impl_create_bare(ctx->nir);
fi->function = fxn;
+ fi->structured = blob_read_uint8(ctx->blob);
+
read_var_list(ctx, &fi->locals);
read_reg_list(ctx, &fi->registers);
fi->reg_alloc = blob_read_uint32(ctx->blob);
info.name = info.label = NULL;
blob_write_bytes(blob, (uint8_t *) &info, sizeof(info));
- write_var_list(&ctx, &nir->uniforms);
- write_var_list(&ctx, &nir->inputs);
- write_var_list(&ctx, &nir->outputs);
- write_var_list(&ctx, &nir->shared);
- write_var_list(&ctx, &nir->globals);
- write_var_list(&ctx, &nir->system_values);
+ write_var_list(&ctx, &nir->variables);
blob_write_uint32(blob, nir->num_inputs);
blob_write_uint32(blob, nir->num_uniforms);
ctx.nir->info = info;
- read_var_list(&ctx, &ctx.nir->uniforms);
- read_var_list(&ctx, &ctx.nir->inputs);
- read_var_list(&ctx, &ctx.nir->outputs);
- read_var_list(&ctx, &ctx.nir->shared);
- read_var_list(&ctx, &ctx.nir->globals);
- read_var_list(&ctx, &ctx.nir->system_values);
+ read_var_list(&ctx, &ctx.nir->variables);
ctx.nir->num_inputs = blob_read_uint32(blob);
ctx.nir->num_uniforms = blob_read_uint32(blob);