From 487a495cc09e15ab1582309ffee0fbeb302d5f45 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 5 Nov 2019 22:14:28 -0500 Subject: [PATCH] nir/serialize: remove up to 3 consecutive equal ALU instruction headers vec4 scalarized ALUs typically have 4 equal instruction headers, so remove the last 3. There are no bits left in the ALU header for more flags, so future extensions of NIR will have to use something like instr_type == 15 to describe more complex ALU instructions. Reviewed-by: Connor Abbott --- src/compiler/nir/nir_serialize.c | 81 +++++++++++++++++++++++++------- 1 file changed, 65 insertions(+), 16 deletions(-) diff --git a/src/compiler/nir/nir_serialize.c b/src/compiler/nir/nir_serialize.c index 0025f9b306c..85db23beef8 100644 --- a/src/compiler/nir/nir_serialize.c +++ b/src/compiler/nir/nir_serialize.c @@ -56,6 +56,10 @@ typedef struct { const struct glsl_type *last_interface_type; struct nir_variable_data last_var_data; + /* For skipping equal ALU headers (typical after scalarization). */ + nir_instr_type last_instr_type; + uintptr_t last_alu_header_offset; + /* Don't write optional data such as variable names. */ bool strip; } write_ctx; @@ -612,7 +616,8 @@ union packed_instr { unsigned writemask:4; unsigned op:9; unsigned packed_src_ssa_16bit:1; - unsigned _pad:2; + /* Scalarized ALUs always have the same header. */ + unsigned num_followup_alu_sharing_header:2; unsigned dest:8; } alu; struct { @@ -673,7 +678,8 @@ union packed_instr { /* Write "lo24" as low 24 bits in the first uint32. 
*/ static void -write_dest(write_ctx *ctx, const nir_dest *dst, union packed_instr header) +write_dest(write_ctx *ctx, const nir_dest *dst, union packed_instr header, + nir_instr_type instr_type) { STATIC_ASSERT(sizeof(union packed_dest) == 1); union packed_dest dest; @@ -688,9 +694,43 @@ write_dest(write_ctx *ctx, const nir_dest *dst, union packed_instr header) } else { dest.reg.is_indirect = !!(dst->reg.indirect); } - header.any.dest = dest.u8; - blob_write_uint32(ctx->blob, header.u32); + + /* Check if the current ALU instruction has the same header as the previous + * instruction that is also ALU. If it is, we don't have to write + * the current header. This is a typical occurrence after scalarization. + */ + if (instr_type == nir_instr_type_alu) { + bool equal_header = false; + + if (ctx->last_instr_type == nir_instr_type_alu) { + assert(ctx->last_alu_header_offset); + union packed_instr *last_header = + (union packed_instr *)(ctx->blob->data + + ctx->last_alu_header_offset); + + /* Clear the field that counts ALUs with equal headers. */ + union packed_instr clean_header; + clean_header.u32 = last_header->u32; + clean_header.alu.num_followup_alu_sharing_header = 0; + + /* There can be at most 4 consecutive ALU instructions + * sharing the same header. 
+ */ + if (last_header->alu.num_followup_alu_sharing_header < 3 && + header.u32 == clean_header.u32) { + last_header->alu.num_followup_alu_sharing_header++; + equal_header = true; + } + } + + if (!equal_header) { + ctx->last_alu_header_offset = ctx->blob->size; + blob_write_uint32(ctx->blob, header.u32); + } + } else { + blob_write_uint32(ctx->blob, header.u32); + } if (dst->is_ssa) { write_add_object(ctx, &dst->ssa); @@ -773,7 +813,7 @@ write_alu(write_ctx *ctx, const nir_alu_instr *alu) header.alu.op = alu->op; header.alu.packed_src_ssa_16bit = is_alu_src_ssa_16bit(ctx, alu); - write_dest(ctx, &alu->dest.dest, header); + write_dest(ctx, &alu->dest.dest, header, alu->instr.type); if (header.alu.packed_src_ssa_16bit) { for (unsigned i = 0; i < num_srcs; i++) { @@ -873,7 +913,7 @@ write_deref(write_ctx *ctx, const nir_deref_instr *deref) are_object_ids_16bit(ctx); } - write_dest(ctx, &deref->dest, header); + write_dest(ctx, &deref->dest, header, deref->instr.type); switch (deref->deref_type) { case nir_deref_type_var: @@ -1039,7 +1079,7 @@ write_intrinsic(write_ctx *ctx, const nir_intrinsic_instr *intrin) } if (nir_intrinsic_infos[intrin->intrinsic].has_dest) - write_dest(ctx, &intrin->dest, header); + write_dest(ctx, &intrin->dest, header, intrin->instr.type); else blob_write_uint32(ctx->blob, header.u32); @@ -1324,7 +1364,7 @@ write_tex(write_ctx *ctx, const nir_tex_instr *tex) header.tex.op = tex->op; header.tex.texture_array_size = tex->texture_array_size; - write_dest(ctx, &tex->dest, header); + write_dest(ctx, &tex->dest, header, tex->instr.type); blob_write_uint32(ctx->blob, tex->texture_index); blob_write_uint32(ctx->blob, tex->sampler_index); @@ -1397,7 +1437,7 @@ write_phi(write_ctx *ctx, const nir_phi_instr *phi) * and then store enough information so that a later fixup pass can fill * them in correctly. 
*/ - write_dest(ctx, &phi->dest, header); + write_dest(ctx, &phi->dest, header, phi->instr.type); nir_foreach_phi_src(src, phi) { assert(src->src.is_ssa); @@ -1565,7 +1605,8 @@ write_instr(write_ctx *ctx, const nir_instr *instr) } } -static void +/* Return the number of instructions read. */ +static unsigned read_instr(read_ctx *ctx, nir_block *block) { STATIC_ASSERT(sizeof(union packed_instr) == 4); @@ -1575,8 +1616,9 @@ read_instr(read_ctx *ctx, nir_block *block) switch (header.any.instr_type) { case nir_instr_type_alu: - instr = &read_alu(ctx, header)->instr; - break; + for (unsigned i = 0; i <= header.alu.num_followup_alu_sharing_header; i++) + nir_instr_insert_after_block(block, &read_alu(ctx, header)->instr); + return header.alu.num_followup_alu_sharing_header + 1; case nir_instr_type_deref: instr = &read_deref(ctx, header)->instr; break; @@ -1599,7 +1641,7 @@ read_instr(read_ctx *ctx, nir_block *block) * are read so that we can set their sources up. */ read_phi(ctx, block, header); - return; + return 1; case nir_instr_type_jump: instr = &read_jump(ctx, header)->instr; break; @@ -1613,6 +1655,7 @@ read_instr(read_ctx *ctx, nir_block *block) } nir_instr_insert_after_block(block, instr); + return 1; } static void @@ -1620,8 +1663,14 @@ write_block(write_ctx *ctx, const nir_block *block) { write_add_object(ctx, block); blob_write_uint32(ctx->blob, exec_list_length(&block->instr_list)); - nir_foreach_instr(instr, block) + + ctx->last_instr_type = ~0; + ctx->last_alu_header_offset = 0; + + nir_foreach_instr(instr, block) { write_instr(ctx, instr); + ctx->last_instr_type = instr->type; + } } static void @@ -1636,8 +1685,8 @@ read_block(read_ctx *ctx, struct exec_list *cf_list) read_add_object(ctx, block); unsigned num_instrs = blob_read_uint32(ctx->blob); - for (unsigned i = 0; i < num_instrs; i++) { - read_instr(ctx, block); + for (unsigned i = 0; i < num_instrs;) { + i += read_instr(ctx, block); } } -- 2.30.2