From ed7a27719a9fceb7271dfd97b2217c787356f21a Mon Sep 17 00:00:00 2001 From: Jonathan Marek Date: Sat, 11 May 2019 09:51:42 -0400 Subject: [PATCH] etnaviv: add alternative NIR compiler enable with ETNA_MESA_DEBUG=nir Signed-off-by: Jonathan Marek Reviewed-by: Philipp Zabel --- src/gallium/drivers/etnaviv/Makefile.sources | 2 + .../drivers/etnaviv/etnaviv_compiler.c | 62 +- .../drivers/etnaviv/etnaviv_compiler.h | 25 +- .../drivers/etnaviv/etnaviv_compiler_nir.c | 853 ++++++++++ .../etnaviv/etnaviv_compiler_nir_emit.h | 1396 +++++++++++++++++ src/gallium/drivers/etnaviv/etnaviv_debug.h | 1 + src/gallium/drivers/etnaviv/etnaviv_screen.c | 36 +- src/gallium/drivers/etnaviv/etnaviv_screen.h | 3 + src/gallium/drivers/etnaviv/etnaviv_shader.c | 26 +- src/gallium/drivers/etnaviv/etnaviv_shader.h | 14 +- src/gallium/drivers/etnaviv/meson.build | 5 +- src/gallium/winsys/etnaviv/drm/meson.build | 2 +- 12 files changed, 2388 insertions(+), 37 deletions(-) create mode 100644 src/gallium/drivers/etnaviv/etnaviv_compiler_nir.c create mode 100644 src/gallium/drivers/etnaviv/etnaviv_compiler_nir_emit.h diff --git a/src/gallium/drivers/etnaviv/Makefile.sources b/src/gallium/drivers/etnaviv/Makefile.sources index 01e7e49a38a..1d0269acffb 100644 --- a/src/gallium/drivers/etnaviv/Makefile.sources +++ b/src/gallium/drivers/etnaviv/Makefile.sources @@ -18,6 +18,8 @@ C_SOURCES := \ etnaviv_clear_blit.h \ etnaviv_compiler.c \ etnaviv_compiler.h \ + etnaviv_compiler_nir.c \ + etnaviv_compiler_nir_emit.h \ etnaviv_context.c \ etnaviv_context.h \ etnaviv_debug.h \ diff --git a/src/gallium/drivers/etnaviv/etnaviv_compiler.c b/src/gallium/drivers/etnaviv/etnaviv_compiler.c index 474e3d23405..8214d4f5770 100644 --- a/src/gallium/drivers/etnaviv/etnaviv_compiler.c +++ b/src/gallium/drivers/etnaviv/etnaviv_compiler.c @@ -2286,6 +2286,9 @@ copy_uniform_state_to_shader(struct etna_compile *c, struct etna_shader_variant bool etna_compile_shader(struct etna_shader_variant *v) { + if (DBG_ENABLED(ETNA_DBG_NIR)) + return etna_compile_shader_nir(v); + /* Create scratch space that may be too large to fit on stack */ bool ret; @@ -2449,11 +2452,12 @@ etna_compile_shader(struct etna_shader_variant *v) etna_compile_fill_in_labels(c); /* fill in output structure */ - v->processor = c->info.processor; + v->stage = c->info.processor == PIPE_SHADER_FRAGMENT ? 
MESA_SHADER_FRAGMENT : MESA_SHADER_VERTEX; v->code_size = c->inst_ptr * 4; v->code = mem_dup(c->code, c->inst_ptr * 16); v->num_loops = c->num_loops; v->num_temps = c->next_free_native; + v->vs_id_in_reg = -1; v->vs_pos_out_reg = -1; v->vs_pointsize_out_reg = -1; v->ps_color_out_reg = -1; @@ -2483,7 +2487,7 @@ extern const char *tgsi_swizzle_names[]; void etna_dump_shader(const struct etna_shader_variant *shader) { - if (shader->processor == PIPE_SHADER_VERTEX) + if (shader->stage == MESA_SHADER_VERTEX) printf("VERT\n"); else printf("FRAG\n"); @@ -2502,22 +2506,42 @@ etna_dump_shader(const struct etna_shader_variant *shader) shader->uniforms.imm_data[idx], shader->uniforms.imm_contents[idx]); } - printf("inputs:\n"); - for (int idx = 0; idx < shader->infile.num_reg; ++idx) { - printf(" [%i] name=%s index=%i comps=%i\n", shader->infile.reg[idx].reg, - tgsi_semantic_names[shader->infile.reg[idx].semantic.Name], - shader->infile.reg[idx].semantic.Index, - shader->infile.reg[idx].num_components); - } - printf("outputs:\n"); - for (int idx = 0; idx < shader->outfile.num_reg; ++idx) { - printf(" [%i] name=%s index=%i comps=%i\n", shader->outfile.reg[idx].reg, - tgsi_semantic_names[shader->outfile.reg[idx].semantic.Name], - shader->outfile.reg[idx].semantic.Index, - shader->outfile.reg[idx].num_components); + + if (DBG_ENABLED(ETNA_DBG_NIR)) { + printf("inputs:\n"); + for (int idx = 0; idx < shader->infile.num_reg; ++idx) { + printf(" [%i] name=%s comps=%i\n", shader->infile.reg[idx].reg, + (shader->stage == MESA_SHADER_VERTEX) ? + gl_vert_attrib_name(shader->infile.reg[idx].slot) : + gl_varying_slot_name(shader->infile.reg[idx].slot), + shader->infile.reg[idx].num_components); + } + printf("outputs:\n"); + for (int idx = 0; idx < shader->outfile.num_reg; ++idx) { + printf(" [%i] name=%s comps=%i\n", shader->outfile.reg[idx].reg, + (shader->stage == MESA_SHADER_VERTEX) ? 
+ gl_varying_slot_name(shader->outfile.reg[idx].slot) : + gl_frag_result_name(shader->outfile.reg[idx].slot), + shader->outfile.reg[idx].num_components); + } + } else { + printf("inputs:\n"); + for (int idx = 0; idx < shader->infile.num_reg; ++idx) { + printf(" [%i] name=%s index=%i comps=%i\n", shader->infile.reg[idx].reg, + tgsi_semantic_names[shader->infile.reg[idx].semantic.Name], + shader->infile.reg[idx].semantic.Index, + shader->infile.reg[idx].num_components); + } + printf("outputs:\n"); + for (int idx = 0; idx < shader->outfile.num_reg; ++idx) { + printf(" [%i] name=%s index=%i comps=%i\n", shader->outfile.reg[idx].reg, + tgsi_semantic_names[shader->outfile.reg[idx].semantic.Name], + shader->outfile.reg[idx].semantic.Index, + shader->outfile.reg[idx].num_components); + } } printf("special:\n"); - if (shader->processor == PIPE_SHADER_VERTEX) { + if (shader->stage == MESA_SHADER_VERTEX) { printf(" vs_pos_out_reg=%i\n", shader->vs_pos_out_reg); printf(" vs_pointsize_out_reg=%i\n", shader->vs_pointsize_out_reg); printf(" vs_load_balancing=0x%08x\n", shader->vs_load_balancing); @@ -2531,6 +2555,9 @@ etna_dump_shader(const struct etna_shader_variant *shader) void etna_destroy_shader(struct etna_shader_variant *shader) { + if (DBG_ENABLED(ETNA_DBG_NIR)) + return etna_destroy_shader_nir(shader); + assert(shader); FREE(shader->code); @@ -2554,6 +2581,9 @@ bool etna_link_shader(struct etna_shader_link_info *info, const struct etna_shader_variant *vs, const struct etna_shader_variant *fs) { + if (DBG_ENABLED(ETNA_DBG_NIR)) + return etna_link_shader_nir(info, vs, fs); + int comp_ofs = 0; /* For each fragment input we need to find the associated vertex shader * output, which can be found by matching on semantic name and index. A diff --git a/src/gallium/drivers/etnaviv/etnaviv_compiler.h b/src/gallium/drivers/etnaviv/etnaviv_compiler.h index 48b1b218750..b7feeb6d49b 100644 --- a/src/gallium/drivers/etnaviv/etnaviv_compiler.h +++ b/src/gallium/drivers/etnaviv/etnaviv_compiler.h @@ -32,6 +32,7 @@ #include "etnaviv_shader.h" #include "pipe/p_compiler.h" #include "pipe/p_shader_tokens.h" +#include "compiler/shader_enums.h" /* XXX some of these are pretty arbitrary limits, may be better to switch * to dynamic allocation at some point. @@ -47,6 +48,7 @@ struct etna_shader_inout { int reg; /* native register */ struct tgsi_declaration_semantic semantic; /* tgsi semantic name and index */ + int slot; /* nir: gl_varying_slot or gl_vert_attrib */ int num_components; }; @@ -59,7 +61,7 @@ struct etna_shader_io_file { struct etna_shader_variant { uint32_t id; /* for debug */ - uint processor; /* TGSI_PROCESSOR_... 
*/ + gl_shader_stage stage; uint32_t code_size; /* code size in uint32 words */ uint32_t *code; unsigned num_loops; @@ -77,12 +79,13 @@ struct etna_shader_variant { /* outputs (for linking) */ struct etna_shader_io_file outfile; - /* index into outputs (for linking) */ + /* index into outputs (for linking) - only for TGSI compiler */ int output_count_per_semantic[TGSI_SEMANTIC_COUNT]; struct etna_shader_inout * *output_per_semantic_list; /* list of pointers to outputs */ struct etna_shader_inout **output_per_semantic[TGSI_SEMANTIC_COUNT]; - /* special outputs (vs only) */ + /* special inputs/outputs (vs only) */ + int vs_id_in_reg; /* vertexid+instanceid input */ int vs_pos_out_reg; /* VS position output */ int vs_pointsize_out_reg; /* VS point size output */ uint32_t vs_load_balancing; @@ -134,4 +137,20 @@ etna_link_shader(struct etna_shader_link_info *info, void etna_destroy_shader(struct etna_shader_variant *shader); +/* NIR compiler */ + +bool +etna_compile_shader_nir(struct etna_shader_variant *shader); + +void +etna_dump_shader_nir(const struct etna_shader_variant *shader); + +bool +etna_link_shader_nir(struct etna_shader_link_info *info, + const struct etna_shader_variant *vs, + const struct etna_shader_variant *fs); + +void +etna_destroy_shader_nir(struct etna_shader_variant *shader); + #endif diff --git a/src/gallium/drivers/etnaviv/etnaviv_compiler_nir.c b/src/gallium/drivers/etnaviv/etnaviv_compiler_nir.c new file mode 100644 index 00000000000..44e0c1a29fb --- /dev/null +++ b/src/gallium/drivers/etnaviv/etnaviv_compiler_nir.c @@ -0,0 +1,853 @@ +/* + * Copyright (c) 2012-2019 Etnaviv Project + * Copyright (c) 2019 Zodiac Inflight Innovations + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Authors: + * Jonathan Marek + * Wladimir J. 
van der Laan
+ */
+
+#include "etnaviv_compiler.h"
+#include "etnaviv_asm.h"
+#include "etnaviv_context.h"
+#include "etnaviv_debug.h"
+#include "etnaviv_disasm.h"
+#include "etnaviv_uniforms.h"
+#include "etnaviv_util.h"
+
+#include <math.h>
+#include "util/u_memory.h"
+#include "util/register_allocate.h"
+#include "compiler/nir/nir_builder.h"
+#include "compiler/nir/nir_worklist.h"
+
+#include "util/u_half.h"
+
+struct etna_compile {
+   nir_shader *nir;
+#define is_fs(c) ((c)->nir->info.stage == MESA_SHADER_FRAGMENT)
+   const struct etna_specs *specs;
+   struct etna_shader_variant *variant;
+
+   /* register assigned to each output, indexed by driver_location */
+   unsigned output_reg[ETNA_NUM_INPUTS];
+
+   /* block # to instr index */
+   unsigned *block_ptr;
+
+   /* Code generation */
+   int inst_ptr; /* current instruction pointer */
+   struct etna_inst code[ETNA_MAX_INSTRUCTIONS * ETNA_INST_SIZE];
+
+   /* There was an error during compilation */
+   bool error;
+};
+
+#define compile_error(ctx, args...) ({ \
+   printf(args); \
+   ctx->error = true; \
+   assert(0); \
+})
+
+/* io related lowering
+ * run after lower_int_to_float because it adds i2f/f2i ops
+ */
+static void
+etna_lower_io(nir_shader *shader, struct etna_shader_variant *v)
+{
+   bool rb_swap = shader->info.stage == MESA_SHADER_FRAGMENT && v->key.frag_rb_swap;
+
+   unsigned color_location = 0;
+   nir_foreach_variable(var, &shader->outputs) {
+      switch (var->data.location) {
+      case FRAG_RESULT_COLOR:
+      case FRAG_RESULT_DATA0:
+         color_location = var->data.driver_location;
+         break;
+      }
+   }
+
+   nir_foreach_function(function, shader) {
+      nir_builder b;
+      nir_builder_init(&b, function->impl);
+
+      nir_foreach_block(block, function->impl) {
+         nir_foreach_instr_safe(instr, block) {
+            if (instr->type == nir_instr_type_intrinsic) {
+               nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+
+               switch (intr->intrinsic) {
+               case nir_intrinsic_load_front_face: {
+                  /* front face inverted (run after int_to_float, so invert as float) */
+                  b.cursor = nir_after_instr(instr);
+
+                  nir_ssa_def *ssa = nir_seq(&b, &intr->dest.ssa, nir_imm_float(&b, 0.0));
+                  nir_ssa_def_rewrite_uses_after(&intr->dest.ssa,
+                                                 nir_src_for_ssa(ssa),
+                                                 ssa->parent_instr);
+               } break;
+               case nir_intrinsic_store_output: {
+                  if (!rb_swap || nir_intrinsic_base(intr) != color_location)
+                     break;
+                  b.cursor = nir_before_instr(instr);
+
+                  nir_ssa_def *ssa = nir_mov(&b, intr->src[0].ssa);
+                  nir_alu_instr *alu = nir_instr_as_alu(ssa->parent_instr);
+                  alu->src[0].swizzle[0] = 2;
+                  alu->src[0].swizzle[2] = 0;
+                  nir_instr_rewrite_src(instr, &intr->src[0], nir_src_for_ssa(ssa));
+               } break;
+               case nir_intrinsic_load_instance_id: {
+                  b.cursor = nir_after_instr(instr);
+                  nir_ssa_def *ssa = nir_i2f32(&b, &intr->dest.ssa);
+                  nir_ssa_def_rewrite_uses_after(&intr->dest.ssa,
+                                                 nir_src_for_ssa(ssa),
+                                                 ssa->parent_instr);
+               } break;
+               case nir_intrinsic_load_uniform: {
+                  /* multiply by 16 and convert to int */
+                  b.cursor = nir_before_instr(instr);
+                  nir_ssa_def *ssa = nir_f2u32(&b, nir_fmul(&b, intr->src[0].ssa,
+                                                            nir_imm_float(&b, 16.0f)));
+                  nir_instr_rewrite_src(instr, &intr->src[0], nir_src_for_ssa(ssa));
+               } break;
+               default:
+                  break;
+               }
+            }
+
+            if (instr->type != nir_instr_type_tex)
+               continue;
+
+            nir_tex_instr *tex = nir_instr_as_tex(instr);
+            nir_src *coord = NULL;
+            nir_src *lod_bias = NULL;
+            unsigned lod_bias_idx;
+
+            assert(tex->sampler_index == tex->texture_index);
+
+            for (unsigned i = 0; i < tex->num_srcs; i++) {
+               switch (tex->src[i].src_type) {
+               case nir_tex_src_coord:
+                  coord = &tex->src[i].src;
+
break; + case nir_tex_src_bias: + case nir_tex_src_lod: + assert(!lod_bias); + lod_bias = &tex->src[i].src; + lod_bias_idx = i; + break; + default: + assert(0); + break; + } + } + + if (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) { + /* use a dummy load_uniform here to represent texcoord scale */ + b.cursor = nir_before_instr(instr); + nir_intrinsic_instr *load = + nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_uniform); + nir_intrinsic_set_base(load, ~tex->sampler_index); + load->num_components = 2; + load->src[0] = nir_src_for_ssa(nir_imm_float(&b, 0.0f)); + nir_ssa_dest_init(&load->instr, &load->dest, 2, 32, NULL); + nir_intrinsic_set_type(load, nir_type_float); + + nir_builder_instr_insert(&b, &load->instr); + + nir_ssa_def *new_coord = nir_fmul(&b, coord->ssa, &load->dest.ssa); + nir_instr_rewrite_src(&tex->instr, coord, nir_src_for_ssa(new_coord)); + } + + /* pre HALTI5 needs texture sources in a single source */ + + if (!lod_bias || v->shader->specs->halti >= 5) + continue; + + assert(coord && lod_bias && tex->coord_components < 4); + + nir_alu_instr *vec = nir_alu_instr_create(shader, nir_op_vec4); + for (unsigned i = 0; i < tex->coord_components; i++) { + vec->src[i].src = nir_src_for_ssa(coord->ssa); + vec->src[i].swizzle[0] = i; + } + for (unsigned i = tex->coord_components; i < 4; i++) + vec->src[i].src = nir_src_for_ssa(lod_bias->ssa); + + vec->dest.write_mask = 0xf; + nir_ssa_dest_init(&vec->instr, &vec->dest.dest, 4, 32, NULL); + + nir_tex_instr_remove_src(tex, lod_bias_idx); + nir_instr_rewrite_src(&tex->instr, coord, nir_src_for_ssa(&vec->dest.dest.ssa)); + tex->coord_components = 4; + + nir_instr_insert_before(&tex->instr, &vec->instr); + } + } + } +} + +static void +etna_lower_alu_to_scalar(nir_shader *shader, const struct etna_specs *specs) +{ + BITSET_DECLARE(scalar_ops, nir_num_opcodes); + BITSET_ZERO(scalar_ops); + + BITSET_SET(scalar_ops, nir_op_frsq); + BITSET_SET(scalar_ops, nir_op_frcp); + BITSET_SET(scalar_ops, nir_op_flog2); + BITSET_SET(scalar_ops, nir_op_fexp2); + BITSET_SET(scalar_ops, nir_op_fsqrt); + BITSET_SET(scalar_ops, nir_op_fcos); + BITSET_SET(scalar_ops, nir_op_fsin); + BITSET_SET(scalar_ops, nir_op_fdiv); + + if (!specs->has_halti2_instructions) + BITSET_SET(scalar_ops, nir_op_fdot2); + + nir_lower_alu_to_scalar(shader, scalar_ops); +} + +static void +etna_lower_alu_impl(nir_function_impl *impl, struct etna_compile *c) +{ + nir_shader *shader = impl->function->shader; + + nir_builder b; + nir_builder_init(&b, impl); + + /* in a seperate loop so we can apply the multiple-uniform logic to the new fmul */ + nir_foreach_block(block, impl) { + nir_foreach_instr_safe(instr, block) { + if (instr->type != nir_instr_type_alu) + continue; + + nir_alu_instr *alu = nir_instr_as_alu(instr); + /* multiply sin/cos src by constant + * TODO: do this earlier (but it breaks const_prop opt) + */ + if (alu->op == nir_op_fsin || alu->op == nir_op_fcos) { + b.cursor = nir_before_instr(instr); + + nir_ssa_def *imm = c->specs->has_new_transcendentals ? 
+            nir_imm_float(&b, 1.0 / M_PI) :
+            nir_imm_float(&b, 2.0 / M_PI);
+
+         nir_instr_rewrite_src(instr, &alu->src[0].src,
+            nir_src_for_ssa(nir_fmul(&b, alu->src[0].src.ssa, imm)));
+      }
+
+      /* change transcendental ops to vec2 and insert vec1 mul for the result
+       * TODO: do this earlier (but it breaks with optimizations)
+       */
+      if (c->specs->has_new_transcendentals && (
+          alu->op == nir_op_fdiv || alu->op == nir_op_flog2 ||
+          alu->op == nir_op_fsin || alu->op == nir_op_fcos)) {
+         nir_ssa_def *ssa = &alu->dest.dest.ssa;
+
+         assert(ssa->num_components == 1);
+
+         nir_alu_instr *mul = nir_alu_instr_create(shader, nir_op_fmul);
+         mul->src[0].src = mul->src[1].src = nir_src_for_ssa(ssa);
+         mul->src[1].swizzle[0] = 1;
+
+         mul->dest.write_mask = 1;
+         nir_ssa_dest_init(&mul->instr, &mul->dest.dest, 1, 32, NULL);
+
+         ssa->num_components = 2;
+
+         mul->dest.saturate = alu->dest.saturate;
+         alu->dest.saturate = 0;
+
+         nir_instr_insert_after(instr, &mul->instr);
+
+         nir_ssa_def_rewrite_uses_after(ssa, nir_src_for_ssa(&mul->dest.dest.ssa), &mul->instr);
+      }
+   }
+}
+
+static void etna_lower_alu(nir_shader *shader, struct etna_compile *c)
+{
+   nir_foreach_function(function, shader) {
+      if (function->impl)
+         etna_lower_alu_impl(function->impl, c);
+   }
+}
+
+static void
+emit_inst(struct etna_compile *c, struct etna_inst *inst)
+{
+   c->code[c->inst_ptr++] = *inst;
+}
+
+/* map nir srcs to etna_inst srcs */
+enum {
+   SRC_0_1_2 = (0 << 0) | (1 << 2) | (2 << 4),
+   SRC_0_1_X = (0 << 0) | (1 << 2) | (3 << 4),
+   SRC_0_X_X = (0 << 0) | (3 << 2) | (3 << 4),
+   SRC_0_X_1 = (0 << 0) | (3 << 2) | (1 << 4),
+   SRC_0_1_0 = (0 << 0) | (1 << 2) | (0 << 4),
+   SRC_X_X_0 = (3 << 0) | (3 << 2) | (0 << 4),
+   SRC_0_X_0 = (0 << 0) | (3 << 2) | (0 << 4),
+};
+
+/* info to translate a nir op to etna_inst */
+struct etna_op_info {
+   uint8_t opcode; /* INST_OPCODE_ */
+   uint8_t src; /* SRC_ enum */
+   uint8_t cond; /* INST_CONDITION_ */
+   uint8_t type; /* INST_TYPE_ */
+};
+
+static const struct etna_op_info etna_ops[] = {
+   [0 ...
nir_num_opcodes - 1] = {0xff}, +#undef TRUE +#undef FALSE +#define OPCT(nir, op, src, cond, type) [nir_op_##nir] = { \ + INST_OPCODE_##op, \ + SRC_##src, \ + INST_CONDITION_##cond, \ + INST_TYPE_##type \ +} +#define OPC(nir, op, src, cond) OPCT(nir, op, src, cond, F32) +#define OP(nir, op, src) OPC(nir, op, src, TRUE) + OP(mov, MOV, X_X_0), OP(fneg, MOV, X_X_0), OP(fabs, MOV, X_X_0), OP(fsat, MOV, X_X_0), + OP(fmul, MUL, 0_1_X), OP(fadd, ADD, 0_X_1), OP(ffma, MAD, 0_1_2), + OP(fdot2, DP2, 0_1_X), OP(fdot3, DP3, 0_1_X), OP(fdot4, DP4, 0_1_X), + OPC(fmin, SELECT, 0_1_0, GT), OPC(fmax, SELECT, 0_1_0, LT), + OP(ffract, FRC, X_X_0), OP(frcp, RCP, X_X_0), OP(frsq, RSQ, X_X_0), + OP(fsqrt, SQRT, X_X_0), OP(fsin, SIN, X_X_0), OP(fcos, COS, X_X_0), + OP(fsign, SIGN, X_X_0), OP(ffloor, FLOOR, X_X_0), OP(fceil, CEIL, X_X_0), + OP(flog2, LOG, X_X_0), OP(fexp2, EXP, X_X_0), + OPC(seq, SET, 0_1_X, EQ), OPC(sne, SET, 0_1_X, NE), OPC(sge, SET, 0_1_X, GE), OPC(slt, SET, 0_1_X, LT), + OPC(fcsel, SELECT, 0_1_2, NZ), + OP(fdiv, DIV, 0_1_X), + OP(fddx, DSX, 0_X_0), OP(fddy, DSY, 0_X_0), + + /* integer opcodes */ + OPCT(i2f32, I2F, 0_X_X, TRUE, S32), + OPCT(f2u32, F2I, 0_X_X, TRUE, U32), +}; + +static void +etna_emit_block_start(struct etna_compile *c, unsigned block) +{ + c->block_ptr[block] = c->inst_ptr; +} + +static void +etna_emit_alu(struct etna_compile *c, nir_op op, struct etna_inst_dst dst, + struct etna_inst_src src[3], bool saturate) +{ + struct etna_op_info ei = etna_ops[op]; + + assert(ei.opcode != 0xff); + + struct etna_inst inst = { + .opcode = ei.opcode, + .type = ei.type, + .cond = ei.cond, + .dst = dst, + .sat = saturate, + }; + + switch (op) { + case nir_op_fdiv: + case nir_op_flog2: + case nir_op_fsin: + case nir_op_fcos: + if (c->specs->has_new_transcendentals) + inst.tex.amode = 1; + /* fall through */ + case nir_op_frsq: + case nir_op_frcp: + case nir_op_fexp2: + case nir_op_fsqrt: + case nir_op_i2f32: + case nir_op_f2u32: + /* for these instructions we want src to be in x component + * note: on HALTI2+ i2f/f2u are not scalar but we only use them this way currently + */ + src[0].swiz = inst_swiz_compose(src[0].swiz, + INST_SWIZ_BROADCAST(ffs(inst.dst.write_mask)-1)); + default: + break; + } + + for (unsigned j = 0; j < 3; j++) { + unsigned i = ((ei.src >> j*2) & 3); + if (i < 3) + inst.src[j] = src[i]; + } + + emit_inst(c, &inst); +} + +static void +etna_emit_tex(struct etna_compile *c, nir_texop op, unsigned texid, unsigned dst_swiz, + struct etna_inst_dst dst, struct etna_inst_src coord, + struct etna_inst_src lod_bias) +{ + struct etna_inst inst = { + .dst = dst, + .tex.id = texid + (is_fs(c) ? 
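
A standalone sketch, not part of this patch, of how the ei.src mapping above is decoded in etna_emit_alu: each SRC_* value packs one 2-bit nir-source index per hardware source slot, with 3 meaning the slot is unused. All names below are illustrative:

#include <assert.h>
#include <stdio.h>

/* same packing as the SRC_* enum above: 2 bits per hw slot, 3 = unused */
#define SRC_0_1_X ((0 << 0) | (1 << 2) | (3 << 4))

int main(void)
{
   const char *nir_srcs[3] = { "a", "b", "c" };
   const char *hw_srcs[3] = { NULL, NULL, NULL };

   /* the remapping loop from etna_emit_alu: hw slot j takes nir src i */
   for (unsigned j = 0; j < 3; j++) {
      unsigned i = (SRC_0_1_X >> j * 2) & 3;
      if (i < 3)
         hw_srcs[j] = nir_srcs[i];
   }

   /* for fmul (SRC_0_1_X): src0 = a, src1 = b, src2 unused */
   assert(hw_srcs[0] && hw_srcs[1] && !hw_srcs[2]);
   printf("%s, %s\n", hw_srcs[0], hw_srcs[1]);
   return 0;
}
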
0 : c->specs->vertex_sampler_offset), + .tex.swiz = dst_swiz, + .src[0] = coord, + }; + + if (lod_bias.use) + inst.src[1] = lod_bias; + + switch (op) { + case nir_texop_tex: inst.opcode = INST_OPCODE_TEXLD; break; + case nir_texop_txb: inst.opcode = INST_OPCODE_TEXLDB; break; + case nir_texop_txl: inst.opcode = INST_OPCODE_TEXLDL; break; + default: + assert(0); + } + + emit_inst(c, &inst); +} + +static void +etna_emit_jump(struct etna_compile *c, unsigned block, struct etna_inst_src condition) +{ + if (!condition.use) { + emit_inst(c, &(struct etna_inst) {.opcode = INST_OPCODE_BRANCH, .imm = block }); + return; + } + + struct etna_inst inst = { + .opcode = INST_OPCODE_BRANCH, + .cond = INST_CONDITION_NOT, + .type = INST_TYPE_U32, + .src[0] = condition, + .imm = block, + }; + inst.src[0].swiz = INST_SWIZ_BROADCAST(inst.src[0].swiz & 3); + emit_inst(c, &inst); +} + +static void +etna_emit_discard(struct etna_compile *c, struct etna_inst_src condition) +{ + if (!condition.use) { + emit_inst(c, &(struct etna_inst) { .opcode = INST_OPCODE_TEXKILL }); + return; + } + + struct etna_inst inst = { + .opcode = INST_OPCODE_TEXKILL, + .cond = INST_CONDITION_GZ, + .src[0] = condition, + }; + inst.src[0].swiz = INST_SWIZ_BROADCAST(inst.src[0].swiz & 3); + emit_inst(c, &inst); +} + +static void +etna_emit_output(struct etna_compile *c, unsigned index, struct etna_inst_src src) +{ + c->output_reg[index] = src.reg; +} + +static void +etna_emit_load_ubo(struct etna_compile *c, struct etna_inst_dst dst, + struct etna_inst_src src, struct etna_inst_src base) +{ + emit_inst(c, &(struct etna_inst) { + .opcode = INST_OPCODE_LOAD, + .type = INST_TYPE_U32, + .dst = dst, + .src[0] = src, + .src[1] = base, + }); +} + +#define OPT(nir, pass, ...) ({ \ + bool this_progress = false; \ + NIR_PASS(this_progress, nir, pass, ##__VA_ARGS__); \ + this_progress; \ +}) +#define OPT_V(nir, pass, ...) NIR_PASS_V(nir, pass, ##__VA_ARGS__) + +static void +etna_optimize_loop(nir_shader *s) +{ + bool progress; + do { + progress = false; + + OPT_V(s, nir_lower_vars_to_ssa); + progress |= OPT(s, nir_opt_copy_prop_vars); + progress |= OPT(s, nir_copy_prop); + progress |= OPT(s, nir_opt_dce); + progress |= OPT(s, nir_opt_cse); + progress |= OPT(s, nir_opt_peephole_select, 16, true, true); + progress |= OPT(s, nir_opt_intrinsics); + progress |= OPT(s, nir_opt_algebraic); + progress |= OPT(s, nir_opt_constant_folding); + progress |= OPT(s, nir_opt_dead_cf); + if (OPT(s, nir_opt_trivial_continues)) { + progress = true; + /* If nir_opt_trivial_continues makes progress, then we need to clean + * things up if we want any hope of nir_opt_if or nir_opt_loop_unroll + * to make progress. 
+          */
+         OPT(s, nir_copy_prop);
+         OPT(s, nir_opt_dce);
+      }
+      progress |= OPT(s, nir_opt_loop_unroll, nir_var_all);
+      progress |= OPT(s, nir_opt_if, false);
+      progress |= OPT(s, nir_opt_remove_phis);
+      progress |= OPT(s, nir_opt_undef);
+   }
+   while (progress);
+}
+
+static int
+etna_glsl_type_size(const struct glsl_type *type, bool bindless)
+{
+   return glsl_count_attribute_slots(type, false);
+}
+
+static void
+copy_uniform_state_to_shader(struct etna_shader_variant *sobj, uint64_t *consts, unsigned count)
+{
+   struct etna_shader_uniform_info *uinfo = &sobj->uniforms;
+
+   uinfo->imm_count = count * 4;
+   uinfo->imm_data = MALLOC(uinfo->imm_count * sizeof(*uinfo->imm_data));
+   uinfo->imm_contents = MALLOC(uinfo->imm_count * sizeof(*uinfo->imm_contents));
+
+   for (unsigned i = 0; i < uinfo->imm_count; i++) {
+      uinfo->imm_data[i] = consts[i];
+      uinfo->imm_contents[i] = consts[i] >> 32;
+   }
+
+   etna_set_shader_uniforms_dirty_flags(sobj);
+}
+
+#include "etnaviv_compiler_nir_emit.h"
+
+bool
+etna_compile_shader_nir(struct etna_shader_variant *v)
+{
+   if (unlikely(!v))
+      return false;
+
+   struct etna_compile *c = CALLOC_STRUCT(etna_compile);
+   if (!c)
+      return false;
+
+   c->variant = v;
+   c->specs = v->shader->specs;
+   c->nir = nir_shader_clone(NULL, v->shader->nir);
+
+   nir_shader *s = c->nir;
+   const struct etna_specs *specs = c->specs;
+
+   v->stage = s->info.stage;
+   v->num_loops = 0; /* TODO */
+   v->vs_id_in_reg = -1;
+   v->vs_pos_out_reg = -1;
+   v->vs_pointsize_out_reg = -1;
+   v->ps_color_out_reg = 0; /* 0 for shader that doesn't write fragcolor.. */
+   v->ps_depth_out_reg = -1;
+
+   /* setup input linking */
+   struct etna_shader_io_file *sf = &v->infile;
+   if (s->info.stage == MESA_SHADER_VERTEX) {
+      nir_foreach_variable(var, &s->inputs) {
+         unsigned idx = var->data.driver_location;
+         sf->reg[idx].reg = idx;
+         sf->reg[idx].slot = var->data.location;
+         sf->reg[idx].num_components = 4; /* TODO */
+         sf->num_reg = MAX2(sf->num_reg, idx+1);
+      }
+   } else {
+      unsigned count = 0;
+      nir_foreach_variable(var, &s->inputs) {
+         unsigned idx = var->data.driver_location;
+         sf->reg[idx].reg = idx + 1;
+         sf->reg[idx].slot = var->data.location;
+         sf->reg[idx].num_components = 4; /* TODO */
+         sf->num_reg = MAX2(sf->num_reg, idx+1);
+         count++;
+      }
+      assert(sf->num_reg == count);
+   }
+
+   NIR_PASS_V(s, nir_lower_io, nir_var_all, etna_glsl_type_size,
+              (nir_lower_io_options)0);
+
+   OPT_V(s, nir_lower_regs_to_ssa);
+   OPT_V(s, nir_lower_vars_to_ssa);
+   OPT_V(s, nir_lower_indirect_derefs, nir_var_all);
+   OPT_V(s, nir_lower_tex, &(struct nir_lower_tex_options) { .lower_txp = ~0u });
+   OPT_V(s, etna_lower_alu_to_scalar, specs);
+
+   etna_optimize_loop(s);
+
+   /* use opt_algebraic between int_to_float and bool_to_float because
+    * int_to_float emits ftrunc, and ftrunc lowering generates bool ops
+    */
+   OPT_V(s, nir_lower_int_to_float);
+   OPT_V(s, nir_opt_algebraic);
+   OPT_V(s, nir_lower_bool_to_float);
+
+   /* run after int_to_float because it inserts i2f for instance_id */
+   OPT_V(s, etna_lower_io, v);
+
+   etna_optimize_loop(s);
+
+   if (DBG_ENABLED(ETNA_DBG_DUMP_SHADERS))
+      nir_print_shader(s, stdout);
+
+   while (OPT(s, nir_opt_vectorize));
+   OPT_V(s, etna_lower_alu_to_scalar, specs);
+
+   NIR_PASS_V(s, nir_remove_dead_variables, nir_var_function_temp);
+   NIR_PASS_V(s, nir_opt_algebraic_late);
+
+   NIR_PASS_V(s, nir_move_vec_src_uses_to_dest);
+   NIR_PASS_V(s, nir_copy_prop);
+   NIR_PASS_V(s, nir_lower_to_source_mods, ~nir_lower_int_source_mods);
+   /* need copy prop after uses_to_dest, and before src mods: see
+    *
dEQP-GLES2.functional.shaders.random.all_features.fragment.95 + */ + + NIR_PASS_V(s, nir_opt_dce); + + NIR_PASS_V(s, etna_lower_alu, c); + + if (DBG_ENABLED(ETNA_DBG_DUMP_SHADERS)) + nir_print_shader(s, stdout); + + uint64_t consts[ETNA_MAX_IMM] = {}; + + unsigned block_ptr[nir_shader_get_entrypoint(s)->num_blocks]; + c->block_ptr = block_ptr; + struct emit_options options = { + .max_temps = ETNA_MAX_TEMPS, + .max_consts = ETNA_MAX_IMM / 4, + .id_reg = sf->num_reg, + .single_const_src = c->specs->halti < 5, + .etna_new_transcendentals = c->specs->has_new_transcendentals, + .user = c, + .consts = consts, + }; + + unsigned num_consts; + bool ok = emit_shader(c->nir, &options, &v->num_temps, &num_consts); + assert(ok); + + /* empty shader, emit NOP */ + if (!c->inst_ptr) + emit_inst(c, &(struct etna_inst) { .opcode = INST_OPCODE_NOP }); + + /* assemble instructions, fixing up labels */ + uint32_t *code = MALLOC(c->inst_ptr * 16 + 1024); + for (unsigned i = 0; i < c->inst_ptr; i++) { + struct etna_inst *inst = &c->code[i]; + if (inst->opcode == INST_OPCODE_BRANCH) + inst->imm = block_ptr[inst->imm]; + + inst->halti5 = specs->halti >= 5; + etna_assemble(&code[i * 4], inst); + } + + v->code_size = c->inst_ptr * 4; + v->code = code; + v->needs_icache = c->inst_ptr > specs->max_instructions; + + copy_uniform_state_to_shader(v, consts, num_consts); + + if (s->info.stage == MESA_SHADER_FRAGMENT) { + v->input_count_unk8 = 31; /* XXX what is this */ + + nir_foreach_variable(var, &s->outputs) { + unsigned reg = c->output_reg[var->data.driver_location]; + switch (var->data.location) { + case FRAG_RESULT_COLOR: + case FRAG_RESULT_DATA0: /* DATA0 is used by gallium shaders for color */ + v->ps_color_out_reg = reg; + break; + case FRAG_RESULT_DEPTH: + v->ps_depth_out_reg = reg; + break; + default: + compile_error(c, "Unsupported fs output %s\n", gl_frag_result_name(var->data.location)); + } + } + assert(v->ps_depth_out_reg <= 0); + v->outfile.num_reg = 0; + ralloc_free(c->nir); + FREE(c); + return true; + } + + v->input_count_unk8 = DIV_ROUND_UP(v->infile.num_reg + 4, 16); /* XXX what is this */ + + sf = &v->outfile; + sf->num_reg = 0; + nir_foreach_variable(var, &s->outputs) { + unsigned native = c->output_reg[var->data.driver_location]; + + if (var->data.location == VARYING_SLOT_POS) { + v->vs_pos_out_reg = native; + continue; + } + + if (var->data.location == VARYING_SLOT_PSIZ) { + v->vs_pointsize_out_reg = native; + continue; + } + + sf->reg[sf->num_reg].reg = native; + sf->reg[sf->num_reg].slot = var->data.location; + sf->reg[sf->num_reg].num_components = 4; /* TODO */ + sf->num_reg++; + } + + /* fill in "mystery meat" load balancing value. This value determines how + * work is scheduled between VS and PS + * in the unified shader architecture. More precisely, it is determined from + * the number of VS outputs, as well as chip-specific + * vertex output buffer size, vertex cache size, and the number of shader + * cores. + * + * XXX this is a conservative estimate, the "optimal" value is only known for + * sure at link time because some + * outputs may be unused and thus unmapped. Then again, in the general use + * case with GLSL the vertex and fragment + * shaders are linked already before submitting to Gallium, thus all outputs + * are used. 
+ * + * note: TGSI compiler counts all outputs (including position and pointsize), here + * v->outfile.num_reg only counts varyings, +1 to compensate for the position output + * TODO: might have a problem that we don't count pointsize when it is used + */ + + int half_out = v->outfile.num_reg / 2 + 1; + assert(half_out); + + uint32_t b = ((20480 / (specs->vertex_output_buffer_size - + 2 * half_out * specs->vertex_cache_size)) + + 9) / + 10; + uint32_t a = (b + 256 / (specs->shader_core_count * half_out)) / 2; + v->vs_load_balancing = VIVS_VS_LOAD_BALANCING_A(MIN2(a, 255)) | + VIVS_VS_LOAD_BALANCING_B(MIN2(b, 255)) | + VIVS_VS_LOAD_BALANCING_C(0x3f) | + VIVS_VS_LOAD_BALANCING_D(0x0f); + + ralloc_free(c->nir); + FREE(c); + return true; +} + +void +etna_destroy_shader_nir(struct etna_shader_variant *shader) +{ + assert(shader); + + FREE(shader->code); + FREE(shader->uniforms.imm_data); + FREE(shader->uniforms.imm_contents); + FREE(shader); +} + +static const struct etna_shader_inout * +etna_shader_vs_lookup(const struct etna_shader_variant *sobj, + const struct etna_shader_inout *in) +{ + for (int i = 0; i < sobj->outfile.num_reg; i++) + if (sobj->outfile.reg[i].slot == in->slot) + return &sobj->outfile.reg[i]; + + return NULL; +} + +bool +etna_link_shader_nir(struct etna_shader_link_info *info, + const struct etna_shader_variant *vs, + const struct etna_shader_variant *fs) +{ + int comp_ofs = 0; + /* For each fragment input we need to find the associated vertex shader + * output, which can be found by matching on semantic name and index. A + * binary search could be used because the vs outputs are sorted by their + * semantic index and grouped by semantic type by fill_in_vs_outputs. + */ + assert(fs->infile.num_reg < ETNA_NUM_INPUTS); + info->pcoord_varying_comp_ofs = -1; + + for (int idx = 0; idx < fs->infile.num_reg; ++idx) { + const struct etna_shader_inout *fsio = &fs->infile.reg[idx]; + const struct etna_shader_inout *vsio = etna_shader_vs_lookup(vs, fsio); + struct etna_varying *varying; + bool interpolate_always = true; + + assert(fsio->reg > 0 && fsio->reg <= ARRAY_SIZE(info->varyings)); + + if (fsio->reg > info->num_varyings) + info->num_varyings = fsio->reg; + + varying = &info->varyings[fsio->reg - 1]; + varying->num_components = fsio->num_components; + + if (!interpolate_always) /* colors affected by flat shading */ + varying->pa_attributes = 0x200; + else /* texture coord or other bypasses flat shading */ + varying->pa_attributes = 0x2f1; + + varying->use[0] = VARYING_COMPONENT_USE_UNUSED; + varying->use[1] = VARYING_COMPONENT_USE_UNUSED; + varying->use[2] = VARYING_COMPONENT_USE_UNUSED; + varying->use[3] = VARYING_COMPONENT_USE_UNUSED; + + /* point coord is an input to the PS without matching VS output, + * so it gets a varying slot without being assigned a VS register. 
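
To make the load-balancing formula above concrete, here is a standalone sketch, not part of this patch, that evaluates it for made-up specs values (the chip parameters below are illustrative, not real hardware numbers):

#include <stdio.h>

#define MIN2(a, b) ((a) < (b) ? (a) : (b))

int main(void)
{
   /* hypothetical chip parameters, for illustration only */
   unsigned vertex_output_buffer_size = 512;
   unsigned vertex_cache_size = 16;
   unsigned shader_core_count = 2;
   unsigned num_varyings = 4; /* stands in for v->outfile.num_reg */

   unsigned half_out = num_varyings / 2 + 1; /* = 3 */

   /* same computation as in the patch above */
   unsigned b = ((20480 / (vertex_output_buffer_size -
                           2 * half_out * vertex_cache_size)) + 9) / 10;
   unsigned a = (b + 256 / (shader_core_count * half_out)) / 2;

   /* b = (20480/416 + 9)/10 = (49 + 9)/10 = 5; a = (5 + 42)/2 = 23 */
   printf("A=%u B=%u\n", MIN2(a, 255), MIN2(b, 255));
   return 0;
}
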
+ */ + if (fsio->slot == VARYING_SLOT_PNTC) { + varying->use[0] = VARYING_COMPONENT_USE_POINTCOORD_X; + varying->use[1] = VARYING_COMPONENT_USE_POINTCOORD_Y; + + info->pcoord_varying_comp_ofs = comp_ofs; + } else { + if (vsio == NULL) { /* not found -- link error */ + BUG("Semantic value not found in vertex shader outputs\n"); + return true; + } + varying->reg = vsio->reg; + } + + comp_ofs += varying->num_components; + } + + assert(info->num_varyings == fs->infile.num_reg); + + return false; +} diff --git a/src/gallium/drivers/etnaviv/etnaviv_compiler_nir_emit.h b/src/gallium/drivers/etnaviv/etnaviv_compiler_nir_emit.h new file mode 100644 index 00000000000..0d30fe54dfd --- /dev/null +++ b/src/gallium/drivers/etnaviv/etnaviv_compiler_nir_emit.h @@ -0,0 +1,1396 @@ +/* + * Copyright (c) 2019 Zodiac Inflight Innovations + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Authors: + * Jonathan Marek + */ + +#include "etnaviv_asm.h" +#include "etnaviv_context.h" + +#include "compiler/nir/nir.h" +#include "compiler/nir/nir_builder.h" +#include "compiler/nir/nir_worklist.h" +#include "util/register_allocate.h" + +struct emit_options { + unsigned max_temps; /* max # of vec4 registers */ + unsigned max_consts; /* max # of vec4 consts */ + unsigned id_reg; /* register with vertex/instance id */ + bool single_const_src : 1; /* limited to 1 vec4 const src */ + bool etna_new_transcendentals : 1; + void *user; + uint64_t *consts; +}; + +#define ALU_SWIZ(s) INST_SWIZ((s)->swizzle[0], (s)->swizzle[1], (s)->swizzle[2], (s)->swizzle[3]) +#define SRC_DISABLE ((hw_src){}) +#define SRC_CONST(idx, s) ((hw_src){.use=1, .rgroup = INST_RGROUP_UNIFORM_0, .reg=idx, .swiz=s}) +#define SRC_REG(idx, s) ((hw_src){.use=1, .rgroup = INST_RGROUP_TEMP, .reg=idx, .swiz=s}) + +#define option(name) (state->options->name) +#define emit(type, args...) 
etna_emit_##type(state->options->user, args) + +typedef struct etna_inst_dst hw_dst; +typedef struct etna_inst_src hw_src; + +enum { + BYPASS_DST = 1, + BYPASS_SRC = 2, +}; + +struct state { + const struct emit_options *options; + unsigned const_count; + + nir_shader *shader; + nir_function_impl *impl; + + /* ra state */ + struct ra_graph *g; + struct ra_regs *regs; + unsigned *live_map; + unsigned num_nodes; +}; + +static inline hw_src +src_swizzle(hw_src src, unsigned swizzle) +{ + src.swiz = inst_swiz_compose(src.swiz, swizzle); + return src; +} + +static inline bool is_sysval(nir_instr *instr) +{ + if (instr->type != nir_instr_type_intrinsic) + return false; + + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + return intr->intrinsic == nir_intrinsic_load_front_face || + intr->intrinsic == nir_intrinsic_load_frag_coord; +} + +/* constants are represented as 64-bit ints + * 32-bit for the value and 32-bit for the type (imm, uniform, etc) + */ + +#define CONST_VAL(a, b) (nir_const_value) {.u64 = (uint64_t)(a) << 32 | (uint64_t)(b)} +#define CONST(x) CONST_VAL(ETNA_IMMEDIATE_CONSTANT, x) +#define UNIFORM(x) CONST_VAL(ETNA_IMMEDIATE_UNIFORM, x) +#define UNIFORM_BASE(x) CONST_VAL(ETNA_IMMEDIATE_UBO0_ADDR, x) +#define TEXSCALE(x, i) CONST_VAL(ETNA_IMMEDIATE_TEXRECT_SCALE_X + (i), x) + +static int +const_add(uint64_t *c, uint64_t value) +{ + for (unsigned i = 0; i < 4; i++) { + if (c[i] == value || !c[i]) { + c[i] = value; + return i; + } + } + return -1; +} + +static hw_src +const_src(struct state *state, nir_const_value *value, unsigned num_components) +{ + unsigned i; + int swiz = -1; + for (i = 0; swiz < 0; i++) { + uint64_t *a = &option(consts)[i*4]; + uint64_t save[4]; + memcpy(save, a, sizeof(save)); + swiz = 0; + for (unsigned j = 0; j < num_components; j++) { + int c = const_add(a, value[j].u64); + if (c < 0) { + memcpy(a, save, sizeof(save)); + swiz = -1; + break; + } + swiz |= c << j * 2; + } + } + + assert(i <= option(max_consts)); + state->const_count = MAX2(state->const_count, i); + + return SRC_CONST(i - 1, swiz); +} + +struct ssa_reg { + uint8_t idx; + uint8_t src_swizzle; + uint8_t dst_swizzle; + uint8_t write_mask; +}; + +/* Swizzles and write masks can be used to layer virtual non-interfering + * registers on top of the real VEC4 registers. For example, the virtual + * VEC3_XYZ register and the virtual SCALAR_W register that use the same + * physical VEC4 base register do not interfere. 
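
A toy standalone mirror of the layering described above, with a reduced type list, showing why two virtual registers with disjoint write masks on the same base register need no conflict edge (the real driver uses the full NUM_REG_TYPES table):

#include <assert.h>

/* reduced mirror of the type/writemask layout described above */
enum { T_VEC4, T_VEC3_XYZ, T_SCALAR_W, NUM_TYPES };
static const unsigned char writemask[NUM_TYPES] = { 0xf, 0x7, 0x8 };

/* a virtual register is base * NUM_TYPES + type */
static unsigned virt(unsigned base, unsigned type) { return base * NUM_TYPES + type; }

int main(void)
{
   unsigned v3 = virt(5, T_VEC3_XYZ), sw = virt(5, T_SCALAR_W);

   /* same physical base register... */
   assert(v3 / NUM_TYPES == sw / NUM_TYPES);
   /* ...but disjoint write masks, so no conflict is added between them */
   assert((writemask[v3 % NUM_TYPES] & writemask[sw % NUM_TYPES]) == 0);
   return 0;
}
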
+ */ +enum { + REG_CLASS_VIRT_SCALAR, + REG_CLASS_VIRT_VEC2, + REG_CLASS_VIRT_VEC3, + REG_CLASS_VEC4, + /* special vec2 class for fast transcendentals, limited to XY or ZW */ + REG_CLASS_VIRT_VEC2T, + NUM_REG_CLASSES, +} reg_class; + +enum { + REG_TYPE_VEC4, + REG_TYPE_VIRT_VEC3_XYZ, + REG_TYPE_VIRT_VEC3_XYW, + REG_TYPE_VIRT_VEC3_XZW, + REG_TYPE_VIRT_VEC3_YZW, + REG_TYPE_VIRT_VEC2_XY, + REG_TYPE_VIRT_VEC2_XZ, + REG_TYPE_VIRT_VEC2_XW, + REG_TYPE_VIRT_VEC2_YZ, + REG_TYPE_VIRT_VEC2_YW, + REG_TYPE_VIRT_VEC2_ZW, + REG_TYPE_VIRT_SCALAR_X, + REG_TYPE_VIRT_SCALAR_Y, + REG_TYPE_VIRT_SCALAR_Z, + REG_TYPE_VIRT_SCALAR_W, + REG_TYPE_VIRT_VEC2T_XY, + REG_TYPE_VIRT_VEC2T_ZW, + NUM_REG_TYPES, +} reg_type; + +/* writemask when used as dest */ +static const uint8_t +reg_writemask[NUM_REG_TYPES] = { + [REG_TYPE_VEC4] = 0xf, + [REG_TYPE_VIRT_SCALAR_X] = 0x1, + [REG_TYPE_VIRT_SCALAR_Y] = 0x2, + [REG_TYPE_VIRT_VEC2_XY] = 0x3, + [REG_TYPE_VIRT_VEC2T_XY] = 0x3, + [REG_TYPE_VIRT_SCALAR_Z] = 0x4, + [REG_TYPE_VIRT_VEC2_XZ] = 0x5, + [REG_TYPE_VIRT_VEC2_YZ] = 0x6, + [REG_TYPE_VIRT_VEC3_XYZ] = 0x7, + [REG_TYPE_VIRT_SCALAR_W] = 0x8, + [REG_TYPE_VIRT_VEC2_XW] = 0x9, + [REG_TYPE_VIRT_VEC2_YW] = 0xa, + [REG_TYPE_VIRT_VEC3_XYW] = 0xb, + [REG_TYPE_VIRT_VEC2_ZW] = 0xc, + [REG_TYPE_VIRT_VEC2T_ZW] = 0xc, + [REG_TYPE_VIRT_VEC3_XZW] = 0xd, + [REG_TYPE_VIRT_VEC3_YZW] = 0xe, +}; + +/* how to swizzle when used as a src */ +static const uint8_t +reg_swiz[NUM_REG_TYPES] = { + [REG_TYPE_VEC4] = INST_SWIZ_IDENTITY, + [REG_TYPE_VIRT_SCALAR_X] = INST_SWIZ_IDENTITY, + [REG_TYPE_VIRT_SCALAR_Y] = SWIZZLE(Y, Y, Y, Y), + [REG_TYPE_VIRT_VEC2_XY] = INST_SWIZ_IDENTITY, + [REG_TYPE_VIRT_VEC2T_XY] = INST_SWIZ_IDENTITY, + [REG_TYPE_VIRT_SCALAR_Z] = SWIZZLE(Z, Z, Z, Z), + [REG_TYPE_VIRT_VEC2_XZ] = SWIZZLE(X, Z, X, Z), + [REG_TYPE_VIRT_VEC2_YZ] = SWIZZLE(Y, Z, Y, Z), + [REG_TYPE_VIRT_VEC3_XYZ] = INST_SWIZ_IDENTITY, + [REG_TYPE_VIRT_SCALAR_W] = SWIZZLE(W, W, W, W), + [REG_TYPE_VIRT_VEC2_XW] = SWIZZLE(X, W, X, W), + [REG_TYPE_VIRT_VEC2_YW] = SWIZZLE(Y, W, Y, W), + [REG_TYPE_VIRT_VEC3_XYW] = SWIZZLE(X, Y, W, X), + [REG_TYPE_VIRT_VEC2_ZW] = SWIZZLE(Z, W, Z, W), + [REG_TYPE_VIRT_VEC2T_ZW] = SWIZZLE(Z, W, Z, W), + [REG_TYPE_VIRT_VEC3_XZW] = SWIZZLE(X, Z, W, X), + [REG_TYPE_VIRT_VEC3_YZW] = SWIZZLE(Y, Z, W, X), +}; + +/* how to swizzle when used as a dest */ +static const uint8_t +reg_dst_swiz[NUM_REG_TYPES] = { + [REG_TYPE_VEC4] = INST_SWIZ_IDENTITY, + [REG_TYPE_VIRT_SCALAR_X] = INST_SWIZ_IDENTITY, + [REG_TYPE_VIRT_SCALAR_Y] = SWIZZLE(X, X, X, X), + [REG_TYPE_VIRT_VEC2_XY] = INST_SWIZ_IDENTITY, + [REG_TYPE_VIRT_VEC2T_XY] = INST_SWIZ_IDENTITY, + [REG_TYPE_VIRT_SCALAR_Z] = SWIZZLE(X, X, X, X), + [REG_TYPE_VIRT_VEC2_XZ] = SWIZZLE(X, X, Y, Y), + [REG_TYPE_VIRT_VEC2_YZ] = SWIZZLE(X, X, Y, Y), + [REG_TYPE_VIRT_VEC3_XYZ] = INST_SWIZ_IDENTITY, + [REG_TYPE_VIRT_SCALAR_W] = SWIZZLE(X, X, X, X), + [REG_TYPE_VIRT_VEC2_XW] = SWIZZLE(X, X, Y, Y), + [REG_TYPE_VIRT_VEC2_YW] = SWIZZLE(X, X, Y, Y), + [REG_TYPE_VIRT_VEC3_XYW] = SWIZZLE(X, Y, Z, Z), + [REG_TYPE_VIRT_VEC2_ZW] = SWIZZLE(X, X, X, Y), + [REG_TYPE_VIRT_VEC2T_ZW] = SWIZZLE(X, X, X, Y), + [REG_TYPE_VIRT_VEC3_XZW] = SWIZZLE(X, Y, Y, Z), + [REG_TYPE_VIRT_VEC3_YZW] = SWIZZLE(X, X, Y, Z), +}; + +static inline int reg_get_type(int virt_reg) +{ + return virt_reg % NUM_REG_TYPES; +} + +static inline int reg_get_base(struct state *state, int virt_reg) +{ + /* offset by 1 to avoid reserved position register */ + if (state->shader->info.stage == MESA_SHADER_FRAGMENT) + return virt_reg / NUM_REG_TYPES + 1; + return virt_reg / 
NUM_REG_TYPES; +} + +static inline int reg_get_class(int virt_reg) +{ + switch (reg_get_type(virt_reg)) { + case REG_TYPE_VEC4: + return REG_CLASS_VEC4; + case REG_TYPE_VIRT_VEC3_XYZ: + case REG_TYPE_VIRT_VEC3_XYW: + case REG_TYPE_VIRT_VEC3_XZW: + case REG_TYPE_VIRT_VEC3_YZW: + return REG_CLASS_VIRT_VEC3; + case REG_TYPE_VIRT_VEC2_XY: + case REG_TYPE_VIRT_VEC2_XZ: + case REG_TYPE_VIRT_VEC2_XW: + case REG_TYPE_VIRT_VEC2_YZ: + case REG_TYPE_VIRT_VEC2_YW: + case REG_TYPE_VIRT_VEC2_ZW: + return REG_CLASS_VIRT_VEC2; + case REG_TYPE_VIRT_SCALAR_X: + case REG_TYPE_VIRT_SCALAR_Y: + case REG_TYPE_VIRT_SCALAR_Z: + case REG_TYPE_VIRT_SCALAR_W: + return REG_CLASS_VIRT_SCALAR; + case REG_TYPE_VIRT_VEC2T_XY: + case REG_TYPE_VIRT_VEC2T_ZW: + return REG_CLASS_VIRT_VEC2T; + } + + assert(false); + return 0; +} + +/* get unique ssa/reg index for nir_src */ +static unsigned +src_index(nir_function_impl *impl, nir_src *src) +{ + return src->is_ssa ? src->ssa->index : (src->reg.reg->index + impl->ssa_alloc); +} + +/* get unique ssa/reg index for nir_dest */ +static unsigned +dest_index(nir_function_impl *impl, nir_dest *dest) +{ + return dest->is_ssa ? dest->ssa.index : (dest->reg.reg->index + impl->ssa_alloc); +} + +/* nir_src to allocated register */ +static hw_src +ra_src(struct state *state, nir_src *src) +{ + unsigned reg = ra_get_node_reg(state->g, state->live_map[src_index(state->impl, src)]); + return SRC_REG(reg_get_base(state, reg), reg_swiz[reg_get_type(reg)]); +} + +static hw_src +get_src(struct state *state, nir_src *src) +{ + if (!src->is_ssa) + return ra_src(state, src); + + nir_instr *instr = src->ssa->parent_instr; + + if (instr->pass_flags & BYPASS_SRC) { + assert(instr->type == nir_instr_type_alu); + nir_alu_instr *alu = nir_instr_as_alu(instr); + assert(alu->op == nir_op_mov); + return src_swizzle(get_src(state, &alu->src[0].src), ALU_SWIZ(&alu->src[0])); + } + + switch (instr->type) { + case nir_instr_type_load_const: + return const_src(state, nir_instr_as_load_const(instr)->value, src->ssa->num_components); + case nir_instr_type_intrinsic: { + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + switch (intr->intrinsic) { + case nir_intrinsic_load_input: + case nir_intrinsic_load_instance_id: + case nir_intrinsic_load_uniform: + return ra_src(state, src); + case nir_intrinsic_load_front_face: + return (hw_src) { .use = 1, .rgroup = INST_RGROUP_INTERNAL }; + case nir_intrinsic_load_frag_coord: + return SRC_REG(0, INST_SWIZ_IDENTITY); + default: + assert(0); + break; + } + } break; + case nir_instr_type_alu: + case nir_instr_type_tex: + return ra_src(state, src); + case nir_instr_type_ssa_undef: { + /* return zero to deal with broken Blur demo */ + nir_const_value value = CONST(0); + return src_swizzle(const_src(state, &value, 1), SWIZZLE(X,X,X,X)); + } + default: + assert(0); + break; + } + + return SRC_DISABLE; +} + +static void +update_swiz_mask(nir_alu_instr *alu, nir_dest *dest, unsigned *swiz, unsigned *mask) +{ + if (!swiz) + return; + + bool is_vec = dest != NULL; + unsigned swizzle = 0, write_mask = 0; + for (unsigned i = 0; i < 4; i++) { + /* channel not written */ + if (!(alu->dest.write_mask & (1 << i))) + continue; + /* src is different (only check for vecN) */ + if (is_vec && alu->src[i].src.ssa != &dest->ssa) + continue; + + unsigned src_swiz = is_vec ? 
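
A standalone sketch of the swizzle plumbing used by ra_src()/src_swizzle(), assuming inst_swiz_compose() selects channels of the inner swizzle through the outer one (toy code, not the driver's implementation):

#include <assert.h>

/* 8-bit swizzle: 2 bits per dest channel, like INST_SWIZ */
#define SWIZ(x, y, z, w) ((x) | (y) << 2 | (z) << 4 | (w) << 6)
#define SWIZ_GET(s, i)   (((s) >> (i) * 2) & 3)

/* sketch of inst_swiz_compose(): apply 'outer' on top of 'inner' */
static unsigned swiz_compose(unsigned inner, unsigned outer)
{
   unsigned res = 0;
   for (unsigned i = 0; i < 4; i++)
      res |= SWIZ_GET(inner, SWIZ_GET(outer, i)) << i * 2;
   return res;
}

int main(void)
{
   /* value lives in the ZW half of a vec4: reads back as SWIZZLE(Z, W, Z, W) */
   unsigned zw = SWIZ(2, 3, 2, 3);
   /* the instruction wants .yx of the virtual vec2 */
   unsigned yx = SWIZ(1, 0, 1, 0);
   /* composed: .wz of the physical register */
   assert(swiz_compose(zw, yx) == SWIZ(3, 2, 3, 2));
   return 0;
}
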
alu->src[i].swizzle[0] : alu->src[0].swizzle[i]; + swizzle |= (*swiz >> src_swiz * 2 & 3) << i * 2; + /* this channel isn't written through this chain */ + if (*mask & (1 << src_swiz)) + write_mask |= 1 << i; + } + *swiz = swizzle; + *mask = write_mask; +} + +static bool +vec_dest_has_swizzle(nir_alu_instr *vec, nir_ssa_def *ssa) +{ + for (unsigned i = 0; i < 4; i++) { + if (!(vec->dest.write_mask & (1 << i)) || vec->src[i].src.ssa != ssa) + continue; + + if (vec->src[i].swizzle[0] != i) + return true; + } + + /* don't deal with possible bypassed vec/mov chain */ + nir_foreach_use(use_src, ssa) { + nir_instr *instr = use_src->parent_instr; + if (instr->type != nir_instr_type_alu) + continue; + + nir_alu_instr *alu = nir_instr_as_alu(instr); + + switch (alu->op) { + case nir_op_mov: + case nir_op_vec2: + case nir_op_vec3: + case nir_op_vec4: + return true; + default: + break; + } + } + return false; +} + +static nir_dest * +real_dest(nir_dest *dest, unsigned *swiz, unsigned *mask) +{ + if (!dest || !dest->is_ssa) + return dest; + + bool can_bypass_src = !list_length(&dest->ssa.if_uses); + nir_instr *p_instr = dest->ssa.parent_instr; + + /* if used by a vecN, the "real" destination becomes the vecN destination + * lower_alu guarantees that values used by a vecN are only used by that vecN + * we can apply the same logic to movs in a some cases too + */ + nir_foreach_use(use_src, &dest->ssa) { + nir_instr *instr = use_src->parent_instr; + + /* src bypass check: for now only deal with tex src mov case + * note: for alu don't bypass mov for multiple uniform sources + */ + switch (instr->type) { + case nir_instr_type_tex: + if (p_instr->type == nir_instr_type_alu && + nir_instr_as_alu(p_instr)->op == nir_op_mov) { + break; + } + default: + can_bypass_src = false; + break; + } + + if (instr->type != nir_instr_type_alu) + continue; + + nir_alu_instr *alu = nir_instr_as_alu(instr); + + switch (alu->op) { + case nir_op_vec2: + case nir_op_vec3: + case nir_op_vec4: + assert(list_length(&dest->ssa.if_uses) == 0); + nir_foreach_use(use_src, &dest->ssa) + assert(use_src->parent_instr == instr); + + update_swiz_mask(alu, dest, swiz, mask); + break; + case nir_op_mov: { + switch (dest->ssa.parent_instr->type) { + case nir_instr_type_alu: + case nir_instr_type_tex: + break; + default: + continue; + } + if (list_length(&dest->ssa.if_uses) || list_length(&dest->ssa.uses) > 1) + continue; + + update_swiz_mask(alu, NULL, swiz, mask); + break; + }; + default: + continue; + } + + assert(!(instr->pass_flags & BYPASS_SRC)); + instr->pass_flags |= BYPASS_DST; + return real_dest(&alu->dest.dest, swiz, mask); + } + + if (can_bypass_src && !(p_instr->pass_flags & BYPASS_DST)) { + p_instr->pass_flags |= BYPASS_SRC; + return NULL; + } + + return dest; +} + +/* get allocated dest register for nir_dest + * *p_swiz tells how the components need to be placed into register + */ +static hw_dst +ra_dest(struct state *state, nir_dest *dest, unsigned *p_swiz) +{ + unsigned swiz = INST_SWIZ_IDENTITY, mask = 0xf; + dest = real_dest(dest, &swiz, &mask); + + unsigned r = ra_get_node_reg(state->g, state->live_map[dest_index(state->impl, dest)]); + unsigned t = reg_get_type(r); + + *p_swiz = inst_swiz_compose(swiz, reg_dst_swiz[t]); + + return (hw_dst) { + .use = 1, + .reg = reg_get_base(state, r), + .write_mask = inst_write_mask_compose(mask, reg_writemask[t]), + }; +} + +/* if instruction dest needs a register, return nir_dest for it */ +static nir_dest * +dest_for_instr(nir_instr *instr) +{ + nir_dest *dest = NULL; + + switch 
(instr->type) { + case nir_instr_type_alu: + dest = &nir_instr_as_alu(instr)->dest.dest; + break; + case nir_instr_type_tex: + dest =&nir_instr_as_tex(instr)->dest; + break; + case nir_instr_type_intrinsic: { + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + if (intr->intrinsic == nir_intrinsic_load_uniform || + intr->intrinsic == nir_intrinsic_load_input || + intr->intrinsic == nir_intrinsic_load_instance_id) + dest = &intr->dest; + } + default: + break; + } + return real_dest(dest, NULL, NULL); +} + +struct live_def { + nir_instr *instr; + nir_dest *dest; /* cached dest_for_instr */ + unsigned live_start, live_end; /* live range */ +}; + +static void +range_include(struct live_def *def, unsigned index) +{ + if (def->live_start > index) + def->live_start = index; + if (def->live_end < index) + def->live_end = index; +} + +struct live_defs_state { + unsigned num_defs; + unsigned bitset_words; + + nir_function_impl *impl; + nir_block *block; /* current block pointer */ + unsigned index; /* current live index */ + + struct live_def *defs; + unsigned *live_map; /* to map ssa/reg index into defs array */ + + nir_block_worklist worklist; +}; + +static bool +init_liveness_block(nir_block *block, + struct live_defs_state *state) +{ + block->live_in = reralloc(block, block->live_in, BITSET_WORD, + state->bitset_words); + memset(block->live_in, 0, state->bitset_words * sizeof(BITSET_WORD)); + + block->live_out = reralloc(block, block->live_out, BITSET_WORD, + state->bitset_words); + memset(block->live_out, 0, state->bitset_words * sizeof(BITSET_WORD)); + + nir_block_worklist_push_head(&state->worklist, block); + + return true; +} + +static bool +set_src_live(nir_src *src, void *void_state) +{ + struct live_defs_state *state = void_state; + + if (src->is_ssa) { + nir_instr *instr = src->ssa->parent_instr; + + if (is_sysval(instr)) + return true; + + switch (instr->type) { + case nir_instr_type_load_const: + case nir_instr_type_ssa_undef: + return true; + case nir_instr_type_alu: { + /* alu op bypass */ + nir_alu_instr *alu = nir_instr_as_alu(instr); + if (instr->pass_flags & BYPASS_SRC) { + for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) + set_src_live(&alu->src[i].src, state); + return true; + } + } break; + default: + break; + } + } + + unsigned i = state->live_map[src_index(state->impl, src)]; + assert(i != ~0u); + + BITSET_SET(state->block->live_in, i); + range_include(&state->defs[i], state->index); + + return true; +} + +static bool +propagate_across_edge(nir_block *pred, nir_block *succ, + struct live_defs_state *state) +{ + BITSET_WORD progress = 0; + for (unsigned i = 0; i < state->bitset_words; ++i) { + progress |= succ->live_in[i] & ~pred->live_out[i]; + pred->live_out[i] |= succ->live_in[i]; + } + return progress != 0; +} + +static unsigned +live_defs(nir_function_impl *impl, struct live_def *defs, unsigned *live_map) +{ + struct live_defs_state state; + unsigned block_live_index[impl->num_blocks + 1]; + + state.impl = impl; + state.defs = defs; + state.live_map = live_map; + + state.num_defs = 0; + nir_foreach_block(block, impl) { + block_live_index[block->index] = state.num_defs; + nir_foreach_instr(instr, block) { + nir_dest *dest = dest_for_instr(instr); + if (!dest) + continue; + + unsigned idx = dest_index(impl, dest); + /* register is already in defs */ + if (live_map[idx] != ~0u) + continue; + + defs[state.num_defs] = (struct live_def) {instr, dest, state.num_defs, 0}; + + /* input live from the start */ + if (instr->type == nir_instr_type_intrinsic) 
{ + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + if (intr->intrinsic == nir_intrinsic_load_input || + intr->intrinsic == nir_intrinsic_load_instance_id) + defs[state.num_defs].live_start = 0; + } + + live_map[idx] = state.num_defs; + state.num_defs++; + } + } + block_live_index[impl->num_blocks] = state.num_defs; + + nir_block_worklist_init(&state.worklist, impl->num_blocks, NULL); + + /* We now know how many unique ssa definitions we have and we can go + * ahead and allocate live_in and live_out sets and add all of the + * blocks to the worklist. + */ + state.bitset_words = BITSET_WORDS(state.num_defs); + nir_foreach_block(block, impl) { + init_liveness_block(block, &state); + } + + /* We're now ready to work through the worklist and update the liveness + * sets of each of the blocks. By the time we get to this point, every + * block in the function implementation has been pushed onto the + * worklist in reverse order. As long as we keep the worklist + * up-to-date as we go, everything will get covered. + */ + while (!nir_block_worklist_is_empty(&state.worklist)) { + /* We pop them off in the reverse order we pushed them on. This way + * the first walk of the instructions is backwards so we only walk + * once in the case of no control flow. + */ + nir_block *block = nir_block_worklist_pop_head(&state.worklist); + state.block = block; + + memcpy(block->live_in, block->live_out, + state.bitset_words * sizeof(BITSET_WORD)); + + state.index = block_live_index[block->index + 1]; + + nir_if *following_if = nir_block_get_following_if(block); + if (following_if) + set_src_live(&following_if->condition, &state); + + nir_foreach_instr_reverse(instr, block) { + /* when we come across the next "live" instruction, decrement index */ + if (state.index && instr == defs[state.index - 1].instr) { + state.index--; + /* the only source of writes to registers is phis: + * we don't expect any partial write_mask alus + * so clearing live_in here is OK + */ + BITSET_CLEAR(block->live_in, state.index); + } + + /* don't set_src_live for not-emitted instructions */ + if (instr->pass_flags) + continue; + + unsigned index = state.index; + + /* output live till the end */ + if (instr->type == nir_instr_type_intrinsic) { + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + if (intr->intrinsic == nir_intrinsic_store_output) + state.index = ~0u; + } + + nir_foreach_src(instr, set_src_live, &state); + + state.index = index; + } + assert(state.index == block_live_index[block->index]); + + /* Walk over all of the predecessors of the current block updating + * their live in with the live out of this one. If anything has + * changed, add the predecessor to the work list so that we ensure + * that the new information is used. 
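
The worklist pass here is the standard backward liveness fixed point: a predecessor's live_out accumulates the live_in sets of its successors until nothing changes. A minimal standalone version of one propagation step, mirroring propagate_across_edge() (toy code, not part of this patch):

#include <assert.h>
#include <stdint.h>

/* one propagation step across a CFG edge, as in propagate_across_edge():
 * pred->live_out |= succ->live_in, reporting whether anything changed */
static int propagate(uint32_t *pred_live_out, uint32_t succ_live_in)
{
   uint32_t progress = succ_live_in & ~*pred_live_out;
   *pred_live_out |= succ_live_in;
   return progress != 0;
}

int main(void)
{
   uint32_t live_out = 0x5;           /* defs 0 and 2 live */
   assert(propagate(&live_out, 0x6)); /* def 1 newly live via successor */
   assert(live_out == 0x7);
   assert(!propagate(&live_out, 0x2)); /* second pass: fixed point reached */
   return 0;
}
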
+       */
+      set_foreach(block->predecessors, entry) {
+         nir_block *pred = (nir_block *)entry->key;
+         if (propagate_across_edge(pred, block, &state))
+            nir_block_worklist_push_tail(&state.worklist, pred);
+      }
+   }
+
+   nir_block_worklist_fini(&state.worklist);
+
+   /* apply live_in/live_out to ranges */
+
+   nir_foreach_block(block, impl) {
+      BITSET_WORD tmp;
+      int i;
+
+      BITSET_FOREACH_SET(i, tmp, block->live_in, state.num_defs)
+         range_include(&state.defs[i], block_live_index[block->index]);
+
+      BITSET_FOREACH_SET(i, tmp, block->live_out, state.num_defs)
+         range_include(&state.defs[i], block_live_index[block->index + 1]);
+   }
+
+   return state.num_defs;
+}
+
+/* precomputed by register_allocate */
+static unsigned int *q_values[] = {
+   (unsigned int[]) { 1, 2, 3, 4, 2 },
+   (unsigned int[]) { 3, 5, 6, 6, 5 },
+   (unsigned int[]) { 3, 4, 4, 4, 4 },
+   (unsigned int[]) { 1, 1, 1, 1, 1 },
+   (unsigned int[]) { 1, 2, 2, 2, 1 },
+};
+
+static void
+ra_assign(struct state *state, nir_shader *shader)
+{
+   struct ra_regs *regs = ra_alloc_reg_set(NULL, option(max_temps) *
+                                           NUM_REG_TYPES, false);
+
+   /* classes are always created from index 0, so the class index is equal to
+    * the class enum, which represents a register with (c+1) components
+    */
+   for (int c = 0; c < NUM_REG_CLASSES; c++)
+      ra_alloc_reg_class(regs);
+   /* add each register of each class */
+   for (int r = 0; r < NUM_REG_TYPES * option(max_temps); r++)
+      ra_class_add_reg(regs, reg_get_class(r), r);
+   /* set conflicts */
+   for (int r = 0; r < option(max_temps); r++) {
+      for (int i = 0; i < NUM_REG_TYPES; i++) {
+         for (int j = 0; j < i; j++) {
+            if (reg_writemask[i] & reg_writemask[j]) {
+               ra_add_reg_conflict(regs, NUM_REG_TYPES * r + i,
+                                   NUM_REG_TYPES * r + j);
+            }
+         }
+      }
+   }
+   ra_set_finalize(regs, q_values);
+
+   nir_function_impl *impl = nir_shader_get_entrypoint(shader);
+
+   /* liveness and interference */
+
+   nir_index_blocks(impl);
+   nir_index_ssa_defs(impl);
+   nir_foreach_block(block, impl) {
+      nir_foreach_instr(instr, block)
+         instr->pass_flags = 0;
+   }
+
+   /* this gives an approximation/upper limit on how many nodes are needed
+    * (some ssa values do not represent an allocated register)
+    */
+   unsigned max_nodes = impl->ssa_alloc + impl->reg_alloc;
+   unsigned *live_map = ralloc_array(NULL, unsigned, max_nodes);
+   memset(live_map, 0xff, sizeof(unsigned) * max_nodes);
+   struct live_def *defs = rzalloc_array(NULL, struct live_def, max_nodes);
+
+   unsigned num_nodes = live_defs(impl, defs, live_map);
+   struct ra_graph *g = ra_alloc_interference_graph(regs, num_nodes);
+
+   /* set classes from num_components */
+   for (unsigned i = 0; i < num_nodes; i++) {
+      nir_instr *instr = defs[i].instr;
+      nir_dest *dest = defs[i].dest;
+
+      ra_set_node_class(g, i, nir_dest_num_components(*dest) - 1);
+
+      if (instr->type == nir_instr_type_alu && option(etna_new_transcendentals)) {
+         switch (nir_instr_as_alu(instr)->op) {
+         case nir_op_fdiv:
+         case nir_op_flog2:
+         case nir_op_fsin:
+         case nir_op_fcos:
+            assert(dest->is_ssa);
+            ra_set_node_class(g, i, REG_CLASS_VIRT_VEC2T);
+         default:
+            break;
+         }
+      }
+   }
+
+   nir_foreach_block(block, impl) {
+      nir_foreach_instr(instr, block) {
+         if (instr->type != nir_instr_type_intrinsic)
+            continue;
+
+         nir_dest *dest = dest_for_instr(instr);
+         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+         unsigned reg;
+
+         switch (intr->intrinsic) {
+         case nir_intrinsic_store_output: {
+            /* don't want output to be swizzled
+             * TODO: better would be to set the type to X/XY/XYZ/XYZW
+             */
+            ra_set_node_class(g, live_map[src_index(impl, &intr->src[0])], REG_CLASS_VEC4);
+         } continue;
+         case nir_intrinsic_load_input:
+            reg = nir_intrinsic_base(intr) * NUM_REG_TYPES + (unsigned[]) {
+               REG_TYPE_VIRT_SCALAR_X,
+               REG_TYPE_VIRT_VEC2_XY,
+               REG_TYPE_VIRT_VEC3_XYZ,
+               REG_TYPE_VEC4,
+            }[nir_dest_num_components(*dest) - 1];
+            break;
+         case nir_intrinsic_load_instance_id:
+            reg = option(id_reg) * NUM_REG_TYPES + REG_TYPE_VIRT_SCALAR_Y;
+            break;
+         default:
+            continue;
+         }
+
+         ra_set_node_reg(g, live_map[dest_index(impl, dest)], reg);
+      }
+   }
+
+   /* add interference for intersecting live ranges */
+   for (unsigned i = 0; i < num_nodes; i++) {
+      assert(defs[i].live_start < defs[i].live_end);
+      for (unsigned j = 0; j < i; j++) {
+         if (defs[i].live_start >= defs[j].live_end || defs[j].live_start >= defs[i].live_end)
+            continue;
+         ra_add_node_interference(g, i, j);
+      }
+   }
+
+   ralloc_free(defs);
+
+   /* Allocate registers */
+   bool ok = ra_allocate(g);
+   assert(ok);
+
+   state->g = g;
+   state->regs = regs;
+   state->live_map = live_map;
+   state->num_nodes = num_nodes;
+}
+
+static unsigned
+ra_finish(struct state *state)
+{
+   /* TODO: better way to get number of registers used? */
+   unsigned j = 0;
+   for (unsigned i = 0; i < state->num_nodes; i++) {
+      j = MAX2(j, reg_get_base(state, ra_get_node_reg(state->g, i)) + 1);
+   }
+
+   ralloc_free(state->g);
+   ralloc_free(state->regs);
+   ralloc_free(state->live_map);
+
+   return j;
+}
+
+static void
+emit_alu(struct state *state, nir_alu_instr * alu)
+{
+   const nir_op_info *info = &nir_op_infos[alu->op];
+
+   /* marked as dead instruction (vecN and other bypassed instr) */
+   if (alu->instr.pass_flags)
+      return;
+
+   assert(!(alu->op >= nir_op_vec2 && alu->op <= nir_op_vec4));
+
+   unsigned dst_swiz;
+   hw_dst dst = ra_dest(state, &alu->dest.dest, &dst_swiz);
+
+   /* compose alu write_mask with RA write mask */
+   if (!alu->dest.dest.is_ssa)
+      dst.write_mask = inst_write_mask_compose(alu->dest.write_mask, dst.write_mask);
+
+   switch (alu->op) {
+   case nir_op_fdot2:
+   case nir_op_fdot3:
+   case nir_op_fdot4:
+      /* not per-component - don't compose dst_swiz */
+      dst_swiz = INST_SWIZ_IDENTITY;
+      break;
+   default:
+      break;
+   }
+
+   hw_src srcs[3];
+
+   for (int i = 0; i < info->num_inputs; i++) {
+      nir_alu_src *asrc = &alu->src[i];
+      hw_src src;
+
+      src = src_swizzle(get_src(state, &asrc->src), ALU_SWIZ(asrc));
+      src = src_swizzle(src, dst_swiz);
+
+      if (src.rgroup != INST_RGROUP_IMMEDIATE) {
+         src.neg = asrc->negate || (alu->op == nir_op_fneg);
+         src.abs = asrc->abs || (alu->op == nir_op_fabs);
+      } else {
+         assert(!asrc->negate && alu->op != nir_op_fneg);
+         assert(!asrc->abs && alu->op != nir_op_fabs);
+      }
+
+      srcs[i] = src;
+   }
+
+   emit(alu, alu->op, dst, srcs, alu->dest.saturate || (alu->op == nir_op_fsat));
+}
+
+static void
+emit_tex(struct state *state, nir_tex_instr * tex)
+{
+   unsigned dst_swiz;
+   hw_dst dst = ra_dest(state, &tex->dest, &dst_swiz);
+   nir_src *coord = NULL;
+   nir_src *lod_bias = NULL;
+
+   for (unsigned i = 0; i < tex->num_srcs; i++) {
+      switch (tex->src[i].src_type) {
+      case nir_tex_src_coord:
+         coord = &tex->src[i].src;
+         break;
+      case nir_tex_src_bias:
+      case nir_tex_src_lod:
+         assert(!lod_bias);
+         lod_bias = &tex->src[i].src;
+         break;
+      default:
+         assert(0);
+         break;
+      }
+   }
+
+   emit(tex, tex->op, tex->sampler_index, dst_swiz, dst, get_src(state, coord),
+        lod_bias ? get_src(state, lod_bias) : SRC_DISABLE);
+}
+
+static void
+emit_intrinsic(struct state *state, nir_intrinsic_instr * intr)
+{
+   switch (intr->intrinsic) {
+   case nir_intrinsic_store_output:
+      emit(output, nir_intrinsic_base(intr), get_src(state, &intr->src[0]));
+      break;
+   case nir_intrinsic_discard_if:
+      emit(discard, get_src(state, &intr->src[0]));
+      break;
+   case nir_intrinsic_discard:
+      emit(discard, SRC_DISABLE);
+      break;
+   case nir_intrinsic_load_uniform: {
+      unsigned dst_swiz;
+      hw_dst dst = ra_dest(state, &intr->dest, &dst_swiz);
+      /* TODO: might have a problem with dst_swiz .. */
+      emit(load_ubo, dst, get_src(state, &intr->src[0]), const_src(state, &UNIFORM_BASE(nir_intrinsic_base(intr) * 16), 1));
+   } break;
+   case nir_intrinsic_load_front_face:
+   case nir_intrinsic_load_frag_coord:
+      assert(intr->dest.is_ssa); /* TODO - lower phis could cause this */
+      break;
+   case nir_intrinsic_load_input:
+   case nir_intrinsic_load_instance_id:
+      break;
+   default:
+      assert(0);
+   }
+}
+
+static void
+emit_instr(struct state *state, nir_instr * instr)
+{
+   switch (instr->type) {
+   case nir_instr_type_alu:
+      emit_alu(state, nir_instr_as_alu(instr));
+      break;
+   case nir_instr_type_tex:
+      emit_tex(state, nir_instr_as_tex(instr));
+      break;
+   case nir_instr_type_intrinsic:
+      emit_intrinsic(state, nir_instr_as_intrinsic(instr));
+      break;
+   case nir_instr_type_jump:
+      assert(nir_instr_is_last(instr));
+   case nir_instr_type_load_const:
+   case nir_instr_type_ssa_undef:
+      break;
+   default:
+      assert(0);
+      break;
+   }
+}
+
+static void
+emit_block(struct state *state, nir_block * block)
+{
+   emit(block_start, block->index);
+
+   nir_foreach_instr(instr, block)
+      emit_instr(state, instr);
+
+   /* succs->index < block->index is for the loop case */
+   nir_block *succs = block->successors[0];
+   if (nir_block_ends_in_jump(block) || succs->index < block->index)
+      emit(jump, succs->index, SRC_DISABLE);
+}
+
+static void
+emit_cf_list(struct state *state, struct exec_list *list);
+
+static void
+emit_if(struct state *state, nir_if * nif)
+{
+   emit(jump, nir_if_first_else_block(nif)->index, get_src(state, &nif->condition));
+   emit_cf_list(state, &nif->then_list);
+
+   /* jump at end of then_list to skip else_list
+    * not needed if then_list already ends with a jump or else_list is empty
+    */
+   if (!nir_block_ends_in_jump(nir_if_last_then_block(nif)) &&
+       !nir_cf_list_is_empty_block(&nif->else_list))
+      emit(jump, nir_if_last_else_block(nif)->successors[0]->index, SRC_DISABLE);
+
+   emit_cf_list(state, &nif->else_list);
+}
+
+static void
+emit_cf_list(struct state *state, struct exec_list *list)
+{
+   foreach_list_typed(nir_cf_node, node, node, list) {
+      switch (node->type) {
+      case nir_cf_node_block:
+         emit_block(state, nir_cf_node_as_block(node));
+         break;
+      case nir_cf_node_if:
+         emit_if(state, nir_cf_node_as_if(node));
+         break;
+      case nir_cf_node_loop:
+         emit_cf_list(state, &nir_cf_node_as_loop(node)->body);
+         break;
+      default:
+         assert(0);
+         break;
+      }
+   }
+}
+
+/* based on nir_lower_vec_to_movs */
+static unsigned
+insert_vec_mov(nir_alu_instr *vec, unsigned start_idx, nir_shader *shader)
+{
+   assert(start_idx < nir_op_infos[vec->op].num_inputs);
+   unsigned write_mask = (1u << start_idx);
+
+   nir_alu_instr *mov = nir_alu_instr_create(shader, nir_op_mov);
+   nir_alu_src_copy(&mov->src[0], &vec->src[start_idx], mov);
+
+   mov->src[0].swizzle[0] = vec->src[start_idx].swizzle[0];
+   mov->src[0].negate = vec->src[start_idx].negate;
+   mov->src[0].abs = vec->src[start_idx].abs;
+
+   unsigned num_components = 1;
+
+   for (unsigned i = start_idx + 1; i < 4; i++) {
+      if (!(vec->dest.write_mask & (1 << i)))
+         continue;
+
+      if (nir_srcs_equal(vec->src[i].src, vec->src[start_idx].src) &&
+          vec->src[i].negate == vec->src[start_idx].negate &&
+          vec->src[i].abs == vec->src[start_idx].abs) {
+         write_mask |= (1 << i);
+         mov->src[0].swizzle[num_components] = vec->src[i].swizzle[0];
+         num_components++;
+      }
+   }
+
+   mov->dest.write_mask = (1 << num_components) - 1;
+   nir_ssa_dest_init(&mov->instr, &mov->dest.dest, num_components, 32, NULL);
+
+   /* replace vec srcs with inserted mov */
+   for (unsigned i = 0, j = 0; i < 4; i++) {
+      if (!(write_mask & (1 << i)))
+         continue;
+
+      nir_instr_rewrite_src(&vec->instr, &vec->src[i].src, nir_src_for_ssa(&mov->dest.dest.ssa));
+      vec->src[i].swizzle[0] = j++;
+   }
+
+   nir_instr_insert_before(&vec->instr, &mov->instr);
+
+   return write_mask;
+}
+
+/*
+ * for vecN instructions:
+ * -merge constant sources into a single src
+ * -insert movs (nir_lower_vec_to_movs equivalent)
+ * for non-vecN instructions:
+ * -try to merge constants as single constant
+ * -insert movs for multiple constants (pre-HALTI5)
+ */
+static void
+lower_alu(struct state *state, nir_alu_instr *alu)
+{
+   const nir_op_info *info = &nir_op_infos[alu->op];
+
+   nir_builder b;
+   nir_builder_init(&b, state->impl);
+   b.cursor = nir_before_instr(&alu->instr);
+
+   switch (alu->op) {
+   case nir_op_vec2:
+   case nir_op_vec3:
+   case nir_op_vec4: {
+      nir_const_value value[4];
+      unsigned num_components = 0;
+
+      for (unsigned i = 0; i < info->num_inputs; i++) {
+         nir_const_value *cv = nir_src_as_const_value(alu->src[i].src);
+         if (cv)
+            value[num_components++] = cv[alu->src[i].swizzle[0]];
+      }
+
+      if (num_components <= 1) /* nothing to do */
+         break;
+
+      nir_ssa_def *def = nir_build_imm(&b, num_components, 32, value);
+
+      if (num_components == info->num_inputs) {
+         nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa, nir_src_for_ssa(def));
+         nir_instr_remove(&alu->instr);
+         return;
+      }
+
+      for (unsigned i = 0, j = 0; i < info->num_inputs; i++) {
+         nir_const_value *cv = nir_src_as_const_value(alu->src[i].src);
+         if (!cv)
+            continue;
+
+         nir_instr_rewrite_src(&alu->instr, &alu->src[i].src, nir_src_for_ssa(def));
+         alu->src[i].swizzle[0] = j++;
+      }
+   } break;
+   default: {
+      if (!option(single_const_src))
+         return;
+
+      /* pre-GC7000L can only have 1 uniform src per instruction */
+      nir_const_value value[4] = {};
+      uint8_t swizzle[4][4] = {};
+      unsigned swiz_max = 0, num_const = 0;
+
+      for (unsigned i = 0; i < info->num_inputs; i++) {
+         nir_const_value *cv = nir_src_as_const_value(alu->src[i].src);
+         if (!cv)
+            continue;
+
+         unsigned num_components = info->input_sizes[i] ?: alu->dest.dest.ssa.num_components;
+         for (unsigned j = 0; j < num_components; j++) {
+            int idx = const_add(&value[0].u64, cv[alu->src[i].swizzle[j]].u64);
+            swizzle[i][j] = idx;
+            swiz_max = MAX2(swiz_max, (unsigned) idx);
+         }
+         num_const++;
+      }
+
+      /* nothing to do */
+      if (num_const <= 1)
+         return;
+
+      /* resolve with single combined const src */
+      if (swiz_max < 4) {
+         nir_ssa_def *def = nir_build_imm(&b, swiz_max + 1, 32, value);
+
+         for (unsigned i = 0; i < info->num_inputs; i++) {
+            nir_const_value *cv = nir_src_as_const_value(alu->src[i].src);
+            if (!cv)
+               continue;
+
+            nir_instr_rewrite_src(&alu->instr, &alu->src[i].src, nir_src_for_ssa(def));
+
+            for (unsigned j = 0; j < 4; j++)
+               alu->src[i].swizzle[j] = swizzle[i][j];
+         }
+         return;
+      }
+
+      /* resolve with movs */
+      num_const = 0;
+      for (unsigned i = 0; i < info->num_inputs; i++) {
+         nir_const_value *cv = nir_src_as_const_value(alu->src[i].src);
+         if (!cv)
+            continue;
+
+         num_const++;
+         if (num_const == 1)
+            continue;
+
+         nir_ssa_def *mov = nir_mov(&b, alu->src[i].src.ssa);
+         nir_instr_rewrite_src(&alu->instr, &alu->src[i].src, nir_src_for_ssa(mov));
+      }
+   } return;
+   }
+
+   unsigned finished_write_mask = 0;
+   for (unsigned i = 0; i < 4; i++) {
+      if (!(alu->dest.write_mask & (1 << i)))
+         continue;
+
+      nir_ssa_def *ssa = alu->src[i].src.ssa;
+
+      /* check that the vecN instruction is the only user of this */
+      bool need_mov = list_length(&ssa->if_uses) != 0;
+      nir_foreach_use(use_src, ssa) {
+         if (use_src->parent_instr != &alu->instr)
+            need_mov = true;
+      }
+
+      nir_instr *instr = ssa->parent_instr;
+      switch (instr->type) {
+      case nir_instr_type_alu:
+      case nir_instr_type_tex:
+         break;
+      case nir_instr_type_intrinsic:
+         if (nir_instr_as_intrinsic(instr)->intrinsic == nir_intrinsic_load_input) {
+            need_mov = vec_dest_has_swizzle(alu, &nir_instr_as_intrinsic(instr)->dest.ssa);
+            break;
+         }
+      default:
+         need_mov = true;
+      }
+
+      if (need_mov && !(finished_write_mask & (1 << i)))
+         finished_write_mask |= insert_vec_mov(alu, i, state->shader);
+   }
+}
+
+static bool
+emit_shader(nir_shader *shader, const struct emit_options *options,
+            unsigned *num_temps, unsigned *num_consts)
+{
+   struct state state = {
+      .options = options,
+      .shader = shader,
+      .impl = nir_shader_get_entrypoint(shader),
+   };
+
+   nir_builder b;
+   nir_builder_init(&b, state.impl);
+
+   /* convert non-dynamic uniform loads to constants, etc */
+   nir_foreach_block(block, state.impl) {
+      nir_foreach_instr_safe(instr, block) {
+         switch (instr->type) {
+         case nir_instr_type_alu:
+            /* deals with vecN and const srcs */
+            lower_alu(&state, nir_instr_as_alu(instr));
+            break;
+         case nir_instr_type_load_const: {
+            nir_load_const_instr *load_const = nir_instr_as_load_const(instr);
+            for (unsigned i = 0; i < load_const->def.num_components; i++)
+               load_const->value[i] = CONST(load_const->value[i].u32);
+         } break;
+         case nir_instr_type_intrinsic: {
+            nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+            if (intr->intrinsic != nir_intrinsic_load_uniform)
+               break;
+            nir_const_value *off = nir_src_as_const_value(intr->src[0]);
+            if (!off || off[0].u64 >> 32 != ETNA_IMMEDIATE_CONSTANT)
+               break;
+
+            unsigned base = nir_intrinsic_base(intr) + off[0].u32 / 16;
+            nir_const_value value[4];
+
+            for (unsigned i = 0; i < intr->dest.ssa.num_components; i++) {
+               if (nir_intrinsic_base(intr) < 0)
+                  value[i] = TEXSCALE(~nir_intrinsic_base(intr), i);
+               else
+                  value[i] = UNIFORM(base * 4 + i);
+            }
+
+            b.cursor = nir_after_instr(instr);
+            nir_ssa_def *def = nir_build_imm(&b, intr->dest.ssa.num_components, 32, value);
+
+            nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(def));
+            nir_instr_remove(instr);
+         } break;
+         default:
+            break;
+         }
+      }
+   }
+
+   /* add mov for any store output using sysval/const */
+   nir_foreach_block(block, state.impl) {
+      nir_foreach_instr_safe(instr, block) {
+         if (instr->type != nir_instr_type_intrinsic)
+            continue;
+
+         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+
+         switch (intr->intrinsic) {
+         case nir_intrinsic_store_output: {
+            nir_src *src = &intr->src[0];
+            if (nir_src_is_const(*src) || is_sysval(src->ssa->parent_instr)) {
+               b.cursor = nir_before_instr(instr);
+               nir_instr_rewrite_src(instr, src, nir_src_for_ssa(nir_mov(&b, src->ssa)));
+            }
+         } break;
+         default:
+            break;
+         }
+      }
+   }
+
+   /* call directly to avoid validation (load_const doesn't pass validation at this point) */
+   nir_convert_from_ssa(shader, true);
+   nir_opt_dce(shader);
+
+   ra_assign(&state, shader);
+
+   emit_cf_list(&state, &nir_shader_get_entrypoint(shader)->body);
+
+   *num_temps = ra_finish(&state);
+   *num_consts = state.const_count;
+   return true;
+}
diff --git a/src/gallium/drivers/etnaviv/etnaviv_debug.h b/src/gallium/drivers/etnaviv/etnaviv_debug.h
index 4051e95dd5f..7676e5adce1 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_debug.h
+++ b/src/gallium/drivers/etnaviv/etnaviv_debug.h
@@ -53,6 +53,7 @@
 #define ETNA_DBG_DRAW_STALL      0x400000 /* Stall FE/PE after every draw op */
 #define ETNA_DBG_SHADERDB        0x800000 /* dump program compile information */
 #define ETNA_DBG_NO_SINGLEBUF    0x1000000 /* disable single buffer feature */
+#define ETNA_DBG_NIR             0x2000000 /* use new NIR compiler */
 
 extern int etna_mesa_debug; /* set in etna_screen.c from ETNA_DEBUG */
 
diff --git a/src/gallium/drivers/etnaviv/etnaviv_screen.c b/src/gallium/drivers/etnaviv/etnaviv_screen.c
index c0b39ec600c..88ab1b02249 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_screen.c
+++ b/src/gallium/drivers/etnaviv/etnaviv_screen.c
@@ -72,6 +72,7 @@ static const struct debug_named_value debug_options[] = {
    {"draw_stall",     ETNA_DBG_DRAW_STALL, "Stall FE/PE after each rendered primitive"},
    {"shaderdb",       ETNA_DBG_SHADERDB, "Enable shaderdb output"},
    {"no_singlebuffer",ETNA_DBG_NO_SINGLEBUF, "Disable single buffer feature"},
+   {"nir",            ETNA_DBG_NIR, "use new NIR compiler"},
    DEBUG_NAMED_VALUE_END
 };
 
@@ -154,6 +155,11 @@ etna_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
       return 1;
    case PIPE_CAP_NATIVE_FENCE_FD:
       return screen->drm_version >= ETNA_DRM_VERSION_FENCE_FD;
+   case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL:
+   case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL: /* note: not integer */
+      return DBG_ENABLED(ETNA_DBG_NIR);
+   case PIPE_CAP_TGSI_FS_POINT_IS_SYSVAL:
+      return 0;
 
    /* Memory */
    case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
@@ -322,7 +328,7 @@ etna_screen_get_shader_param(struct pipe_screen *pscreen,
              ? screen->specs.fragment_sampler_count
              : screen->specs.vertex_sampler_count;
    case PIPE_SHADER_CAP_PREFERRED_IR:
-      return PIPE_SHADER_IR_TGSI;
+      return DBG_ENABLED(ETNA_DBG_NIR) ? PIPE_SHADER_IR_NIR : PIPE_SHADER_IR_TGSI;
    case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE:
       return 4096;
    case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
@@ -743,6 +749,13 @@ etna_screen_bo_from_handle(struct pipe_screen *pscreen,
    return bo;
 }
 
+static const void *
+etna_get_compiler_options(struct pipe_screen *pscreen,
+                          enum pipe_shader_ir ir, unsigned shader)
+{
+   return &etna_screen(pscreen)->options;
+}
+
 struct pipe_screen *
 etna_screen_create(struct etna_device *dev, struct etna_gpu *gpu,
                    struct renderonly *ro)
@@ -845,6 +858,26 @@ etna_screen_create(struct etna_device *dev, struct etna_gpu *gpu,
    if (!etna_get_specs(screen))
       goto fail;
 
+   screen->options = (nir_shader_compiler_options) {
+      .lower_fpow = true,
+      .lower_sub = true,
+      .lower_ftrunc = true,
+      .fuse_ffma = true,
+      .lower_bitops = true,
+      .lower_all_io_to_temps = true,
+      .vertex_id_zero_based = true,
+      .lower_flrp32 = true,
+      .lower_fmod = true,
+      .lower_vector_cmp = true,
+      .lower_fdph = true,
+      .lower_fdiv = true, /* !screen->specs.has_new_transcendentals */
+      .lower_fsign = !screen->specs.has_sign_floor_ceil,
+      .lower_ffloor = !screen->specs.has_sign_floor_ceil,
+      .lower_fceil = !screen->specs.has_sign_floor_ceil,
+      .lower_fsqrt = !screen->specs.has_sin_cos_sqrt,
+      .lower_sincos = !screen->specs.has_sin_cos_sqrt,
+   };
+
    /* apply debug options that disable individual features */
    if (DBG_ENABLED(ETNA_DBG_NO_EARLY_Z))
       screen->features[viv_chipFeatures] |= chipFeatures_NO_EARLY_Z;
@@ -861,6 +894,7 @@
    pscreen->get_param = etna_screen_get_param;
    pscreen->get_paramf = etna_screen_get_paramf;
    pscreen->get_shader_param = etna_screen_get_shader_param;
+   pscreen->get_compiler_options = etna_get_compiler_options;
    pscreen->get_name = etna_screen_get_name;
    pscreen->get_vendor = etna_screen_get_vendor;
 
diff --git a/src/gallium/drivers/etnaviv/etnaviv_screen.h b/src/gallium/drivers/etnaviv/etnaviv_screen.h
index 4e850d4b7ee..99e2cc20ac7 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_screen.h
+++ b/src/gallium/drivers/etnaviv/etnaviv_screen.h
@@ -38,6 +38,7 @@
 #include "util/slab.h"
 #include "util/u_dynarray.h"
 #include "util/u_helpers.h"
+#include "compiler/nir/nir.h"
 
 struct etna_bo;
 
@@ -87,6 +88,8 @@ struct etna_screen {
    /* set of resources used by currently-unsubmitted renders */
    mtx_t lock;
    struct set *used_resources;
+
+   nir_shader_compiler_options options;
 };
 
 static inline struct etna_screen *
diff --git a/src/gallium/drivers/etnaviv/etnaviv_shader.c b/src/gallium/drivers/etnaviv/etnaviv_shader.c
index 479c88bb44d..f0cde53d023 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_shader.c
+++ b/src/gallium/drivers/etnaviv/etnaviv_shader.c
@@ -33,6 +33,7 @@
 #include "etnaviv_util.h"
 
 #include "tgsi/tgsi_parse.h"
+#include "nir/tgsi_to_nir.h"
 #include "util/u_math.h"
 #include "util/u_memory.h"
 
@@ -49,7 +50,7 @@ static bool etna_icache_upload_shader(struct etna_context *ctx, struct etna_shad
    etna_bo_cpu_prep(v->bo, DRM_ETNA_PREP_WRITE);
    memcpy(buf, v->code, v->code_size*4);
    etna_bo_cpu_fini(v->bo);
-   DBG("Uploaded %s of %u words to bo %p", v->processor == PIPE_SHADER_FRAGMENT ? "fs":"vs", v->code_size, v->bo);
+   DBG("Uploaded %s of %u words to bo %p", v->stage == MESA_SHADER_FRAGMENT ? "fs":"vs", v->code_size, v->bo);
 
    return true;
 }
@@ -67,8 +68,8 @@ etna_link_shaders(struct etna_context *ctx, struct compiled_shader_state *cs,
 {
    struct etna_shader_link_info link = { };
 
-   assert(vs->processor == PIPE_SHADER_VERTEX);
-   assert(fs->processor == PIPE_SHADER_FRAGMENT);
+   assert(vs->stage == MESA_SHADER_VERTEX);
+   assert(fs->stage == MESA_SHADER_FRAGMENT);
 
 #ifdef DEBUG
    if (DBG_ENABLED(ETNA_DBG_DUMP_SHADERS)) {
@@ -275,10 +276,10 @@ etna_shader_update_vs_inputs(struct compiled_shader_state *cs,
 static inline const char *
 etna_shader_stage(struct etna_shader_variant *shader)
 {
-   switch (shader->processor) {
-   case PIPE_SHADER_VERTEX: return "VERT";
-   case PIPE_SHADER_FRAGMENT: return "FRAG";
-   case PIPE_SHADER_COMPUTE: return "CL";
+   switch (shader->stage) {
+   case MESA_SHADER_VERTEX: return "VERT";
+   case MESA_SHADER_FRAGMENT: return "FRAG";
+   case MESA_SHADER_COMPUTE: return "CL";
    default:
       unreachable("invalid type");
       return NULL;
@@ -372,7 +373,14 @@ etna_create_shader_state(struct pipe_context *pctx,
    static uint32_t id;
    shader->id = id++;
    shader->specs = &ctx->specs;
-   shader->tokens = tgsi_dup_tokens(pss->tokens);
+
+   if (DBG_ENABLED(ETNA_DBG_NIR))
+      shader->nir = (pss->type == PIPE_SHADER_IR_NIR) ? pss->ir.nir :
+                    tgsi_to_nir(pss->tokens, pctx->screen);
+   else
+      shader->tokens = tgsi_dup_tokens(pss->tokens);
+
+
    if (etna_mesa_debug & ETNA_DBG_SHADERDB) {
       /* if shader-db run, create a standard variant immediately
@@ -401,7 +409,7 @@ etna_delete_shader_state(struct pipe_context *pctx, void *ss)
       etna_destroy_shader(t);
    }
 
-   FREE(shader->tokens);
+   ralloc_free(shader->nir);
    FREE(shader);
 }
 
diff --git a/src/gallium/drivers/etnaviv/etnaviv_shader.h b/src/gallium/drivers/etnaviv/etnaviv_shader.h
index 121d5815ba0..3c5b6e65d94 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_shader.h
+++ b/src/gallium/drivers/etnaviv/etnaviv_shader.h
@@ -31,6 +31,7 @@
 
 struct etna_context;
 struct etna_shader_variant;
+struct nir_shader;
 
 struct etna_shader_key
 {
@@ -56,14 +57,15 @@ etna_shader_key_equal(struct etna_shader_key *a, struct etna_shader_key *b)
 }
 
 struct etna_shader {
-	/* shader id (for debug): */
-	uint32_t id;
-	uint32_t variant_count;
+   /* shader id (for debug): */
+   uint32_t id;
+   uint32_t variant_count;
 
-	struct tgsi_token *tokens;
-	const struct etna_specs *specs;
+   struct tgsi_token *tokens;
+   struct nir_shader *nir;
+   const struct etna_specs *specs;
 
-	struct etna_shader_variant *variants;
+   struct etna_shader_variant *variants;
 };
 
 bool
diff --git a/src/gallium/drivers/etnaviv/meson.build b/src/gallium/drivers/etnaviv/meson.build
index c53eab16540..5b26ca69a75 100644
--- a/src/gallium/drivers/etnaviv/meson.build
+++ b/src/gallium/drivers/etnaviv/meson.build
@@ -37,6 +37,8 @@ files_etnaviv = files(
   'etnaviv_clear_blit.h',
   'etnaviv_compiler.c',
   'etnaviv_compiler.h',
+  'etnaviv_compiler_nir.c',
+  'etnaviv_compiler_nir_emit.h',
   'etnaviv_context.c',
   'etnaviv_context.h',
   'etnaviv_debug.h',
@@ -97,7 +99,7 @@ libetnaviv = static_library(
     inc_include, inc_src, inc_gallium, inc_gallium_aux, inc_etnaviv,
   ],
   link_with: libetnaviv_drm,
-  dependencies : dep_libdrm,
+  dependencies : [dep_libdrm, idep_nir_headers],
 )
 
 etnaviv_compiler = executable(
@@ -115,4 +117,5 @@ etnaviv_compiler = executable(
 driver_etnaviv = declare_dependency(
   compile_args : '-DGALLIUM_ETNAVIV',
   link_with : [libetnaviv, libetnavivdrm],
+  dependencies : idep_nir,
 )
diff --git a/src/gallium/winsys/etnaviv/drm/meson.build b/src/gallium/winsys/etnaviv/drm/meson.build
index f931fd39e5b..f5491941755 100644
--- a/src/gallium/winsys/etnaviv/drm/meson.build
+++ b/src/gallium/winsys/etnaviv/drm/meson.build
@@ -26,5 +26,5 @@ libetnavivdrm = static_library(
     inc_etnaviv,
   ],
   link_with: libetnaviv_drm,
-  dependencies : [dep_libdrm],
+  dependencies : [dep_libdrm, idep_nir_headers],
 )
-- 
2.30.2