From 8f11cc4cad7feb8d78f37709baac36c6a22034c6 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Mon, 15 Jun 2020 14:12:58 -0700 Subject: [PATCH] freedreno/ir3: move output_loc to variant This moves the last bit of important state to be serialized from ir3_shader to ir3_shader_variant. Signed-off-by: Rob Clark Part-of: --- src/freedreno/ir3/ir3_nir.c | 6 +++--- src/freedreno/ir3/ir3_nir.h | 4 ++-- src/freedreno/ir3/ir3_nir_lower_tess.c | 18 +++++++++-------- src/freedreno/ir3/ir3_shader.h | 11 +++++----- src/freedreno/vulkan/tu_pipeline.c | 8 ++++---- .../drivers/freedreno/a6xx/fd6_const.c | 20 +++++++++---------- src/gallium/drivers/freedreno/a6xx/fd6_draw.c | 2 +- .../drivers/freedreno/a6xx/fd6_program.c | 8 ++++---- 8 files changed, 40 insertions(+), 37 deletions(-) diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c index a73210e5cf2..93b8012e1d5 100644 --- a/src/freedreno/ir3/ir3_nir.c +++ b/src/freedreno/ir3/ir3_nir.c @@ -282,18 +282,18 @@ ir3_nir_lower_variant(struct ir3_shader_variant *so, nir_shader *s) if (so->key.has_gs || so->key.tessellation) { switch (so->shader->type) { case MESA_SHADER_VERTEX: - NIR_PASS_V(s, ir3_nir_lower_to_explicit_output, so->shader, so->key.tessellation); + NIR_PASS_V(s, ir3_nir_lower_to_explicit_output, so, so->key.tessellation); progress = true; break; case MESA_SHADER_TESS_CTRL: - NIR_PASS_V(s, ir3_nir_lower_tess_ctrl, so->shader, so->key.tessellation); + NIR_PASS_V(s, ir3_nir_lower_tess_ctrl, so, so->key.tessellation); NIR_PASS_V(s, ir3_nir_lower_to_explicit_input); progress = true; break; case MESA_SHADER_TESS_EVAL: NIR_PASS_V(s, ir3_nir_lower_tess_eval, so->key.tessellation); if (so->key.has_gs) - NIR_PASS_V(s, ir3_nir_lower_to_explicit_output, so->shader, so->key.tessellation); + NIR_PASS_V(s, ir3_nir_lower_to_explicit_output, so, so->key.tessellation); progress = true; break; case MESA_SHADER_GEOMETRY: diff --git a/src/freedreno/ir3/ir3_nir.h b/src/freedreno/ir3/ir3_nir.h index 0ad9b35f5fe..915f1638419 100644 --- a/src/freedreno/ir3/ir3_nir.h +++ b/src/freedreno/ir3/ir3_nir.h @@ -45,9 +45,9 @@ bool ir3_nir_lower_tex_prefetch(nir_shader *shader); void ir3_nir_lower_to_explicit_output(nir_shader *shader, - struct ir3_shader *s, unsigned topology); + struct ir3_shader_variant *v, unsigned topology); void ir3_nir_lower_to_explicit_input(nir_shader *shader); -void ir3_nir_lower_tess_ctrl(nir_shader *shader, struct ir3_shader *s, unsigned topology); +void ir3_nir_lower_tess_ctrl(nir_shader *shader, struct ir3_shader_variant *v, unsigned topology); void ir3_nir_lower_tess_eval(nir_shader *shader, unsigned topology); void ir3_nir_lower_gs(nir_shader *shader); diff --git a/src/freedreno/ir3/ir3_nir_lower_tess.c b/src/freedreno/ir3/ir3_nir_lower_tess.c index d5d80f78098..458b29848a0 100644 --- a/src/freedreno/ir3/ir3_nir_lower_tess.c +++ b/src/freedreno/ir3/ir3_nir_lower_tess.c @@ -227,12 +227,13 @@ local_thread_id(nir_builder *b) } void -ir3_nir_lower_to_explicit_output(nir_shader *shader, struct ir3_shader *s, unsigned topology) +ir3_nir_lower_to_explicit_output(nir_shader *shader, struct ir3_shader_variant *v, + unsigned topology) { struct state state = { }; build_primitive_map(shader, &state.map, &shader->outputs); - memcpy(s->output_loc, state.map.loc, sizeof(s->output_loc)); + memcpy(v->output_loc, state.map.loc, sizeof(v->output_loc)); nir_function_impl *impl = nir_shader_get_entrypoint(shader); assert(impl); @@ -241,7 +242,7 @@ ir3_nir_lower_to_explicit_output(nir_shader *shader, struct ir3_shader *s, unsig nir_builder_init(&b, impl); b.cursor = nir_before_cf_list(&impl->body); - if (s->type == MESA_SHADER_VERTEX && topology != IR3_TESS_NONE) + if (v->type == MESA_SHADER_VERTEX && topology != IR3_TESS_NONE) state.header = nir_load_tcs_header_ir3(&b); else state.header = nir_load_gs_header_ir3(&b); @@ -252,7 +253,7 @@ ir3_nir_lower_to_explicit_output(nir_shader *shader, struct ir3_shader *s, unsig nir_metadata_preserve(impl, nir_metadata_block_index | nir_metadata_dominance); - s->output_size = state.map.stride; + v->output_size = state.map.stride; } @@ -595,7 +596,8 @@ emit_tess_epilouge(nir_builder *b, struct state *state) } void -ir3_nir_lower_tess_ctrl(nir_shader *shader, struct ir3_shader *s, unsigned topology) +ir3_nir_lower_tess_ctrl(nir_shader *shader, struct ir3_shader_variant *v, + unsigned topology) { struct state state = { .topology = topology }; @@ -606,8 +608,8 @@ ir3_nir_lower_tess_ctrl(nir_shader *shader, struct ir3_shader *s, unsigned topol } build_primitive_map(shader, &state.map, &shader->outputs); - memcpy(s->output_loc, state.map.loc, sizeof(s->output_loc)); - s->output_size = state.map.stride; + memcpy(v->output_loc, state.map.loc, sizeof(v->output_loc)); + v->output_size = state.map.stride; nir_function_impl *impl = nir_shader_get_entrypoint(shader); assert(impl); @@ -984,7 +986,7 @@ ir3_link_geometry_stages(const struct ir3_shader_variant *producer, nir_foreach_variable(out_var, &producer->shader->nir->outputs) { if (in_var->data.location == out_var->data.location) { locs[in_var->data.driver_location] = - producer->shader->output_loc[out_var->data.driver_location] * factor; + producer->output_loc[out_var->data.driver_location] * factor; debug_assert(num_loc <= in_var->data.driver_location + 1); num_loc = in_var->data.driver_location + 1; diff --git a/src/freedreno/ir3/ir3_shader.h b/src/freedreno/ir3/ir3_shader.h index 41ca394837b..fc2a924b58a 100644 --- a/src/freedreno/ir3/ir3_shader.h +++ b/src/freedreno/ir3/ir3_shader.h @@ -497,6 +497,12 @@ struct ir3_shader_variant { } outputs[32 + 2]; /* +POSITION +PSIZE */ bool writes_pos, writes_smask, writes_psize; + /* Size in dwords of all outputs for VS, size of entire patch for HS. */ + uint32_t output_size; + + /* Map from driver_location to byte offset in per-primitive storage */ + unsigned output_loc[32]; + /* attributes (VS) / varyings (FS): * Note that sysval's should come *after* normal inputs. */ @@ -631,11 +637,6 @@ struct ir3_shader { struct ir3_shader_variant *variants; mtx_t variants_lock; - uint32_t output_size; /* Size in dwords of all outputs for VS, size of entire patch for HS. */ - - /* Map from driver_location to byte offset in per-primitive storage */ - unsigned output_loc[32]; - /* Bitmask of bits of the shader key used by this shader. Used to avoid * recompiles for GL NOS that doesn't actually apply to the shader. */ diff --git a/src/freedreno/vulkan/tu_pipeline.c b/src/freedreno/vulkan/tu_pipeline.c index 9c7cc4e08af..ef1422fa708 100644 --- a/src/freedreno/vulkan/tu_pipeline.c +++ b/src/freedreno/vulkan/tu_pipeline.c @@ -783,7 +783,7 @@ tu6_emit_vpc(struct tu_cs *cs, invocations = gs->shader->nir->info.gs.invocations - 1; /* Size of per-primitive alloction in ldlw memory in vec4s. */ vec4_size = gs->shader->nir->info.gs.vertices_in * - DIV_ROUND_UP(vs->shader->output_size, 4); + DIV_ROUND_UP(vs->output_size, 4); } else { vertices_out = 3; output = TESS_CW_TRIS; @@ -843,7 +843,7 @@ tu6_emit_vpc(struct tu_cs *cs, tu_cs_emit(cs, 0); tu_cs_emit_pkt4(cs, REG_A6XX_SP_GS_PRIM_SIZE, 1); - tu_cs_emit(cs, vs->shader->output_size); + tu_cs_emit(cs, vs->output_size); } tu_cs_emit_pkt4(cs, REG_A6XX_SP_PRIMITIVE_CNTL, 1); @@ -1135,8 +1135,8 @@ tu6_emit_geometry_consts(struct tu_cs *cs, unsigned num_vertices = gs->shader->nir->info.gs.vertices_in; uint32_t params[4] = { - vs->shader->output_size * num_vertices * 4, /* primitive stride */ - vs->shader->output_size * 4, /* vertex stride */ + vs->output_size * num_vertices * 4, /* primitive stride */ + vs->output_size * 4, /* vertex stride */ 0, 0, }; diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_const.c b/src/gallium/drivers/freedreno/a6xx/fd6_const.c index 2e9af8c03ae..e2c982d8226 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_const.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_const.c @@ -175,8 +175,8 @@ emit_tess_consts(struct fd6_emit *emit) emit->gs->shader->nir->info.gs.vertices_in; uint32_t vs_params[4] = { - emit->vs->shader->output_size * num_vertices * 4, /* vs primitive stride */ - emit->vs->shader->output_size * 4, /* vs vertex stride */ + emit->vs->output_size * num_vertices * 4, /* vs primitive stride */ + emit->vs->output_size * 4, /* vs vertex stride */ 0, 0 }; @@ -185,9 +185,9 @@ emit_tess_consts(struct fd6_emit *emit) if (emit->hs) { uint32_t hs_params[4] = { - emit->vs->shader->output_size * num_vertices * 4, /* vs primitive stride */ - emit->vs->shader->output_size * 4, /* vs vertex stride */ - emit->hs->shader->output_size, + emit->vs->output_size * num_vertices * 4, /* vs primitive stride */ + emit->vs->output_size * 4, /* vs vertex stride */ + emit->hs->output_size, emit->info->vertices_per_patch }; @@ -198,9 +198,9 @@ emit_tess_consts(struct fd6_emit *emit) num_vertices = emit->gs->shader->nir->info.gs.vertices_in; uint32_t ds_params[4] = { - emit->ds->shader->output_size * num_vertices * 4, /* ds primitive stride */ - emit->ds->shader->output_size * 4, /* ds vertex stride */ - emit->hs->shader->output_size, /* hs vertex stride (dwords) */ + emit->ds->output_size * num_vertices * 4, /* ds primitive stride */ + emit->ds->output_size * 4, /* ds vertex stride */ + emit->hs->output_size, /* hs vertex stride (dwords) */ emit->hs->shader->nir->info.tess.tcs_vertices_out }; @@ -216,8 +216,8 @@ emit_tess_consts(struct fd6_emit *emit) prev = emit->vs; uint32_t gs_params[4] = { - prev->shader->output_size * num_vertices * 4, /* ds primitive stride */ - prev->shader->output_size * 4, /* ds vertex stride */ + prev->output_size * num_vertices * 4, /* ds primitive stride */ + prev->output_size * 4, /* ds vertex stride */ 0, 0, }; diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_draw.c b/src/gallium/drivers/freedreno/a6xx/fd6_draw.c index 232262765a3..0279b2ee872 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_draw.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_draw.c @@ -258,7 +258,7 @@ fd6_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info, ctx->batch->tessellation = true; ctx->batch->tessparam_size = MAX2(ctx->batch->tessparam_size, - emit.hs->shader->output_size * 4 * info->count); + emit.hs->output_size * 4 * info->count); ctx->batch->tessfactor_size = MAX2(ctx->batch->tessfactor_size, factor_stride * info->count); diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_program.c b/src/gallium/drivers/freedreno/a6xx/fd6_program.c index 3a8f480a3cc..be561a040c6 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_program.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_program.c @@ -562,10 +562,10 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_screen *screen, /* Total attribute slots in HS incoming patch. */ OUT_PKT4(ring, REG_A6XX_PC_UNKNOWN_9801, 1); - OUT_RING(ring, hs_info->tess.tcs_vertices_out * vs->shader->output_size / 4); + OUT_RING(ring, hs_info->tess.tcs_vertices_out * vs->output_size / 4); OUT_PKT4(ring, REG_A6XX_SP_HS_UNKNOWN_A831, 1); - OUT_RING(ring, vs->shader->output_size); + OUT_RING(ring, vs->output_size); shader_info *ds_info = &ds->shader->nir->info; OUT_PKT4(ring, REG_A6XX_PC_TESS_CNTL, 1); @@ -791,7 +791,7 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_screen *screen, /* Size of per-primitive alloction in ldlw memory in vec4s. */ uint32_t vec4_size = gs->shader->nir->info.gs.vertices_in * - DIV_ROUND_UP(prev->shader->output_size, 4); + DIV_ROUND_UP(prev->output_size, 4); OUT_PKT4(ring, REG_A6XX_PC_PRIMITIVE_CNTL_6, 1); OUT_RING(ring, A6XX_PC_PRIMITIVE_CNTL_6_STRIDE_IN_VPC(vec4_size)); @@ -799,7 +799,7 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_screen *screen, OUT_RING(ring, 0); OUT_PKT4(ring, REG_A6XX_SP_GS_PRIM_SIZE, 1); - OUT_RING(ring, prev->shader->output_size); + OUT_RING(ring, prev->output_size); } else { OUT_PKT4(ring, REG_A6XX_PC_PRIMITIVE_CNTL_6, 1); OUT_RING(ring, 0); -- 2.30.2