X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fv3d%2Fv3d_program.c;h=ad6a796866a3e4cadd43b453e00e96d80312eaba;hb=522bd414f343c7a132fee17d0d6b755b9ec6766c;hp=17ded7571c43ff11f7cbc46de7ec847e24b779e1;hpb=17c81989521731af25a633622a0ded437538ab00;p=mesa.git diff --git a/src/gallium/drivers/v3d/v3d_program.c b/src/gallium/drivers/v3d/v3d_program.c index 17ded7571c4..ad6a796866a 100644 --- a/src/gallium/drivers/v3d/v3d_program.c +++ b/src/gallium/drivers/v3d/v3d_program.c @@ -22,11 +22,12 @@ */ #include -#include "util/u_format.h" +#include "util/format/u_format.h" #include "util/u_math.h" #include "util/u_memory.h" #include "util/ralloc.h" #include "util/hash_table.h" +#include "util/u_upload_mgr.h" #include "tgsi/tgsi_dump.h" #include "tgsi/tgsi_parse.h" #include "compiler/nir/nir.h" @@ -35,7 +36,13 @@ #include "compiler/v3d_compiler.h" #include "v3d_context.h" #include "broadcom/cle/v3d_packet_v33_pack.h" -#include "mesa/state_tracker/st_glsl_types.h" + +static struct v3d_compiled_shader * +v3d_get_compiled_shader(struct v3d_context *v3d, + struct v3d_key *key, size_t key_size); +static void +v3d_setup_shared_precompile_key(struct v3d_uncompiled_shader *uncompiled, + struct v3d_key *key); static gl_varying_slot v3d_get_slot_for_driver_location(nir_shader *s, uint32_t driver_location) @@ -163,20 +170,115 @@ v3d_set_transform_feedback_outputs(struct v3d_uncompiled_shader *so, } static int -type_size(const struct glsl_type *type) +type_size(const struct glsl_type *type, bool bindless) { return glsl_count_attribute_slots(type, false); } -static int -uniforms_type_size(const struct glsl_type *type) +static void +precompile_all_outputs(nir_shader *s, + struct v3d_varying_slot *outputs, + uint8_t *num_outputs) { - return st_glsl_storage_type_size(type, false); + nir_foreach_variable(var, &s->outputs) { + const int array_len = MAX2(glsl_get_length(var->type), 1); + for (int j = 0; j < array_len; j++) { + const int slot = var->data.location + j; + const int num_components = + glsl_get_components(var->type); + for (int i = 0; i < num_components; i++) { + const int swiz = var->data.location_frac + i; + outputs[(*num_outputs)++] = + v3d_slot_from_slot_and_component(slot, + swiz); + } + } + } +} + +/** + * Precompiles a shader variant at shader state creation time if + * V3D_DEBUG=precompile is set. Used for shader-db + * (https://gitlab.freedesktop.org/mesa/shader-db) + */ +static void +v3d_shader_precompile(struct v3d_context *v3d, + struct v3d_uncompiled_shader *so) +{ + nir_shader *s = so->base.ir.nir; + + if (s->info.stage == MESA_SHADER_FRAGMENT) { + struct v3d_fs_key key = { + .base.shader_state = so, + }; + + nir_foreach_variable(var, &s->outputs) { + if (var->data.location == FRAG_RESULT_COLOR) { + key.cbufs |= 1 << 0; + } else if (var->data.location >= FRAG_RESULT_DATA0) { + key.cbufs |= 1 << (var->data.location - + FRAG_RESULT_DATA0); + } + } + + key.logicop_func = PIPE_LOGICOP_COPY; + + v3d_setup_shared_precompile_key(so, &key.base); + v3d_get_compiled_shader(v3d, &key.base, sizeof(key)); + } else if (s->info.stage == MESA_SHADER_GEOMETRY) { + struct v3d_gs_key key = { + .base.shader_state = so, + .base.is_last_geometry_stage = true, + }; + + v3d_setup_shared_precompile_key(so, &key.base); + + precompile_all_outputs(s, + key.used_outputs, + &key.num_used_outputs); + + v3d_get_compiled_shader(v3d, &key.base, sizeof(key)); + + /* Compile GS bin shader: only position (XXX: include TF) */ + key.is_coord = true; + key.num_used_outputs = 0; + for (int i = 0; i < 4; i++) { + key.used_outputs[key.num_used_outputs++] = + v3d_slot_from_slot_and_component(VARYING_SLOT_POS, + i); + } + v3d_get_compiled_shader(v3d, &key.base, sizeof(key)); + } else { + assert(s->info.stage == MESA_SHADER_VERTEX); + struct v3d_vs_key key = { + .base.shader_state = so, + /* Emit fixed function outputs */ + .base.is_last_geometry_stage = true, + }; + + v3d_setup_shared_precompile_key(so, &key.base); + + precompile_all_outputs(s, + key.used_outputs, + &key.num_used_outputs); + + v3d_get_compiled_shader(v3d, &key.base, sizeof(key)); + + /* Compile VS bin shader: only position (XXX: include TF) */ + key.is_coord = true; + key.num_used_outputs = 0; + for (int i = 0; i < 4; i++) { + key.used_outputs[key.num_used_outputs++] = + v3d_slot_from_slot_and_component(VARYING_SLOT_POS, + i); + } + v3d_get_compiled_shader(v3d, &key.base, sizeof(key)); + } } static void * -v3d_shader_state_create(struct pipe_context *pctx, - const struct pipe_shader_state *cso) +v3d_uncompiled_shader_create(struct pipe_context *pctx, + enum pipe_shader_ir type, void *ir) { struct v3d_context *v3d = v3d_context(pctx); struct v3d_uncompiled_shader *so = CALLOC_STRUCT(v3d_uncompiled_shader); @@ -187,37 +289,32 @@ v3d_shader_state_create(struct pipe_context *pctx, nir_shader *s; - if (cso->type == PIPE_SHADER_IR_NIR) { + if (type == PIPE_SHADER_IR_NIR) { /* The backend takes ownership of the NIR shader on state * creation. */ - s = cso->ir.nir; - - NIR_PASS_V(s, nir_lower_io, nir_var_uniform, - uniforms_type_size, - (nir_lower_io_options)0); + s = ir; } else { - assert(cso->type == PIPE_SHADER_IR_TGSI); + assert(type == PIPE_SHADER_IR_TGSI); if (V3D_DEBUG & V3D_DEBUG_TGSI) { fprintf(stderr, "prog %d TGSI:\n", so->program_id); - tgsi_dump(cso->tokens, 0); + tgsi_dump(ir, 0); fprintf(stderr, "\n"); } - s = tgsi_to_nir(cso->tokens, &v3d_nir_options); - - so->was_tgsi = true; + s = tgsi_to_nir(ir, pctx->screen, false); } nir_variable_mode lower_mode = nir_var_all & ~nir_var_uniform; - if (s->info.stage == MESA_SHADER_VERTEX) - lower_mode &= ~nir_var_shader_in; + if (s->info.stage == MESA_SHADER_VERTEX || + s->info.stage == MESA_SHADER_GEOMETRY) { + lower_mode &= ~(nir_var_shader_in | nir_var_shader_out); + } NIR_PASS_V(s, nir_lower_io, lower_mode, type_size, (nir_lower_io_options)0); - NIR_PASS_V(s, nir_opt_global_to_local); NIR_PASS_V(s, nir_lower_regs_to_ssa); NIR_PASS_V(s, nir_normalize_cubemap_coords); @@ -225,7 +322,7 @@ v3d_shader_state_create(struct pipe_context *pctx, v3d_optimize_nir(s); - NIR_PASS_V(s, nir_remove_dead_variables, nir_var_local); + NIR_PASS_V(s, nir_remove_dead_variables, nir_var_function_temp); /* Garbage collect dead instructions */ nir_sweep(s); @@ -233,8 +330,6 @@ v3d_shader_state_create(struct pipe_context *pctx, so->base.type = PIPE_SHADER_IR_NIR; so->base.ir.nir = s; - v3d_set_transform_feedback_outputs(so, &cso->stream_output); - if (V3D_DEBUG & (V3D_DEBUG_NIR | v3d_debug_flag_for_shader_stage(s->info.stage))) { fprintf(stderr, "%s prog %d NIR:\n", @@ -244,25 +339,45 @@ v3d_shader_state_create(struct pipe_context *pctx, fprintf(stderr, "\n"); } + if (V3D_DEBUG & V3D_DEBUG_PRECOMPILE) + v3d_shader_precompile(v3d, so); + return so; } -static struct v3d_compiled_shader * -v3d_get_compiled_shader(struct v3d_context *v3d, struct v3d_key *key) +static void +v3d_shader_debug_output(const char *message, void *data) +{ + struct v3d_context *v3d = data; + + pipe_debug_message(&v3d->debug, SHADER_INFO, "%s", message); +} + +static void * +v3d_shader_state_create(struct pipe_context *pctx, + const struct pipe_shader_state *cso) +{ + struct v3d_uncompiled_shader *so = + v3d_uncompiled_shader_create(pctx, + cso->type, + (cso->type == PIPE_SHADER_IR_TGSI ? + (void *)cso->tokens : + cso->ir.nir)); + + v3d_set_transform_feedback_outputs(so, &cso->stream_output); + + return so; +} + +struct v3d_compiled_shader * +v3d_get_compiled_shader(struct v3d_context *v3d, + struct v3d_key *key, + size_t key_size) { struct v3d_uncompiled_shader *shader_state = key->shader_state; nir_shader *s = shader_state->base.ir.nir; - struct hash_table *ht; - uint32_t key_size; - if (s->info.stage == MESA_SHADER_FRAGMENT) { - ht = v3d->fs_cache; - key_size = sizeof(struct v3d_fs_key); - } else { - ht = v3d->vs_cache; - key_size = sizeof(struct v3d_vs_key); - } - + struct hash_table *ht = v3d->prog.cache[s->info.stage]; struct hash_entry *entry = _mesa_hash_table_search(ht, key); if (entry) return entry->data; @@ -276,49 +391,38 @@ v3d_get_compiled_shader(struct v3d_context *v3d, struct v3d_key *key) uint64_t *qpu_insts; uint32_t shader_size; - switch (s->info.stage) { - case MESA_SHADER_VERTEX: - shader->prog_data.vs = rzalloc(shader, struct v3d_vs_prog_data); - - qpu_insts = v3d_compile_vs(v3d->screen->compiler, - (struct v3d_vs_key *)key, - shader->prog_data.vs, s, - program_id, variant_id, - &shader_size); - break; - case MESA_SHADER_FRAGMENT: - shader->prog_data.fs = rzalloc(shader, struct v3d_fs_prog_data); - - qpu_insts = v3d_compile_fs(v3d->screen->compiler, - (struct v3d_fs_key *)key, - shader->prog_data.fs, s, - program_id, variant_id, - &shader_size); - break; - default: - unreachable("bad stage"); - } + qpu_insts = v3d_compile(v3d->screen->compiler, key, + &shader->prog_data.base, s, + v3d_shader_debug_output, + v3d, + program_id, variant_id, &shader_size); + ralloc_steal(shader, shader->prog_data.base); v3d_set_shader_uniform_dirty_flags(shader); - shader->bo = v3d_bo_alloc(v3d->screen, shader_size, "shader"); - v3d_bo_map(shader->bo); - memcpy(shader->bo->map, qpu_insts, shader_size); + if (shader_size) { + u_upload_data(v3d->state_uploader, 0, shader_size, 8, + qpu_insts, &shader->offset, &shader->resource); + } free(qpu_insts); - struct v3d_key *dup_key; - dup_key = ralloc_size(shader, key_size); - memcpy(dup_key, key, key_size); - _mesa_hash_table_insert(ht, dup_key, shader); + if (ht) { + struct v3d_key *dup_key; + dup_key = ralloc_size(shader, key_size); + memcpy(dup_key, key, key_size); + _mesa_hash_table_insert(ht, dup_key, shader); + } if (shader->prog_data.base->spill_size > v3d->prog.spill_size_per_thread) { - /* Max 4 QPUs per slice, 3 slices per core. We only do single - * core so far. This overallocates memory on smaller cores. + /* The TIDX register we use for choosing the area to access + * for scratch space is: (core << 6) | (qpu << 2) | thread. + * Even at minimum threadcount in a particular shader, that + * means we still multiply by qpus by 4. */ - int total_spill_size = - 4 * 3 * shader->prog_data.base->spill_size; + int total_spill_size = (v3d->screen->devinfo.qpu_count * 4 * + shader->prog_data.base->spill_size); v3d_bo_unreference(&v3d->prog.spill_bo); v3d->prog.spill_bo = v3d_bo_alloc(v3d->screen, @@ -330,6 +434,13 @@ v3d_get_compiled_shader(struct v3d_context *v3d, struct v3d_key *key) return shader; } +static void +v3d_free_compiled_shader(struct v3d_compiled_shader *shader) +{ + pipe_resource_reference(&shader->resource, NULL); + ralloc_free(shader); +} + static void v3d_setup_shared_key(struct v3d_context *v3d, struct v3d_key *key, struct v3d_texture_stateobj *texstate) @@ -379,8 +490,6 @@ v3d_setup_shared_key(struct v3d_context *v3d, struct v3d_key *key, } if (sampler) { - key->tex[i].compare_mode = sampler_state->compare_mode; - key->tex[i].compare_func = sampler_state->compare_func; key->tex[i].clamp_s = sampler_state->wrap_s == PIPE_TEX_WRAP_CLAMP; key->tex[i].clamp_t = @@ -389,8 +498,23 @@ v3d_setup_shared_key(struct v3d_context *v3d, struct v3d_key *key, sampler_state->wrap_r == PIPE_TEX_WRAP_CLAMP; } } +} + +static void +v3d_setup_shared_precompile_key(struct v3d_uncompiled_shader *uncompiled, + struct v3d_key *key) +{ + nir_shader *s = uncompiled->base.ir.nir; + + for (int i = 0; i < s->info.num_textures; i++) { + key->tex[i].return_size = 16; + key->tex[i].return_channels = 2; - key->ucp_enables = v3d->rasterizer->base.clip_plane_enable; + key->tex[i].swizzle[0] = PIPE_SWIZZLE_X; + key->tex[i].swizzle[1] = PIPE_SWIZZLE_Y; + key->tex[i].swizzle[2] = PIPE_SWIZZLE_Z; + key->tex[i].swizzle[3] = PIPE_SWIZZLE_W; + } } static void @@ -399,6 +523,7 @@ v3d_update_compiled_fs(struct v3d_context *v3d, uint8_t prim_mode) struct v3d_job *job = v3d->job; struct v3d_fs_key local_key; struct v3d_fs_key *key = &local_key; + nir_shader *s = v3d->prog.bind_fs->base.ir.nir; if (!(v3d->dirty & (VC5_DIRTY_PRIM_MODE | VC5_DIRTY_BLEND | @@ -412,8 +537,9 @@ v3d_update_compiled_fs(struct v3d_context *v3d, uint8_t prim_mode) } memset(key, 0, sizeof(*key)); - v3d_setup_shared_key(v3d, &key->base, &v3d->fragtex); + v3d_setup_shared_key(v3d, &key->base, &v3d->tex[PIPE_SHADER_FRAGMENT]); key->base.shader_state = v3d->prog.bind_fs; + key->base.ucp_enables = v3d->rasterizer->base.clip_plane_enable; key->is_points = (prim_mode == PIPE_PRIM_POINTS); key->is_lines = (prim_mode >= PIPE_PRIM_LINES && prim_mode <= PIPE_PRIM_LINE_STRIP); @@ -426,7 +552,7 @@ v3d_update_compiled_fs(struct v3d_context *v3d, uint8_t prim_mode) if (job->msaa) { key->msaa = v3d->rasterizer->base.multisample; key->sample_coverage = (v3d->rasterizer->base.multisample && - v3d->sample_mask != (1 << VC5_MAX_SAMPLES) - 1); + v3d->sample_mask != (1 << V3D_MAX_SAMPLES) - 1); key->sample_alpha_to_coverage = v3d->blend->base.alpha_to_coverage; key->sample_alpha_to_one = v3d->blend->base.alpha_to_one; } @@ -438,17 +564,30 @@ v3d_update_compiled_fs(struct v3d_context *v3d, uint8_t prim_mode) key->alpha_test_func = v3d->zsa->base.alpha.func; } - /* gl_FragColor's propagation to however many bound color buffers - * there are means that the buffer count needs to be in the key. - */ - key->nr_cbufs = v3d->framebuffer.nr_cbufs; key->swap_color_rb = v3d->swap_color_rb; - for (int i = 0; i < key->nr_cbufs; i++) { + for (int i = 0; i < v3d->framebuffer.nr_cbufs; i++) { struct pipe_surface *cbuf = v3d->framebuffer.cbufs[i]; if (!cbuf) continue; + /* gl_FragColor's propagation to however many bound color + * buffers there are means that the shader compile needs to + * know what buffers are present. + */ + key->cbufs |= 1 << i; + + /* If logic operations are enabled then we might emit color + * reads and we need to know the color buffer format and + * swizzle for that. + */ + if (key->logicop_func != PIPE_LOGICOP_COPY) { + key->color_fmt[i].format = cbuf->format; + key->color_fmt[i].swizzle = + v3d_get_format_swizzle(&v3d->screen->devinfo, + cbuf->format); + } + const struct util_format_description *desc = util_format_description(cbuf->format); @@ -457,7 +596,7 @@ v3d_update_compiled_fs(struct v3d_context *v3d, uint8_t prim_mode) key->f32_color_rb |= 1 << i; } - if (v3d->prog.bind_fs->was_tgsi) { + if (s->info.fs.untyped_color_outputs) { if (util_format_is_pure_uint(cbuf->format)) key->uint_color_rb |= 1 << i; else if (util_format_is_pure_sint(cbuf->format)) @@ -477,7 +616,7 @@ v3d_update_compiled_fs(struct v3d_context *v3d, uint8_t prim_mode) key->shade_model_flat = v3d->rasterizer->base.flatshade; struct v3d_compiled_shader *old_fs = v3d->prog.fs; - v3d->prog.fs = v3d_get_compiled_shader(v3d, &key->base); + v3d->prog.fs = v3d_get_compiled_shader(v3d, &key->base, sizeof(*key)); if (v3d->prog.fs == old_fs) return; @@ -507,29 +646,116 @@ v3d_update_compiled_fs(struct v3d_context *v3d, uint8_t prim_mode) } } +static void +v3d_update_compiled_gs(struct v3d_context *v3d, uint8_t prim_mode) +{ + struct v3d_gs_key local_key; + struct v3d_gs_key *key = &local_key; + + if (!(v3d->dirty & (VC5_DIRTY_GEOMTEX | + VC5_DIRTY_RASTERIZER | + VC5_DIRTY_UNCOMPILED_GS | + VC5_DIRTY_PRIM_MODE | + VC5_DIRTY_FS_INPUTS))) { + return; + } + + if (!v3d->prog.bind_gs) { + v3d->prog.gs = NULL; + v3d->prog.gs_bin = NULL; + return; + } + + memset(key, 0, sizeof(*key)); + v3d_setup_shared_key(v3d, &key->base, &v3d->tex[PIPE_SHADER_GEOMETRY]); + key->base.shader_state = v3d->prog.bind_gs; + key->base.ucp_enables = v3d->rasterizer->base.clip_plane_enable; + key->base.is_last_geometry_stage = true; + key->num_used_outputs = v3d->prog.fs->prog_data.fs->num_inputs; + STATIC_ASSERT(sizeof(key->used_outputs) == + sizeof(v3d->prog.fs->prog_data.fs->input_slots)); + memcpy(key->used_outputs, v3d->prog.fs->prog_data.fs->input_slots, + sizeof(key->used_outputs)); + + key->per_vertex_point_size = + (prim_mode == PIPE_PRIM_POINTS && + v3d->rasterizer->base.point_size_per_vertex); + + struct v3d_compiled_shader *gs = + v3d_get_compiled_shader(v3d, &key->base, sizeof(*key)); + if (gs != v3d->prog.gs) { + v3d->prog.gs = gs; + v3d->dirty |= VC5_DIRTY_COMPILED_GS; + } + + key->is_coord = true; + + /* The last bin-mode shader in the geometry pipeline only outputs + * varyings used by transform feedback. + */ + struct v3d_uncompiled_shader *shader_state = key->base.shader_state; + memcpy(key->used_outputs, shader_state->tf_outputs, + sizeof(*key->used_outputs) * shader_state->num_tf_outputs); + if (shader_state->num_tf_outputs < key->num_used_outputs) { + uint32_t size = sizeof(*key->used_outputs) * + (key->num_used_outputs - + shader_state->num_tf_outputs); + memset(&key->used_outputs[shader_state->num_tf_outputs], + 0, size); + } + key->num_used_outputs = shader_state->num_tf_outputs; + + struct v3d_compiled_shader *old_gs = v3d->prog.gs; + struct v3d_compiled_shader *gs_bin = + v3d_get_compiled_shader(v3d, &key->base, sizeof(*key)); + if (gs_bin != old_gs) { + v3d->prog.gs_bin = gs_bin; + v3d->dirty |= VC5_DIRTY_COMPILED_GS_BIN; + } + + if (old_gs && memcmp(v3d->prog.gs->prog_data.gs->input_slots, + old_gs->prog_data.gs->input_slots, + sizeof(v3d->prog.gs->prog_data.gs->input_slots))) { + v3d->dirty |= VC5_DIRTY_GS_INPUTS; + } +} + static void v3d_update_compiled_vs(struct v3d_context *v3d, uint8_t prim_mode) { struct v3d_vs_key local_key; struct v3d_vs_key *key = &local_key; - if (!(v3d->dirty & (VC5_DIRTY_PRIM_MODE | - VC5_DIRTY_RASTERIZER | - VC5_DIRTY_VERTTEX | + if (!(v3d->dirty & (VC5_DIRTY_VERTTEX | VC5_DIRTY_VTXSTATE | VC5_DIRTY_UNCOMPILED_VS | - VC5_DIRTY_FS_INPUTS))) { + (v3d->prog.bind_gs ? 0 : VC5_DIRTY_RASTERIZER) | + (v3d->prog.bind_gs ? 0 : VC5_DIRTY_PRIM_MODE) | + (v3d->prog.bind_gs ? VC5_DIRTY_GS_INPUTS : + VC5_DIRTY_FS_INPUTS)))) { return; } memset(key, 0, sizeof(*key)); - v3d_setup_shared_key(v3d, &key->base, &v3d->verttex); + v3d_setup_shared_key(v3d, &key->base, &v3d->tex[PIPE_SHADER_VERTEX]); key->base.shader_state = v3d->prog.bind_vs; - key->num_fs_inputs = v3d->prog.fs->prog_data.fs->base.num_inputs; - STATIC_ASSERT(sizeof(key->fs_inputs) == - sizeof(v3d->prog.fs->prog_data.fs->input_slots)); - memcpy(key->fs_inputs, v3d->prog.fs->prog_data.fs->input_slots, - sizeof(key->fs_inputs)); + key->base.ucp_enables = v3d->rasterizer->base.clip_plane_enable; + key->base.is_last_geometry_stage = !v3d->prog.bind_gs; + + if (!v3d->prog.bind_gs) { + key->num_used_outputs = v3d->prog.fs->prog_data.fs->num_inputs; + STATIC_ASSERT(sizeof(key->used_outputs) == + sizeof(v3d->prog.fs->prog_data.fs->input_slots)); + memcpy(key->used_outputs, v3d->prog.fs->prog_data.fs->input_slots, + sizeof(key->used_outputs)); + } else { + key->num_used_outputs = v3d->prog.gs->prog_data.gs->num_inputs; + STATIC_ASSERT(sizeof(key->used_outputs) == + sizeof(v3d->prog.gs->prog_data.gs->input_slots)); + memcpy(key->used_outputs, v3d->prog.gs->prog_data.gs->input_slots, + sizeof(key->used_outputs)); + } + key->clamp_color = v3d->rasterizer->base.clamp_vertex_color; key->per_vertex_point_size = @@ -537,27 +763,39 @@ v3d_update_compiled_vs(struct v3d_context *v3d, uint8_t prim_mode) v3d->rasterizer->base.point_size_per_vertex); struct v3d_compiled_shader *vs = - v3d_get_compiled_shader(v3d, &key->base); + v3d_get_compiled_shader(v3d, &key->base, sizeof(*key)); if (vs != v3d->prog.vs) { v3d->prog.vs = vs; v3d->dirty |= VC5_DIRTY_COMPILED_VS; } key->is_coord = true; - /* Coord shaders only output varyings used by transform feedback. */ - struct v3d_uncompiled_shader *shader_state = key->base.shader_state; - memcpy(key->fs_inputs, shader_state->tf_outputs, - sizeof(*key->fs_inputs) * shader_state->num_tf_outputs); - if (shader_state->num_tf_outputs < key->num_fs_inputs) { - memset(&key->fs_inputs[shader_state->num_tf_outputs], - 0, - sizeof(*key->fs_inputs) * (key->num_fs_inputs - - shader_state->num_tf_outputs)); + + /* Coord shaders only output varyings used by transform feedback, + * unless they are linked to other shaders in the geometry side + * of the pipeline, since in that case any of the output varyings + * could be required in later geometry stages to compute + * gl_Position or TF outputs. + */ + if (!v3d->prog.bind_gs) { + struct v3d_uncompiled_shader *shader_state = + key->base.shader_state; + memcpy(key->used_outputs, shader_state->tf_outputs, + sizeof(*key->used_outputs) * + shader_state->num_tf_outputs); + if (shader_state->num_tf_outputs < key->num_used_outputs) { + uint32_t tail_bytes = + sizeof(*key->used_outputs) * + (key->num_used_outputs - + shader_state->num_tf_outputs); + memset(&key->used_outputs[shader_state->num_tf_outputs], + 0, tail_bytes); + } + key->num_used_outputs = shader_state->num_tf_outputs; } - key->num_fs_inputs = shader_state->num_tf_outputs; struct v3d_compiled_shader *cs = - v3d_get_compiled_shader(v3d, &key->base); + v3d_get_compiled_shader(v3d, &key->base, sizeof(*key)); if (cs != v3d->prog.cs) { v3d->prog.cs = cs; v3d->dirty |= VC5_DIRTY_COMPILED_CS; @@ -568,51 +806,79 @@ void v3d_update_compiled_shaders(struct v3d_context *v3d, uint8_t prim_mode) { v3d_update_compiled_fs(v3d, prim_mode); + v3d_update_compiled_gs(v3d, prim_mode); v3d_update_compiled_vs(v3d, prim_mode); } +void +v3d_update_compiled_cs(struct v3d_context *v3d) +{ + struct v3d_key local_key; + struct v3d_key *key = &local_key; + + if (!(v3d->dirty & (VC5_DIRTY_UNCOMPILED_CS | + VC5_DIRTY_COMPTEX))) { + return; + } + + memset(key, 0, sizeof(*key)); + v3d_setup_shared_key(v3d, key, &v3d->tex[PIPE_SHADER_COMPUTE]); + key->shader_state = v3d->prog.bind_compute; + + struct v3d_compiled_shader *cs = + v3d_get_compiled_shader(v3d, key, sizeof(*key)); + if (cs != v3d->prog.compute) { + v3d->prog.compute = cs; + v3d->dirty |= VC5_DIRTY_COMPILED_CS; /* XXX */ + } +} + static uint32_t fs_cache_hash(const void *key) { return _mesa_hash_data(key, sizeof(struct v3d_fs_key)); } +static uint32_t +gs_cache_hash(const void *key) +{ + return _mesa_hash_data(key, sizeof(struct v3d_gs_key)); +} + static uint32_t vs_cache_hash(const void *key) { return _mesa_hash_data(key, sizeof(struct v3d_vs_key)); } +static uint32_t +cs_cache_hash(const void *key) +{ + return _mesa_hash_data(key, sizeof(struct v3d_key)); +} + static bool fs_cache_compare(const void *key1, const void *key2) { return memcmp(key1, key2, sizeof(struct v3d_fs_key)) == 0; } +static bool +gs_cache_compare(const void *key1, const void *key2) +{ + return memcmp(key1, key2, sizeof(struct v3d_gs_key)) == 0; +} + static bool vs_cache_compare(const void *key1, const void *key2) { return memcmp(key1, key2, sizeof(struct v3d_vs_key)) == 0; } -static void -delete_from_cache_if_matches(struct hash_table *ht, - struct v3d_compiled_shader **last_compile, - struct hash_entry *entry, - struct v3d_uncompiled_shader *so) +static bool +cs_cache_compare(const void *key1, const void *key2) { - const struct v3d_key *key = entry->key; - - if (key->shader_state == so) { - struct v3d_compiled_shader *shader = entry->data; - _mesa_hash_table_remove(ht, entry); - v3d_bo_unreference(&shader->bo); - - if (shader == *last_compile) - *last_compile = NULL; - - ralloc_free(shader); - } + return memcmp(key1, key2, sizeof(struct v3d_key)) == 0; } static void @@ -620,14 +886,26 @@ v3d_shader_state_delete(struct pipe_context *pctx, void *hwcso) { struct v3d_context *v3d = v3d_context(pctx); struct v3d_uncompiled_shader *so = hwcso; + nir_shader *s = so->base.ir.nir; - hash_table_foreach(v3d->fs_cache, entry) { - delete_from_cache_if_matches(v3d->fs_cache, &v3d->prog.fs, - entry, so); - } - hash_table_foreach(v3d->vs_cache, entry) { - delete_from_cache_if_matches(v3d->vs_cache, &v3d->prog.vs, - entry, so); + hash_table_foreach(v3d->prog.cache[s->info.stage], entry) { + const struct v3d_key *key = entry->key; + struct v3d_compiled_shader *shader = entry->data; + + if (key->shader_state != so) + continue; + + if (v3d->prog.fs == shader) + v3d->prog.fs = NULL; + if (v3d->prog.vs == shader) + v3d->prog.vs = NULL; + if (v3d->prog.cs == shader) + v3d->prog.cs = NULL; + if (v3d->prog.compute == shader) + v3d->prog.compute = NULL; + + _mesa_hash_table_remove(v3d->prog.cache[s->info.stage], entry); + v3d_free_compiled_shader(shader); } ralloc_free(so->base.ir.nir); @@ -642,6 +920,14 @@ v3d_fp_state_bind(struct pipe_context *pctx, void *hwcso) v3d->dirty |= VC5_DIRTY_UNCOMPILED_FS; } +static void +v3d_gp_state_bind(struct pipe_context *pctx, void *hwcso) +{ + struct v3d_context *v3d = v3d_context(pctx); + v3d->prog.bind_gs = hwcso; + v3d->dirty |= VC5_DIRTY_UNCOMPILED_GS; +} + static void v3d_vp_state_bind(struct pipe_context *pctx, void *hwcso) { @@ -650,6 +936,23 @@ v3d_vp_state_bind(struct pipe_context *pctx, void *hwcso) v3d->dirty |= VC5_DIRTY_UNCOMPILED_VS; } +static void +v3d_compute_state_bind(struct pipe_context *pctx, void *state) +{ + struct v3d_context *v3d = v3d_context(pctx); + + v3d->prog.bind_compute = state; + v3d->dirty |= VC5_DIRTY_UNCOMPILED_CS; +} + +static void * +v3d_create_compute_state(struct pipe_context *pctx, + const struct pipe_compute_state *cso) +{ + return v3d_uncompiled_shader_create(pctx, cso->ir_type, + (void *)cso->prog); +} + void v3d_program_init(struct pipe_context *pctx) { @@ -658,16 +961,30 @@ v3d_program_init(struct pipe_context *pctx) pctx->create_vs_state = v3d_shader_state_create; pctx->delete_vs_state = v3d_shader_state_delete; + pctx->create_gs_state = v3d_shader_state_create; + pctx->delete_gs_state = v3d_shader_state_delete; + pctx->create_fs_state = v3d_shader_state_create; pctx->delete_fs_state = v3d_shader_state_delete; pctx->bind_fs_state = v3d_fp_state_bind; + pctx->bind_gs_state = v3d_gp_state_bind; pctx->bind_vs_state = v3d_vp_state_bind; - v3d->fs_cache = _mesa_hash_table_create(pctx, fs_cache_hash, - fs_cache_compare); - v3d->vs_cache = _mesa_hash_table_create(pctx, vs_cache_hash, - vs_cache_compare); + if (v3d->screen->has_csd) { + pctx->create_compute_state = v3d_create_compute_state; + pctx->delete_compute_state = v3d_shader_state_delete; + pctx->bind_compute_state = v3d_compute_state_bind; + } + + v3d->prog.cache[MESA_SHADER_VERTEX] = + _mesa_hash_table_create(pctx, vs_cache_hash, vs_cache_compare); + v3d->prog.cache[MESA_SHADER_GEOMETRY] = + _mesa_hash_table_create(pctx, gs_cache_hash, gs_cache_compare); + v3d->prog.cache[MESA_SHADER_FRAGMENT] = + _mesa_hash_table_create(pctx, fs_cache_hash, fs_cache_compare); + v3d->prog.cache[MESA_SHADER_COMPUTE] = + _mesa_hash_table_create(pctx, cs_cache_hash, cs_cache_compare); } void @@ -675,18 +992,16 @@ v3d_program_fini(struct pipe_context *pctx) { struct v3d_context *v3d = v3d_context(pctx); - hash_table_foreach(v3d->fs_cache, entry) { - struct v3d_compiled_shader *shader = entry->data; - v3d_bo_unreference(&shader->bo); - ralloc_free(shader); - _mesa_hash_table_remove(v3d->fs_cache, entry); - } + for (int i = 0; i < MESA_SHADER_STAGES; i++) { + struct hash_table *cache = v3d->prog.cache[i]; + if (!cache) + continue; - hash_table_foreach(v3d->vs_cache, entry) { - struct v3d_compiled_shader *shader = entry->data; - v3d_bo_unreference(&shader->bo); - ralloc_free(shader); - _mesa_hash_table_remove(v3d->vs_cache, entry); + hash_table_foreach(cache, entry) { + struct v3d_compiled_shader *shader = entry->data; + v3d_free_compiled_shader(shader); + _mesa_hash_table_remove(cache, entry); + } } v3d_bo_unreference(&v3d->prog.spill_bo);