*/
#include <inttypes.h>
-#include "util/u_format.h"
+#include "util/format/u_format.h"
#include "util/u_math.h"
#include "util/u_memory.h"
#include "util/ralloc.h"
#include "util/hash_table.h"
+#include "util/u_upload_mgr.h"
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_parse.h"
#include "compiler/nir/nir.h"
#include "compiler/v3d_compiler.h"
#include "v3d_context.h"
#include "broadcom/cle/v3d_packet_v33_pack.h"
-#include "mesa/state_tracker/st_glsl_types.h"
+
+static struct v3d_compiled_shader *
+v3d_get_compiled_shader(struct v3d_context *v3d,
+ struct v3d_key *key, size_t key_size);
+static void
+v3d_setup_shared_precompile_key(struct v3d_uncompiled_shader *uncompiled,
+ struct v3d_key *key);
static gl_varying_slot
v3d_get_slot_for_driver_location(nir_shader *s, uint32_t driver_location)
}
static int
-type_size(const struct glsl_type *type)
+/* nir_lower_io type-size callback: counts GLSL attribute slots per variable.
+ * The new "bindless" parameter is accepted to match the updated callback
+ * signature but is unused here.
+ */
+type_size(const struct glsl_type *type, bool bindless)
{
return glsl_count_attribute_slots(type, false);
}
-static int
-uniforms_type_size(const struct glsl_type *type)
+/* Records a v3d_varying_slot for every component of every output variable
+ * of the shader, appending to outputs[] and bumping *num_outputs.  Used to
+ * build "all outputs used" keys for shader-db precompiles.
+ */
+static void
+precompile_all_outputs(nir_shader *s,
+ struct v3d_varying_slot *outputs,
+ uint8_t *num_outputs)
{
- return st_glsl_storage_type_size(type, false);
+ nir_foreach_variable(var, &s->outputs) {
+ /* Arrays occupy one location per element; scalars count as 1. */
+ const int array_len = MAX2(glsl_get_length(var->type), 1);
+ for (int j = 0; j < array_len; j++) {
+ const int slot = var->data.location + j;
+ const int num_components =
+ glsl_get_components(var->type);
+ for (int i = 0; i < num_components; i++) {
+ const int swiz = var->data.location_frac + i;
+ outputs[(*num_outputs)++] =
+ v3d_slot_from_slot_and_component(slot,
+ swiz);
+ }
+ }
+ }
+}
+
+/**
+ * Precompiles a shader variant at shader state creation time if
+ * V3D_DEBUG=precompile is set. Used for shader-db
+ * (https://gitlab.freedesktop.org/mesa/shader-db)
+ */
+static void
+v3d_shader_precompile(struct v3d_context *v3d,
+ struct v3d_uncompiled_shader *so)
+{
+ nir_shader *s = so->base.ir.nir;
+
+ /* Build a plausible default key per stage; VS/GS additionally get a
+ * bin-mode (coordinate) variant compiled below.
+ */
+ if (s->info.stage == MESA_SHADER_FRAGMENT) {
+ struct v3d_fs_key key = {
+ .base.shader_state = so,
+ };
+
+ /* Mark each declared color output as a bound color buffer
+ * in the key.
+ */
+ nir_foreach_variable(var, &s->outputs) {
+ if (var->data.location == FRAG_RESULT_COLOR) {
+ key.cbufs |= 1 << 0;
+ } else if (var->data.location >= FRAG_RESULT_DATA0) {
+ key.cbufs |= 1 << (var->data.location -
+ FRAG_RESULT_DATA0);
+ }
+ }
+
+ key.logicop_func = PIPE_LOGICOP_COPY;
+
+ v3d_setup_shared_precompile_key(so, &key.base);
+ v3d_get_compiled_shader(v3d, &key.base, sizeof(key));
+ } else if (s->info.stage == MESA_SHADER_GEOMETRY) {
+ struct v3d_gs_key key = {
+ .base.shader_state = so,
+ .base.is_last_geometry_stage = true,
+ };
+
+ v3d_setup_shared_precompile_key(so, &key.base);
+
+ precompile_all_outputs(s,
+ key.used_outputs,
+ &key.num_used_outputs);
+
+ v3d_get_compiled_shader(v3d, &key.base, sizeof(key));
+
+ /* Compile GS bin shader: only position (XXX: include TF) */
+ key.is_coord = true;
+ key.num_used_outputs = 0;
+ for (int i = 0; i < 4; i++) {
+ key.used_outputs[key.num_used_outputs++] =
+ v3d_slot_from_slot_and_component(VARYING_SLOT_POS,
+ i);
+ }
+ v3d_get_compiled_shader(v3d, &key.base, sizeof(key));
+ } else {
+ assert(s->info.stage == MESA_SHADER_VERTEX);
+ struct v3d_vs_key key = {
+ .base.shader_state = so,
+ /* Emit fixed function outputs */
+ .base.is_last_geometry_stage = true,
+ };
+
+ v3d_setup_shared_precompile_key(so, &key.base);
+
+ precompile_all_outputs(s,
+ key.used_outputs,
+ &key.num_used_outputs);
+
+ v3d_get_compiled_shader(v3d, &key.base, sizeof(key));
+
+ /* Compile VS bin shader: only position (XXX: include TF) */
+ key.is_coord = true;
+ key.num_used_outputs = 0;
+ for (int i = 0; i < 4; i++) {
+ key.used_outputs[key.num_used_outputs++] =
+ v3d_slot_from_slot_and_component(VARYING_SLOT_POS,
+ i);
+ }
+ v3d_get_compiled_shader(v3d, &key.base, sizeof(key));
+ }
}
static void *
-v3d_shader_state_create(struct pipe_context *pctx,
- const struct pipe_shader_state *cso)
+v3d_uncompiled_shader_create(struct pipe_context *pctx,
+ enum pipe_shader_ir type, void *ir)
{
struct v3d_context *v3d = v3d_context(pctx);
struct v3d_uncompiled_shader *so = CALLOC_STRUCT(v3d_uncompiled_shader);
nir_shader *s;
- if (cso->type == PIPE_SHADER_IR_NIR) {
+ if (type == PIPE_SHADER_IR_NIR) {
/* The backend takes ownership of the NIR shader on state
* creation.
*/
- s = cso->ir.nir;
-
- NIR_PASS_V(s, nir_lower_io, nir_var_all & ~nir_var_uniform,
- type_size,
- (nir_lower_io_options)0);
- NIR_PASS_V(s, nir_lower_io, nir_var_uniform,
- uniforms_type_size,
- (nir_lower_io_options)0);
+ s = ir;
} else {
- assert(cso->type == PIPE_SHADER_IR_TGSI);
+ assert(type == PIPE_SHADER_IR_TGSI);
if (V3D_DEBUG & V3D_DEBUG_TGSI) {
fprintf(stderr, "prog %d TGSI:\n",
so->program_id);
- tgsi_dump(cso->tokens, 0);
+ tgsi_dump(ir, 0);
fprintf(stderr, "\n");
}
- s = tgsi_to_nir(cso->tokens, &v3d_nir_options);
+ s = tgsi_to_nir(ir, pctx->screen, false);
+ }
- so->was_tgsi = true;
+ nir_variable_mode lower_mode = nir_var_all & ~nir_var_uniform;
+ if (s->info.stage == MESA_SHADER_VERTEX ||
+ s->info.stage == MESA_SHADER_GEOMETRY) {
+ lower_mode &= ~(nir_var_shader_in | nir_var_shader_out);
}
+ NIR_PASS_V(s, nir_lower_io, lower_mode,
+ type_size,
+ (nir_lower_io_options)0);
- NIR_PASS_V(s, nir_opt_global_to_local);
NIR_PASS_V(s, nir_lower_regs_to_ssa);
NIR_PASS_V(s, nir_normalize_cubemap_coords);
v3d_optimize_nir(s);
- NIR_PASS_V(s, nir_remove_dead_variables, nir_var_local);
+ NIR_PASS_V(s, nir_remove_dead_variables, nir_var_function_temp);
/* Garbage collect dead instructions */
nir_sweep(s);
so->base.type = PIPE_SHADER_IR_NIR;
so->base.ir.nir = s;
- v3d_set_transform_feedback_outputs(so, &cso->stream_output);
-
if (V3D_DEBUG & (V3D_DEBUG_NIR |
v3d_debug_flag_for_shader_stage(s->info.stage))) {
fprintf(stderr, "%s prog %d NIR:\n",
fprintf(stderr, "\n");
}
+ if (V3D_DEBUG & V3D_DEBUG_PRECOMPILE)
+ v3d_shader_precompile(v3d, so);
+
return so;
}
-static struct v3d_compiled_shader *
-v3d_get_compiled_shader(struct v3d_context *v3d, struct v3d_key *key)
+/* Compiler debug callback: forwards compiler messages to the context's
+ * gallium debug interface via pipe_debug_message(SHADER_INFO).
+ */
+static void
+v3d_shader_debug_output(const char *message, void *data)
+{
+ struct v3d_context *v3d = data;
+
+ pipe_debug_message(&v3d->debug, SHADER_INFO, "%s", message);
+}
+
+/* pipe create_{vs,gs,fs}_state hook: wraps the shared uncompiled-shader
+ * creation path, then records the CSO's stream-output (transform feedback)
+ * declarations, which the compute path does not have.
+ */
+static void *
+v3d_shader_state_create(struct pipe_context *pctx,
+ const struct pipe_shader_state *cso)
+{
+ struct v3d_uncompiled_shader *so =
+ v3d_uncompiled_shader_create(pctx,
+ cso->type,
+ (cso->type == PIPE_SHADER_IR_TGSI ?
+ (void *)cso->tokens :
+ cso->ir.nir));
+
+ v3d_set_transform_feedback_outputs(so, &cso->stream_output);
+
+ return so;
+}
+
+struct v3d_compiled_shader *
+v3d_get_compiled_shader(struct v3d_context *v3d,
+ struct v3d_key *key,
+ size_t key_size)
{
struct v3d_uncompiled_shader *shader_state = key->shader_state;
nir_shader *s = shader_state->base.ir.nir;
- struct hash_table *ht;
- uint32_t key_size;
- if (s->info.stage == MESA_SHADER_FRAGMENT) {
- ht = v3d->fs_cache;
- key_size = sizeof(struct v3d_fs_key);
- } else {
- ht = v3d->vs_cache;
- key_size = sizeof(struct v3d_vs_key);
- }
-
+ struct hash_table *ht = v3d->prog.cache[s->info.stage];
struct hash_entry *entry = _mesa_hash_table_search(ht, key);
if (entry)
return entry->data;
uint64_t *qpu_insts;
uint32_t shader_size;
- switch (s->info.stage) {
- case MESA_SHADER_VERTEX:
- shader->prog_data.vs = rzalloc(shader, struct v3d_vs_prog_data);
-
- qpu_insts = v3d_compile_vs(v3d->screen->compiler,
- (struct v3d_vs_key *)key,
- shader->prog_data.vs, s,
- program_id, variant_id,
- &shader_size);
- break;
- case MESA_SHADER_FRAGMENT:
- shader->prog_data.fs = rzalloc(shader, struct v3d_fs_prog_data);
-
- qpu_insts = v3d_compile_fs(v3d->screen->compiler,
- (struct v3d_fs_key *)key,
- shader->prog_data.fs, s,
- program_id, variant_id,
- &shader_size);
- break;
- default:
- unreachable("bad stage");
- }
+ qpu_insts = v3d_compile(v3d->screen->compiler, key,
+ &shader->prog_data.base, s,
+ v3d_shader_debug_output,
+ v3d,
+ program_id, variant_id, &shader_size);
+ ralloc_steal(shader, shader->prog_data.base);
v3d_set_shader_uniform_dirty_flags(shader);
- shader->bo = v3d_bo_alloc(v3d->screen, shader_size, "shader");
- v3d_bo_map(shader->bo);
- memcpy(shader->bo->map, qpu_insts, shader_size);
+ if (shader_size) {
+ u_upload_data(v3d->state_uploader, 0, shader_size, 8,
+ qpu_insts, &shader->offset, &shader->resource);
+ }
free(qpu_insts);
- struct v3d_key *dup_key;
- dup_key = ralloc_size(shader, key_size);
- memcpy(dup_key, key, key_size);
- _mesa_hash_table_insert(ht, dup_key, shader);
+ if (ht) {
+ struct v3d_key *dup_key;
+ dup_key = ralloc_size(shader, key_size);
+ memcpy(dup_key, key, key_size);
+ _mesa_hash_table_insert(ht, dup_key, shader);
+ }
if (shader->prog_data.base->spill_size >
v3d->prog.spill_size_per_thread) {
- /* Max 4 QPUs per slice, 3 slices per core. We only do single
- * core so far. This overallocates memory on smaller cores.
+ /* The TIDX register we use for choosing the area to access
+ * for scratch space is: (core << 6) | (qpu << 2) | thread.
+ * Even at minimum threadcount in a particular shader, that
+ * means we still multiply by qpus by 4.
*/
- int total_spill_size =
- 4 * 3 * shader->prog_data.base->spill_size;
+ int total_spill_size = (v3d->screen->devinfo.qpu_count * 4 *
+ shader->prog_data.base->spill_size);
v3d_bo_unreference(&v3d->prog.spill_bo);
v3d->prog.spill_bo = v3d_bo_alloc(v3d->screen,
return shader;
}
+/* Releases a compiled shader: drops its reference on the QPU-code
+ * resource (uploaded via u_upload_mgr) and frees the ralloc'd struct.
+ */
+static void
+v3d_free_compiled_shader(struct v3d_compiled_shader *shader)
+{
+ pipe_resource_reference(&shader->resource, NULL);
+ ralloc_free(shader);
+}
+
static void
v3d_setup_shared_key(struct v3d_context *v3d, struct v3d_key *key,
struct v3d_texture_stateobj *texstate)
}
if (sampler) {
- key->tex[i].compare_mode = sampler_state->compare_mode;
- key->tex[i].compare_func = sampler_state->compare_func;
key->tex[i].clamp_s =
sampler_state->wrap_s == PIPE_TEX_WRAP_CLAMP;
key->tex[i].clamp_t =
sampler_state->wrap_r == PIPE_TEX_WRAP_CLAMP;
}
}
+}
+
+/* Fills the texture fields of a precompile key with defaults (16-bit,
+ * 2-channel returns, identity swizzle) since no real sampler/view state
+ * exists at precompile time — presumably a representative guess for
+ * shader-db; confirm against the runtime key setup.
+ */
+static void
+v3d_setup_shared_precompile_key(struct v3d_uncompiled_shader *uncompiled,
+ struct v3d_key *key)
+{
+ nir_shader *s = uncompiled->base.ir.nir;
- key->ucp_enables = v3d->rasterizer->base.clip_plane_enable;
+ for (int i = 0; i < s->info.num_textures; i++) {
+ key->tex[i].return_size = 16;
+ key->tex[i].return_channels = 2;
+
+ key->tex[i].swizzle[0] = PIPE_SWIZZLE_X;
+ key->tex[i].swizzle[1] = PIPE_SWIZZLE_Y;
+ key->tex[i].swizzle[2] = PIPE_SWIZZLE_Z;
+ key->tex[i].swizzle[3] = PIPE_SWIZZLE_W;
+ }
}
static void
struct v3d_job *job = v3d->job;
struct v3d_fs_key local_key;
struct v3d_fs_key *key = &local_key;
+ nir_shader *s = v3d->prog.bind_fs->base.ir.nir;
if (!(v3d->dirty & (VC5_DIRTY_PRIM_MODE |
VC5_DIRTY_BLEND |
}
memset(key, 0, sizeof(*key));
- v3d_setup_shared_key(v3d, &key->base, &v3d->fragtex);
+ v3d_setup_shared_key(v3d, &key->base, &v3d->tex[PIPE_SHADER_FRAGMENT]);
key->base.shader_state = v3d->prog.bind_fs;
+ key->base.ucp_enables = v3d->rasterizer->base.clip_plane_enable;
key->is_points = (prim_mode == PIPE_PRIM_POINTS);
key->is_lines = (prim_mode >= PIPE_PRIM_LINES &&
prim_mode <= PIPE_PRIM_LINE_STRIP);
if (job->msaa) {
key->msaa = v3d->rasterizer->base.multisample;
key->sample_coverage = (v3d->rasterizer->base.multisample &&
- v3d->sample_mask != (1 << VC5_MAX_SAMPLES) - 1);
+ v3d->sample_mask != (1 << V3D_MAX_SAMPLES) - 1);
key->sample_alpha_to_coverage = v3d->blend->base.alpha_to_coverage;
key->sample_alpha_to_one = v3d->blend->base.alpha_to_one;
}
key->alpha_test_func = v3d->zsa->base.alpha.func;
}
- /* gl_FragColor's propagation to however many bound color buffers
- * there are means that the buffer count needs to be in the key.
- */
- key->nr_cbufs = v3d->framebuffer.nr_cbufs;
key->swap_color_rb = v3d->swap_color_rb;
- for (int i = 0; i < key->nr_cbufs; i++) {
+ for (int i = 0; i < v3d->framebuffer.nr_cbufs; i++) {
struct pipe_surface *cbuf = v3d->framebuffer.cbufs[i];
if (!cbuf)
continue;
+ /* gl_FragColor's propagation to however many bound color
+ * buffers there are means that the shader compile needs to
+ * know what buffers are present.
+ */
+ key->cbufs |= 1 << i;
+
+ /* If logic operations are enabled then we might emit color
+ * reads and we need to know the color buffer format and
+ * swizzle for that.
+ */
+ if (key->logicop_func != PIPE_LOGICOP_COPY) {
+ key->color_fmt[i].format = cbuf->format;
+ key->color_fmt[i].swizzle =
+ v3d_get_format_swizzle(&v3d->screen->devinfo,
+ cbuf->format);
+ }
+
const struct util_format_description *desc =
util_format_description(cbuf->format);
key->f32_color_rb |= 1 << i;
}
- if (v3d->prog.bind_fs->was_tgsi) {
+ if (s->info.fs.untyped_color_outputs) {
if (util_format_is_pure_uint(cbuf->format))
key->uint_color_rb |= 1 << i;
else if (util_format_is_pure_sint(cbuf->format))
key->shade_model_flat = v3d->rasterizer->base.flatshade;
struct v3d_compiled_shader *old_fs = v3d->prog.fs;
- v3d->prog.fs = v3d_get_compiled_shader(v3d, &key->base);
+ v3d->prog.fs = v3d_get_compiled_shader(v3d, &key->base, sizeof(*key));
if (v3d->prog.fs == old_fs)
return;
v3d->dirty |= VC5_DIRTY_FLAT_SHADE_FLAGS;
}
+ if (v3d->prog.fs->prog_data.fs->noperspective_flags !=
+ old_fs->prog_data.fs->noperspective_flags) {
+ v3d->dirty |= VC5_DIRTY_NOPERSPECTIVE_FLAGS;
+ }
+
if (v3d->prog.fs->prog_data.fs->centroid_flags !=
old_fs->prog_data.fs->centroid_flags) {
v3d->dirty |= VC5_DIRTY_CENTROID_FLAGS;
}
}
+/* Builds the GS key from current state and compiles/looks-up both the
+ * render-mode GS and the bin-mode (coordinate) GS, setting the relevant
+ * dirty flags when either compiled shader changes.
+ */
+static void
+v3d_update_compiled_gs(struct v3d_context *v3d, uint8_t prim_mode)
+{
+ struct v3d_gs_key local_key;
+ struct v3d_gs_key *key = &local_key;
+
+ if (!(v3d->dirty & (VC5_DIRTY_GEOMTEX |
+ VC5_DIRTY_RASTERIZER |
+ VC5_DIRTY_UNCOMPILED_GS |
+ VC5_DIRTY_PRIM_MODE |
+ VC5_DIRTY_FS_INPUTS))) {
+ return;
+ }
+
+ if (!v3d->prog.bind_gs) {
+ v3d->prog.gs = NULL;
+ v3d->prog.gs_bin = NULL;
+ return;
+ }
+
+ memset(key, 0, sizeof(*key));
+ v3d_setup_shared_key(v3d, &key->base, &v3d->tex[PIPE_SHADER_GEOMETRY]);
+ key->base.shader_state = v3d->prog.bind_gs;
+ key->base.ucp_enables = v3d->rasterizer->base.clip_plane_enable;
+ key->base.is_last_geometry_stage = true;
+ /* Render-mode GS outputs exactly the varyings the FS consumes. */
+ key->num_used_outputs = v3d->prog.fs->prog_data.fs->num_inputs;
+ STATIC_ASSERT(sizeof(key->used_outputs) ==
+ sizeof(v3d->prog.fs->prog_data.fs->input_slots));
+ memcpy(key->used_outputs, v3d->prog.fs->prog_data.fs->input_slots,
+ sizeof(key->used_outputs));
+
+ key->per_vertex_point_size =
+ (prim_mode == PIPE_PRIM_POINTS &&
+ v3d->rasterizer->base.point_size_per_vertex);
+
+ struct v3d_compiled_shader *gs =
+ v3d_get_compiled_shader(v3d, &key->base, sizeof(*key));
+ if (gs != v3d->prog.gs) {
+ v3d->prog.gs = gs;
+ v3d->dirty |= VC5_DIRTY_COMPILED_GS;
+ }
+
+ key->is_coord = true;
+
+ /* The last bin-mode shader in the geometry pipeline only outputs
+ * varyings used by transform feedback.
+ */
+ struct v3d_uncompiled_shader *shader_state = key->base.shader_state;
+ memcpy(key->used_outputs, shader_state->tf_outputs,
+ sizeof(*key->used_outputs) * shader_state->num_tf_outputs);
+ if (shader_state->num_tf_outputs < key->num_used_outputs) {
+ uint32_t size = sizeof(*key->used_outputs) *
+ (key->num_used_outputs -
+ shader_state->num_tf_outputs);
+ memset(&key->used_outputs[shader_state->num_tf_outputs],
+ 0, size);
+ }
+ key->num_used_outputs = shader_state->num_tf_outputs;
+
+ /* NOTE(review): old_gs is the render-mode GS stored just above, so
+ * gs_bin is compared against it rather than the previous gs_bin —
+ * confirm this comparison is intentional.
+ */
+ struct v3d_compiled_shader *old_gs = v3d->prog.gs;
+ struct v3d_compiled_shader *gs_bin =
+ v3d_get_compiled_shader(v3d, &key->base, sizeof(*key));
+ if (gs_bin != old_gs) {
+ v3d->prog.gs_bin = gs_bin;
+ v3d->dirty |= VC5_DIRTY_COMPILED_GS_BIN;
+ }
+
+ if (old_gs && memcmp(v3d->prog.gs->prog_data.gs->input_slots,
+ old_gs->prog_data.gs->input_slots,
+ sizeof(v3d->prog.gs->prog_data.gs->input_slots))) {
+ v3d->dirty |= VC5_DIRTY_GS_INPUTS;
+ }
+}
+
static void
v3d_update_compiled_vs(struct v3d_context *v3d, uint8_t prim_mode)
{
struct v3d_vs_key local_key;
struct v3d_vs_key *key = &local_key;
- if (!(v3d->dirty & (VC5_DIRTY_PRIM_MODE |
- VC5_DIRTY_RASTERIZER |
- VC5_DIRTY_VERTTEX |
+ if (!(v3d->dirty & (VC5_DIRTY_VERTTEX |
VC5_DIRTY_VTXSTATE |
VC5_DIRTY_UNCOMPILED_VS |
- VC5_DIRTY_FS_INPUTS))) {
+ (v3d->prog.bind_gs ? 0 : VC5_DIRTY_RASTERIZER) |
+ (v3d->prog.bind_gs ? 0 : VC5_DIRTY_PRIM_MODE) |
+ (v3d->prog.bind_gs ? VC5_DIRTY_GS_INPUTS :
+ VC5_DIRTY_FS_INPUTS)))) {
return;
}
memset(key, 0, sizeof(*key));
- v3d_setup_shared_key(v3d, &key->base, &v3d->verttex);
+ v3d_setup_shared_key(v3d, &key->base, &v3d->tex[PIPE_SHADER_VERTEX]);
key->base.shader_state = v3d->prog.bind_vs;
- key->num_fs_inputs = v3d->prog.fs->prog_data.fs->base.num_inputs;
- STATIC_ASSERT(sizeof(key->fs_inputs) ==
- sizeof(v3d->prog.fs->prog_data.fs->input_slots));
- memcpy(key->fs_inputs, v3d->prog.fs->prog_data.fs->input_slots,
- sizeof(key->fs_inputs));
+ key->base.ucp_enables = v3d->rasterizer->base.clip_plane_enable;
+ key->base.is_last_geometry_stage = !v3d->prog.bind_gs;
+
+ if (!v3d->prog.bind_gs) {
+ key->num_used_outputs = v3d->prog.fs->prog_data.fs->num_inputs;
+ STATIC_ASSERT(sizeof(key->used_outputs) ==
+ sizeof(v3d->prog.fs->prog_data.fs->input_slots));
+ memcpy(key->used_outputs, v3d->prog.fs->prog_data.fs->input_slots,
+ sizeof(key->used_outputs));
+ } else {
+ key->num_used_outputs = v3d->prog.gs->prog_data.gs->num_inputs;
+ STATIC_ASSERT(sizeof(key->used_outputs) ==
+ sizeof(v3d->prog.gs->prog_data.gs->input_slots));
+ memcpy(key->used_outputs, v3d->prog.gs->prog_data.gs->input_slots,
+ sizeof(key->used_outputs));
+ }
+
key->clamp_color = v3d->rasterizer->base.clamp_vertex_color;
key->per_vertex_point_size =
v3d->rasterizer->base.point_size_per_vertex);
struct v3d_compiled_shader *vs =
- v3d_get_compiled_shader(v3d, &key->base);
+ v3d_get_compiled_shader(v3d, &key->base, sizeof(*key));
if (vs != v3d->prog.vs) {
v3d->prog.vs = vs;
v3d->dirty |= VC5_DIRTY_COMPILED_VS;
}
key->is_coord = true;
- /* Coord shaders only output varyings used by transform feedback. */
- struct v3d_uncompiled_shader *shader_state = key->base.shader_state;
- memcpy(key->fs_inputs, shader_state->tf_outputs,
- sizeof(*key->fs_inputs) * shader_state->num_tf_outputs);
- if (shader_state->num_tf_outputs < key->num_fs_inputs) {
- memset(&key->fs_inputs[shader_state->num_tf_outputs],
- 0,
- sizeof(*key->fs_inputs) * (key->num_fs_inputs -
- shader_state->num_tf_outputs));
+
+ /* Coord shaders only output varyings used by transform feedback,
+ * unless they are linked to other shaders in the geometry side
+ * of the pipeline, since in that case any of the output varyings
+ * could be required in later geometry stages to compute
+ * gl_Position or TF outputs.
+ */
+ if (!v3d->prog.bind_gs) {
+ struct v3d_uncompiled_shader *shader_state =
+ key->base.shader_state;
+ memcpy(key->used_outputs, shader_state->tf_outputs,
+ sizeof(*key->used_outputs) *
+ shader_state->num_tf_outputs);
+ if (shader_state->num_tf_outputs < key->num_used_outputs) {
+ uint32_t tail_bytes =
+ sizeof(*key->used_outputs) *
+ (key->num_used_outputs -
+ shader_state->num_tf_outputs);
+ memset(&key->used_outputs[shader_state->num_tf_outputs],
+ 0, tail_bytes);
+ }
+ key->num_used_outputs = shader_state->num_tf_outputs;
}
- key->num_fs_inputs = shader_state->num_tf_outputs;
struct v3d_compiled_shader *cs =
- v3d_get_compiled_shader(v3d, &key->base);
+ v3d_get_compiled_shader(v3d, &key->base, sizeof(*key));
if (cs != v3d->prog.cs) {
v3d->prog.cs = cs;
v3d->dirty |= VC5_DIRTY_COMPILED_CS;
v3d_update_compiled_shaders(struct v3d_context *v3d, uint8_t prim_mode)
{
v3d_update_compiled_fs(v3d, prim_mode);
+ v3d_update_compiled_gs(v3d, prim_mode);
v3d_update_compiled_vs(v3d, prim_mode);
}
+/* Compiles/looks-up the bound compute shader when compute-related state
+ * (uncompiled CS or compute textures) is dirty.  Compute uses the bare
+ * v3d_key with no stage-specific fields.
+ */
+void
+v3d_update_compiled_cs(struct v3d_context *v3d)
+{
+ struct v3d_key local_key;
+ struct v3d_key *key = &local_key;
+
+ if (!(v3d->dirty & (VC5_DIRTY_UNCOMPILED_CS |
+ VC5_DIRTY_COMPTEX))) {
+ return;
+ }
+
+ memset(key, 0, sizeof(*key));
+ v3d_setup_shared_key(v3d, key, &v3d->tex[PIPE_SHADER_COMPUTE]);
+ key->shader_state = v3d->prog.bind_compute;
+
+ struct v3d_compiled_shader *cs =
+ v3d_get_compiled_shader(v3d, key, sizeof(*key));
+ if (cs != v3d->prog.compute) {
+ v3d->prog.compute = cs;
+ v3d->dirty |= VC5_DIRTY_COMPILED_CS; /* XXX */
+ }
+}
+
static uint32_t
fs_cache_hash(const void *key)
{
return _mesa_hash_data(key, sizeof(struct v3d_fs_key));
}
+/* Hash callback for the GS variant cache: hashes the whole v3d_gs_key. */
+static uint32_t
+gs_cache_hash(const void *key)
+{
+ return _mesa_hash_data(key, sizeof(struct v3d_gs_key));
+}
+
static uint32_t
vs_cache_hash(const void *key)
{
return _mesa_hash_data(key, sizeof(struct v3d_vs_key));
}
+/* Hash callback for the compute variant cache: compute keys are the bare
+ * v3d_key with no stage-specific extension.
+ */
+static uint32_t
+cs_cache_hash(const void *key)
+{
+ return _mesa_hash_data(key, sizeof(struct v3d_key));
+}
+
static bool
fs_cache_compare(const void *key1, const void *key2)
{
return memcmp(key1, key2, sizeof(struct v3d_fs_key)) == 0;
}
+/* Key equality for the GS variant cache (bytewise compare of v3d_gs_key). */
+static bool
+gs_cache_compare(const void *key1, const void *key2)
+{
+ return memcmp(key1, key2, sizeof(struct v3d_gs_key)) == 0;
+}
+
static bool
vs_cache_compare(const void *key1, const void *key2)
{
return memcmp(key1, key2, sizeof(struct v3d_vs_key)) == 0;
}
-static void
-delete_from_cache_if_matches(struct hash_table *ht,
- struct v3d_compiled_shader **last_compile,
- struct hash_entry *entry,
- struct v3d_uncompiled_shader *so)
+/* Key equality for the compute variant cache (bytewise compare of the
+ * bare v3d_key).
+ */
+static bool
+cs_cache_compare(const void *key1, const void *key2)
{
- const struct v3d_key *key = entry->key;
-
- if (key->shader_state == so) {
- struct v3d_compiled_shader *shader = entry->data;
- _mesa_hash_table_remove(ht, entry);
- v3d_bo_unreference(&shader->bo);
-
- if (shader == *last_compile)
- *last_compile = NULL;
-
- ralloc_free(shader);
- }
+ return memcmp(key1, key2, sizeof(struct v3d_key)) == 0;
}
static void
{
struct v3d_context *v3d = v3d_context(pctx);
struct v3d_uncompiled_shader *so = hwcso;
+ nir_shader *s = so->base.ir.nir;
- struct hash_entry *entry;
- hash_table_foreach(v3d->fs_cache, entry) {
- delete_from_cache_if_matches(v3d->fs_cache, &v3d->prog.fs,
- entry, so);
- }
- hash_table_foreach(v3d->vs_cache, entry) {
- delete_from_cache_if_matches(v3d->vs_cache, &v3d->prog.vs,
- entry, so);
+ hash_table_foreach(v3d->prog.cache[s->info.stage], entry) {
+ const struct v3d_key *key = entry->key;
+ struct v3d_compiled_shader *shader = entry->data;
+
+ if (key->shader_state != so)
+ continue;
+
+ if (v3d->prog.fs == shader)
+ v3d->prog.fs = NULL;
+ if (v3d->prog.vs == shader)
+ v3d->prog.vs = NULL;
+ if (v3d->prog.cs == shader)
+ v3d->prog.cs = NULL;
+ if (v3d->prog.compute == shader)
+ v3d->prog.compute = NULL;
+
+ _mesa_hash_table_remove(v3d->prog.cache[s->info.stage], entry);
+ v3d_free_compiled_shader(shader);
}
ralloc_free(so->base.ir.nir);
v3d->dirty |= VC5_DIRTY_UNCOMPILED_FS;
}
+/* bind_gs_state hook: records the bound GS CSO and flags it for
+ * recompilation on the next draw.
+ */
+static void
+v3d_gp_state_bind(struct pipe_context *pctx, void *hwcso)
+{
+ struct v3d_context *v3d = v3d_context(pctx);
+ v3d->prog.bind_gs = hwcso;
+ v3d->dirty |= VC5_DIRTY_UNCOMPILED_GS;
+}
+
static void
v3d_vp_state_bind(struct pipe_context *pctx, void *hwcso)
{
v3d->dirty |= VC5_DIRTY_UNCOMPILED_VS;
}
+/* bind_compute_state hook: records the bound compute CSO and flags it
+ * for recompilation on the next dispatch.
+ */
+static void
+v3d_compute_state_bind(struct pipe_context *pctx, void *state)
+{
+ struct v3d_context *v3d = v3d_context(pctx);
+
+ v3d->prog.bind_compute = state;
+ v3d->dirty |= VC5_DIRTY_UNCOMPILED_CS;
+}
+
+/* create_compute_state hook: compute shaders share the generic
+ * uncompiled-shader creation path; no stream-output setup is needed.
+ */
+static void *
+v3d_create_compute_state(struct pipe_context *pctx,
+ const struct pipe_compute_state *cso)
+{
+ return v3d_uncompiled_shader_create(pctx, cso->ir_type,
+ (void *)cso->prog);
+}
+
void
v3d_program_init(struct pipe_context *pctx)
{
pctx->create_vs_state = v3d_shader_state_create;
pctx->delete_vs_state = v3d_shader_state_delete;
+ pctx->create_gs_state = v3d_shader_state_create;
+ pctx->delete_gs_state = v3d_shader_state_delete;
+
pctx->create_fs_state = v3d_shader_state_create;
pctx->delete_fs_state = v3d_shader_state_delete;
pctx->bind_fs_state = v3d_fp_state_bind;
+ pctx->bind_gs_state = v3d_gp_state_bind;
pctx->bind_vs_state = v3d_vp_state_bind;
- v3d->fs_cache = _mesa_hash_table_create(pctx, fs_cache_hash,
- fs_cache_compare);
- v3d->vs_cache = _mesa_hash_table_create(pctx, vs_cache_hash,
- vs_cache_compare);
+ if (v3d->screen->has_csd) {
+ pctx->create_compute_state = v3d_create_compute_state;
+ pctx->delete_compute_state = v3d_shader_state_delete;
+ pctx->bind_compute_state = v3d_compute_state_bind;
+ }
+
+ v3d->prog.cache[MESA_SHADER_VERTEX] =
+ _mesa_hash_table_create(pctx, vs_cache_hash, vs_cache_compare);
+ v3d->prog.cache[MESA_SHADER_GEOMETRY] =
+ _mesa_hash_table_create(pctx, gs_cache_hash, gs_cache_compare);
+ v3d->prog.cache[MESA_SHADER_FRAGMENT] =
+ _mesa_hash_table_create(pctx, fs_cache_hash, fs_cache_compare);
+ v3d->prog.cache[MESA_SHADER_COMPUTE] =
+ _mesa_hash_table_create(pctx, cs_cache_hash, cs_cache_compare);
}
void
{
struct v3d_context *v3d = v3d_context(pctx);
- struct hash_entry *entry;
- hash_table_foreach(v3d->fs_cache, entry) {
- struct v3d_compiled_shader *shader = entry->data;
- v3d_bo_unreference(&shader->bo);
- ralloc_free(shader);
- _mesa_hash_table_remove(v3d->fs_cache, entry);
- }
+ for (int i = 0; i < MESA_SHADER_STAGES; i++) {
+ struct hash_table *cache = v3d->prog.cache[i];
+ if (!cache)
+ continue;
- hash_table_foreach(v3d->vs_cache, entry) {
- struct v3d_compiled_shader *shader = entry->data;
- v3d_bo_unreference(&shader->bo);
- ralloc_free(shader);
- _mesa_hash_table_remove(v3d->vs_cache, entry);
+ hash_table_foreach(cache, entry) {
+ struct v3d_compiled_shader *shader = entry->data;
+ v3d_free_compiled_shader(shader);
+ _mesa_hash_table_remove(cache, entry);
+ }
}
v3d_bo_unreference(&v3d->prog.spill_bo);