src/gallium/drivers/vc5/vc5_program.c

   1 /*
   2  * Copyright © 2014-2017 Broadcom
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21  * IN THE SOFTWARE.
  22  */
  23
  24 #include <inttypes.h>
  25 #include "util/u_format.h"
  26 #include "util/u_math.h"
  27 #include "util/u_memory.h"
  28 #include "util/ralloc.h"
  29 #include "util/hash_table.h"
  30 #include "tgsi/tgsi_dump.h"
  31 #include "tgsi/tgsi_parse.h"
  32 #include "compiler/nir/nir.h"
  33 #include "compiler/nir/nir_builder.h"
  34 #include "nir/tgsi_to_nir.h"
  35 #include "compiler/v3d_compiler.h"
  36 #include "vc5_context.h"
  37 #include "broadcom/cle/v3d_packet_v33_pack.h"
  38 #include "mesa/state_tracker/st_glsl_types.h"
  39
  40 static gl_varying_slot
  41 vc5_get_slot_for_driver_location(nir_shader *s, uint32_t driver_location)
  42 {
  43         nir_foreach_variable(var, &s->outputs) {
  44                 if (var->data.driver_location == driver_location) {
  45                         return var->data.location;
  46                 }
  47         }
  48
  49         return -1;
  50 }
  51
  52 /**
  53  * Precomputes the TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC array for the shader.
  54  *
  55  * A shader can have 16 of these specs, and each one of them can write up to
  56  * 16 dwords.  Since we allow a total of 64 transform feedback output
  57  * components (not 16 vectors), we have to group the writes of multiple
  58  * varyings together in a single data spec.
  59  */
  60 static void
  61 vc5_set_transform_feedback_outputs(struct vc5_uncompiled_shader *so,
  62                                    const struct pipe_stream_output_info *stream_output)
  63 {
  64         if (!stream_output->num_outputs)
  65                 return;
  66
  67         struct v3d_varying_slot slots[PIPE_MAX_SO_OUTPUTS * 4];
  68         int slot_count = 0;
  69
  70         for (int buffer = 0; buffer < PIPE_MAX_SO_BUFFERS; buffer++) {
  71                 uint32_t buffer_offset = 0;
  72                 uint32_t vpm_start = slot_count;
  73
  74                 for (int i = 0; i < stream_output->num_outputs; i++) {
  75                         const struct pipe_stream_output *output =
  76                                 &stream_output->output[i];
  77
  78                         if (output->output_buffer != buffer)
  79                                 continue;
  80
  81                         /* We assume that the SO outputs appear in increasing
  82                          * order in the buffer.
  83                          */
  84                         assert(output->dst_offset >= buffer_offset);
  85
  86                         /* Pad any undefined slots in the output */
  87                         for (int j = buffer_offset; j < output->dst_offset; j++) {
  88                                 slots[slot_count] =
  89                                         v3d_slot_from_slot_and_component(VARYING_SLOT_POS, 0);
  90                                 slot_count++;
  91                                 buffer_offset++;
  92                         }
  93
  94                         /* Set the coordinate shader up to output the
  95                          * components of this varying.
  96                          */
  97                         for (int j = 0; j < output->num_components; j++) {
  98                                 gl_varying_slot slot =
  99                                         vc5_get_slot_for_driver_location(so->base.ir.nir, output->register_index);
 100
 101                                 slots[slot_count] =
 102                                         v3d_slot_from_slot_and_component(slot,
 103                                                                          output->start_component + j);
 104                                 slot_count++;
 105                                 buffer_offset++;
 106                         }
 107                 }
 108
 109                 uint32_t vpm_size = slot_count - vpm_start;
 110                 if (!vpm_size)
 111                         continue;
 112
 113                 uint32_t vpm_start_offset = vpm_start + 6;
 114
 115                 while (vpm_size) {
 116                         uint32_t write_size = MIN2(vpm_size, 1 << 4);
 117
 118                         struct V3D33_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC unpacked = {
 119                                 /* We need the offset from the coordinate shader's VPM
 120                                  * output block, which has the [X, Y, Z, W, Xs, Ys]
 121                                  * values at the start.
 122                                  */
 123                                 .first_shaded_vertex_value_to_output = vpm_start_offset,
 124                                 .number_of_consecutive_vertex_values_to_output_as_32_bit_values_minus_1 = write_size - 1,
 125                                 .output_buffer_to_write_to = buffer,
 126                         };
 127
 128                         assert(so->num_tf_specs != ARRAY_SIZE(so->tf_specs));
 129                         V3D33_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC_pack(NULL,
 130                                                                        (void *)&so->tf_specs[so->num_tf_specs],
 131                                                                        &unpacked);
 132
 133                         /* If point size is being written by the shader, then
 134                          * all the VPM start offsets are shifted up by one.
 135                          * We won't know that until the variant is compiled,
 136                          * though.
 137                          */
 138                         unpacked.first_shaded_vertex_value_to_output++;
 139                         V3D33_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC_pack(NULL,
 140                                                                        (void *)&so->tf_specs_psiz[so->num_tf_specs],
 141                                                                        &unpacked);
 142                         so->num_tf_specs++;
 143                         vpm_start_offset += write_size;
 144                         vpm_size -= write_size;
 145                 }
 146         }
 147
 148         so->num_tf_outputs = slot_count;
 149         so->tf_outputs = ralloc_array(so->base.ir.nir, struct v3d_varying_slot,
 150                                       slot_count);
 151         memcpy(so->tf_outputs, slots, sizeof(*slots) * slot_count);
 152 }
 153
 154 static int
 155 type_size(const struct glsl_type *type)
 156 {
 157         return glsl_count_attribute_slots(type, false);
 158 }
 159
 160 static int
 161 uniforms_type_size(const struct glsl_type *type)
 162 {
 163         return st_glsl_storage_type_size(type, false);
 164 }
 165
 166 static void *
 167 vc5_shader_state_create(struct pipe_context *pctx,
 168                         const struct pipe_shader_state *cso)
 169 {
 170         struct vc5_context *vc5 = vc5_context(pctx);
 171         struct vc5_uncompiled_shader *so = CALLOC_STRUCT(vc5_uncompiled_shader);
 172         if (!so)
 173                 return NULL;
 174
 175         so->program_id = vc5->next_uncompiled_program_id++;
 176
 177         nir_shader *s;
 178
 179         if (cso->type == PIPE_SHADER_IR_NIR) {
 180                 /* The backend takes ownership of the NIR shader on state
 181                  * creation.
 182                  */
 183                 s = cso->ir.nir;
 184
 185                 NIR_PASS_V(s, nir_lower_io, nir_var_all & ~nir_var_uniform,
 186                            type_size,
 187                            (nir_lower_io_options)0);
 188                 NIR_PASS_V(s, nir_lower_io, nir_var_uniform,
 189                            uniforms_type_size,
 190                            (nir_lower_io_options)0);
 191         } else {
 192                 assert(cso->type == PIPE_SHADER_IR_TGSI);
 193
 194                 if (V3D_DEBUG & V3D_DEBUG_TGSI) {
 195                         fprintf(stderr, "prog %d TGSI:\n",
 196                                 so->program_id);
 197                         tgsi_dump(cso->tokens, 0);
 198                         fprintf(stderr, "\n");
 199                 }
 200                 s = tgsi_to_nir(cso->tokens, &v3d_nir_options);
 201
 202                 so->was_tgsi = true;
 203         }
 204
 205         NIR_PASS_V(s, nir_opt_global_to_local);
 206         NIR_PASS_V(s, nir_lower_regs_to_ssa);
 207         NIR_PASS_V(s, nir_normalize_cubemap_coords);
 208
 209         NIR_PASS_V(s, nir_lower_load_const_to_scalar);
 210
 211         v3d_optimize_nir(s);
 212
 213         NIR_PASS_V(s, nir_remove_dead_variables, nir_var_local);
 214
 215         /* Garbage collect dead instructions */
 216         nir_sweep(s);
 217
 218         so->base.type = PIPE_SHADER_IR_NIR;
 219         so->base.ir.nir = s;
 220
 221         vc5_set_transform_feedback_outputs(so, &cso->stream_output);
 222
 223         if (V3D_DEBUG & (V3D_DEBUG_NIR |
 224                          v3d_debug_flag_for_shader_stage(s->info.stage))) {
 225                 fprintf(stderr, "%s prog %d NIR:\n",
 226                         gl_shader_stage_name(s->info.stage),
 227                         so->program_id);
 228                 nir_print_shader(s, stderr);
 229                 fprintf(stderr, "\n");
 230         }
 231
 232         return so;
 233 }
 234
 235 static struct vc5_compiled_shader *
 236 vc5_get_compiled_shader(struct vc5_context *vc5, struct v3d_key *key)
 237 {
 238         struct vc5_uncompiled_shader *shader_state = key->shader_state;
 239         nir_shader *s = shader_state->base.ir.nir;
 240
 241         struct hash_table *ht;
 242         uint32_t key_size;
 243         if (s->info.stage == MESA_SHADER_FRAGMENT) {
 244                 ht = vc5->fs_cache;
 245                 key_size = sizeof(struct v3d_fs_key);
 246         } else {
 247                 ht = vc5->vs_cache;
 248                 key_size = sizeof(struct v3d_vs_key);
 249         }
 250
 251         struct hash_entry *entry = _mesa_hash_table_search(ht, key);
 252         if (entry)
 253                 return entry->data;
 254
 255         struct vc5_compiled_shader *shader =
 256                 rzalloc(NULL, struct vc5_compiled_shader);
 257
 258         int program_id = shader_state->program_id;
 259         int variant_id =
 260                 p_atomic_inc_return(&shader_state->compiled_variant_count);
 261         uint64_t *qpu_insts;
 262         uint32_t shader_size;
 263
 264         switch (s->info.stage) {
 265         case MESA_SHADER_VERTEX:
 266                 shader->prog_data.vs = rzalloc(shader, struct v3d_vs_prog_data);
 267
 268                 qpu_insts = v3d_compile_vs(vc5->screen->compiler,
 269                                            (struct v3d_vs_key *)key,
 270                                            shader->prog_data.vs, s,
 271                                            program_id, variant_id,
 272                                            &shader_size);
 273                 break;
 274         case MESA_SHADER_FRAGMENT:
 275                 shader->prog_data.fs = rzalloc(shader, struct v3d_fs_prog_data);
 276
 277                 qpu_insts = v3d_compile_fs(vc5->screen->compiler,
 278                                            (struct v3d_fs_key *)key,
 279                                            shader->prog_data.fs, s,
 280                                            program_id, variant_id,
 281                                            &shader_size);
 282                 break;
 283         default:
 284                 unreachable("bad stage");
 285         }
 286
 287         vc5_set_shader_uniform_dirty_flags(shader);
 288
 289         shader->bo = vc5_bo_alloc(vc5->screen, shader_size, "shader");
 290         vc5_bo_map(shader->bo);
 291         memcpy(shader->bo->map, qpu_insts, shader_size);
 292
 293         free(qpu_insts);
 294
 295         struct vc5_key *dup_key;
 296         dup_key = ralloc_size(shader, key_size);
 297         memcpy(dup_key, key, key_size);
 298         _mesa_hash_table_insert(ht, dup_key, shader);
 299
 300         if (shader->prog_data.base->spill_size >
 301             vc5->prog.spill_size_per_thread) {
 302                 /* Max 4 QPUs per slice, 3 slices per core. We only do single
 303                  * core so far.  This overallocates memory on smaller cores.
 304                  */
 305                 int total_spill_size =
 306                         4 * 3 * shader->prog_data.base->spill_size;
 307
 308                 vc5_bo_unreference(&vc5->prog.spill_bo);
 309                 vc5->prog.spill_bo = vc5_bo_alloc(vc5->screen,
 310                                                   total_spill_size, "spill");
 311                 vc5->prog.spill_size_per_thread =
 312                         shader->prog_data.base->spill_size;
 313         }
 314
 315         return shader;
 316 }
 317
 318 static void
 319 vc5_setup_shared_key(struct vc5_context *vc5, struct v3d_key *key,
 320                      struct vc5_texture_stateobj *texstate)
 321 {
 322         const struct v3d_device_info *devinfo = &vc5->screen->devinfo;
 323
 324         for (int i = 0; i < texstate->num_textures; i++) {
 325                 struct pipe_sampler_view *sampler = texstate->textures[i];
 326                 struct vc5_sampler_view *vc5_sampler = vc5_sampler_view(sampler);
 327                 struct pipe_sampler_state *sampler_state =
 328                         texstate->samplers[i];
 329
 330                 if (!sampler)
 331                         continue;
 332
 333                 key->tex[i].return_size =
 334                         vc5_get_tex_return_size(devinfo,
 335                                                 sampler->format,
 336                                                 sampler_state->compare_mode);
 337
 338                 /* For 16-bit, we set up the sampler to always return 2
 339                  * channels (meaning no recompiles for most statechanges),
 340                  * while for 32 we actually scale the returns with channels.
 341                  */
 342                 if (key->tex[i].return_size == 16) {
 343                         key->tex[i].return_channels = 2;
 344                 } else if (devinfo->ver > 40) {
 345                         key->tex[i].return_channels = 4;
 346                 } else {
 347                         key->tex[i].return_channels =
 348                                 vc5_get_tex_return_channels(devinfo,
 349                                                             sampler->format);
 350                 }
 351
 352                 if (key->tex[i].return_size == 32 && devinfo->ver < 40) {
 353                         memcpy(key->tex[i].swizzle,
 354                                vc5_sampler->swizzle,
 355                                sizeof(vc5_sampler->swizzle));
 356                 } else {
 357                         /* For 16-bit returns, we let the sampler state handle
 358                          * the swizzle.
 359                          */
 360                         key->tex[i].swizzle[0] = PIPE_SWIZZLE_X;
 361                         key->tex[i].swizzle[1] = PIPE_SWIZZLE_Y;
 362                         key->tex[i].swizzle[2] = PIPE_SWIZZLE_Z;
 363                         key->tex[i].swizzle[3] = PIPE_SWIZZLE_W;
 364                 }
 365
 366                 if (sampler) {
 367                         key->tex[i].compare_mode = sampler_state->compare_mode;
 368                         key->tex[i].compare_func = sampler_state->compare_func;
 369                         key->tex[i].clamp_s =
 370                                 sampler_state->wrap_s == PIPE_TEX_WRAP_CLAMP;
 371                         key->tex[i].clamp_t =
 372                                 sampler_state->wrap_t == PIPE_TEX_WRAP_CLAMP;
 373                         key->tex[i].clamp_r =
 374                                 sampler_state->wrap_r == PIPE_TEX_WRAP_CLAMP;
 375                 }
 376         }
 377
 378         key->ucp_enables = vc5->rasterizer->base.clip_plane_enable;
 379 }
 380
 381 static void
 382 vc5_update_compiled_fs(struct vc5_context *vc5, uint8_t prim_mode)
 383 {
 384         struct vc5_job *job = vc5->job;
 385         struct v3d_fs_key local_key;
 386         struct v3d_fs_key *key = &local_key;
 387
 388         if (!(vc5->dirty & (VC5_DIRTY_PRIM_MODE |
 389                             VC5_DIRTY_BLEND |
 390                             VC5_DIRTY_FRAMEBUFFER |
 391                             VC5_DIRTY_ZSA |
 392                             VC5_DIRTY_RASTERIZER |
 393                             VC5_DIRTY_SAMPLE_MASK |
 394                             VC5_DIRTY_FRAGTEX |
 395                             VC5_DIRTY_UNCOMPILED_FS))) {
 396                 return;
 397         }
 398
 399         memset(key, 0, sizeof(*key));
 400         vc5_setup_shared_key(vc5, &key->base, &vc5->fragtex);
 401         key->base.shader_state = vc5->prog.bind_fs;
 402         key->is_points = (prim_mode == PIPE_PRIM_POINTS);
 403         key->is_lines = (prim_mode >= PIPE_PRIM_LINES &&
 404                          prim_mode <= PIPE_PRIM_LINE_STRIP);
 405         key->clamp_color = vc5->rasterizer->base.clamp_fragment_color;
 406         if (vc5->blend->logicop_enable) {
 407                 key->logicop_func = vc5->blend->logicop_func;
 408         } else {
 409                 key->logicop_func = PIPE_LOGICOP_COPY;
 410         }
 411         if (job->msaa) {
 412                 key->msaa = vc5->rasterizer->base.multisample;
 413                 key->sample_coverage = (vc5->rasterizer->base.multisample &&
 414                                         vc5->sample_mask != (1 << VC5_MAX_SAMPLES) - 1);
 415                 key->sample_alpha_to_coverage = vc5->blend->alpha_to_coverage;
 416                 key->sample_alpha_to_one = vc5->blend->alpha_to_one;
 417         }
 418
 419         key->depth_enabled = (vc5->zsa->base.depth.enabled ||
 420                               vc5->zsa->base.stencil[0].enabled);
 421         if (vc5->zsa->base.alpha.enabled) {
 422                 key->alpha_test = true;
 423                 key->alpha_test_func = vc5->zsa->base.alpha.func;
 424         }
 425
 426         /* gl_FragColor's propagation to however many bound color buffers
 427          * there are means that the buffer count needs to be in the key.
 428          */
 429         key->nr_cbufs = vc5->framebuffer.nr_cbufs;
 430         key->swap_color_rb = vc5->swap_color_rb;
 431
 432         for (int i = 0; i < key->nr_cbufs; i++) {
 433                 struct pipe_surface *cbuf = vc5->framebuffer.cbufs[i];
 434                 if (!cbuf)
 435                         continue;
 436
 437                 const struct util_format_description *desc =
 438                         util_format_description(cbuf->format);
 439
 440                 if (desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT &&
 441                     desc->channel[0].size == 32) {
 442                         key->f32_color_rb |= 1 << i;
 443                 }
 444
 445                 if (vc5->prog.bind_fs->was_tgsi) {
 446                         if (util_format_is_pure_uint(cbuf->format))
 447                                 key->uint_color_rb |= 1 << i;
 448                         else if (util_format_is_pure_sint(cbuf->format))
 449                                 key->int_color_rb |= 1 << i;
 450                 }
 451         }
 452
 453         if (key->is_points) {
 454                 key->point_sprite_mask =
 455                         vc5->rasterizer->base.sprite_coord_enable;
 456                 key->point_coord_upper_left =
 457                         (vc5->rasterizer->base.sprite_coord_mode ==
 458                          PIPE_SPRITE_COORD_UPPER_LEFT);
 459         }
 460
 461         key->light_twoside = vc5->rasterizer->base.light_twoside;
 462         key->shade_model_flat = vc5->rasterizer->base.flatshade;
 463
 464         struct vc5_compiled_shader *old_fs = vc5->prog.fs;
 465         vc5->prog.fs = vc5_get_compiled_shader(vc5, &key->base);
 466         if (vc5->prog.fs == old_fs)
 467                 return;
 468
 469         vc5->dirty |= VC5_DIRTY_COMPILED_FS;
 470
 471         if (old_fs &&
 472             vc5->prog.fs->prog_data.fs->flat_shade_flags !=
 473             old_fs->prog_data.fs->flat_shade_flags) {
 474                 vc5->dirty |= VC5_DIRTY_FLAT_SHADE_FLAGS;
 475         }
 476
 477         if (old_fs && memcmp(vc5->prog.fs->prog_data.fs->input_slots,
 478                              old_fs->prog_data.fs->input_slots,
 479                              sizeof(vc5->prog.fs->prog_data.fs->input_slots))) {
 480                 vc5->dirty |= VC5_DIRTY_FS_INPUTS;
 481         }
 482 }
 483
 484 static void
 485 vc5_update_compiled_vs(struct vc5_context *vc5, uint8_t prim_mode)
 486 {
 487         struct v3d_vs_key local_key;
 488         struct v3d_vs_key *key = &local_key;
 489
 490         if (!(vc5->dirty & (VC5_DIRTY_PRIM_MODE |
 491                             VC5_DIRTY_RASTERIZER |
 492                             VC5_DIRTY_VERTTEX |
 493                             VC5_DIRTY_VTXSTATE |
 494                             VC5_DIRTY_UNCOMPILED_VS |
 495                             VC5_DIRTY_FS_INPUTS))) {
 496                 return;
 497         }
 498
 499         memset(key, 0, sizeof(*key));
 500         vc5_setup_shared_key(vc5, &key->base, &vc5->verttex);
 501         key->base.shader_state = vc5->prog.bind_vs;
 502         key->num_fs_inputs = vc5->prog.fs->prog_data.fs->base.num_inputs;
 503         STATIC_ASSERT(sizeof(key->fs_inputs) ==
 504                       sizeof(vc5->prog.fs->prog_data.fs->input_slots));
 505         memcpy(key->fs_inputs, vc5->prog.fs->prog_data.fs->input_slots,
 506                sizeof(key->fs_inputs));
 507         key->clamp_color = vc5->rasterizer->base.clamp_vertex_color;
 508
 509         key->per_vertex_point_size =
 510                 (prim_mode == PIPE_PRIM_POINTS &&
 511                  vc5->rasterizer->base.point_size_per_vertex);
 512
 513         struct vc5_compiled_shader *vs =
 514                 vc5_get_compiled_shader(vc5, &key->base);
 515         if (vs != vc5->prog.vs) {
 516                 vc5->prog.vs = vs;
 517                 vc5->dirty |= VC5_DIRTY_COMPILED_VS;
 518         }
 519
 520         key->is_coord = true;
 521         /* Coord shaders only output varyings used by transform feedback. */
 522         struct vc5_uncompiled_shader *shader_state = key->base.shader_state;
 523         memcpy(key->fs_inputs, shader_state->tf_outputs,
 524                sizeof(*key->fs_inputs) * shader_state->num_tf_outputs);
 525         if (shader_state->num_tf_outputs < key->num_fs_inputs) {
 526                 memset(&key->fs_inputs[shader_state->num_tf_outputs],
 527                        0,
 528                        sizeof(*key->fs_inputs) * (key->num_fs_inputs -
 529                                                   shader_state->num_tf_outputs));
 530         }
 531         key->num_fs_inputs = shader_state->num_tf_outputs;
 532
 533         struct vc5_compiled_shader *cs =
 534                 vc5_get_compiled_shader(vc5, &key->base);
 535         if (cs != vc5->prog.cs) {
 536                 vc5->prog.cs = cs;
 537                 vc5->dirty |= VC5_DIRTY_COMPILED_CS;
 538         }
 539 }
 540
 541 void
 542 vc5_update_compiled_shaders(struct vc5_context *vc5, uint8_t prim_mode)
 543 {
 544         vc5_update_compiled_fs(vc5, prim_mode);
 545         vc5_update_compiled_vs(vc5, prim_mode);
 546 }
 547
 548 static uint32_t
 549 fs_cache_hash(const void *key)
 550 {
 551         return _mesa_hash_data(key, sizeof(struct v3d_fs_key));
 552 }
 553
 554 static uint32_t
 555 vs_cache_hash(const void *key)
 556 {
 557         return _mesa_hash_data(key, sizeof(struct v3d_vs_key));
 558 }
 559
 560 static bool
 561 fs_cache_compare(const void *key1, const void *key2)
 562 {
 563         return memcmp(key1, key2, sizeof(struct v3d_fs_key)) == 0;
 564 }
 565
 566 static bool
 567 vs_cache_compare(const void *key1, const void *key2)
 568 {
 569         return memcmp(key1, key2, sizeof(struct v3d_vs_key)) == 0;
 570 }
 571
 572 static void
 573 delete_from_cache_if_matches(struct hash_table *ht,
 574                              struct vc5_compiled_shader **last_compile,
 575                              struct hash_entry *entry,
 576                              struct vc5_uncompiled_shader *so)
 577 {
 578         const struct v3d_key *key = entry->key;
 579
 580         if (key->shader_state == so) {
 581                 struct vc5_compiled_shader *shader = entry->data;
 582                 _mesa_hash_table_remove(ht, entry);
 583                 vc5_bo_unreference(&shader->bo);
 584
 585                 if (shader == *last_compile)
 586                         *last_compile = NULL;
 587
 588                 ralloc_free(shader);
 589         }
 590 }
 591
 592 static void
 593 vc5_shader_state_delete(struct pipe_context *pctx, void *hwcso)
 594 {
 595         struct vc5_context *vc5 = vc5_context(pctx);
 596         struct vc5_uncompiled_shader *so = hwcso;
 597
 598         struct hash_entry *entry;
 599         hash_table_foreach(vc5->fs_cache, entry) {
 600                 delete_from_cache_if_matches(vc5->fs_cache, &vc5->prog.fs,
 601                                              entry, so);
 602         }
 603         hash_table_foreach(vc5->vs_cache, entry) {
 604                 delete_from_cache_if_matches(vc5->vs_cache, &vc5->prog.vs,
 605                                              entry, so);
 606         }
 607
 608         ralloc_free(so->base.ir.nir);
 609         free(so);
 610 }
 611
 612 static void
 613 vc5_fp_state_bind(struct pipe_context *pctx, void *hwcso)
 614 {
 615         struct vc5_context *vc5 = vc5_context(pctx);
 616         vc5->prog.bind_fs = hwcso;
 617         vc5->dirty |= VC5_DIRTY_UNCOMPILED_FS;
 618 }
 619
 620 static void
 621 vc5_vp_state_bind(struct pipe_context *pctx, void *hwcso)
 622 {
 623         struct vc5_context *vc5 = vc5_context(pctx);
 624         vc5->prog.bind_vs = hwcso;
 625         vc5->dirty |= VC5_DIRTY_UNCOMPILED_VS;
 626 }
 627
 628 void
 629 vc5_program_init(struct pipe_context *pctx)
 630 {
 631         struct vc5_context *vc5 = vc5_context(pctx);
 632
 633         pctx->create_vs_state = vc5_shader_state_create;
 634         pctx->delete_vs_state = vc5_shader_state_delete;
 635
 636         pctx->create_fs_state = vc5_shader_state_create;
 637         pctx->delete_fs_state = vc5_shader_state_delete;
 638
 639         pctx->bind_fs_state = vc5_fp_state_bind;
 640         pctx->bind_vs_state = vc5_vp_state_bind;
 641
 642         vc5->fs_cache = _mesa_hash_table_create(pctx, fs_cache_hash,
 643                                                 fs_cache_compare);
 644         vc5->vs_cache = _mesa_hash_table_create(pctx, vs_cache_hash,
 645                                                 vs_cache_compare);
 646 }
 647
 648 void
 649 vc5_program_fini(struct pipe_context *pctx)
 650 {
 651         struct vc5_context *vc5 = vc5_context(pctx);
 652
 653         struct hash_entry *entry;
 654         hash_table_foreach(vc5->fs_cache, entry) {
 655                 struct vc5_compiled_shader *shader = entry->data;
 656                 vc5_bo_unreference(&shader->bo);
 657                 ralloc_free(shader);
 658                 _mesa_hash_table_remove(vc5->fs_cache, entry);
 659         }
 660
 661         hash_table_foreach(vc5->vs_cache, entry) {
 662                 struct vc5_compiled_shader *shader = entry->data;
 663                 vc5_bo_unreference(&shader->bo);
 664                 ralloc_free(shader);
 665                 _mesa_hash_table_remove(vc5->vs_cache, entry);
 666         }
 667 }