v3d: Add Compute Shader compilation support.
author Eric Anholt <eric@anholt.net>
Wed, 5 Dec 2018 23:41:35 +0000 (15:41 -0800)
committer Eric Anholt <eric@anholt.net>
Fri, 12 Apr 2019 22:59:31 +0000 (15:59 -0700)
While waiting for the CSD UABI to get reviewed, I keep having to rebase
the CS patch.  Just land the compiler side for now to keep it from
diverging.

For now this covers just GLES 3.1 compute shaders, not CL kernels.

src/broadcom/compiler/nir_to_vir.c
src/broadcom/compiler/v3d_compiler.h
src/broadcom/compiler/vir.c
src/gallium/drivers/v3d/v3d_context.h
src/gallium/drivers/v3d/v3d_program.c
src/gallium/drivers/v3d/v3d_screen.c
src/gallium/drivers/v3d/v3d_screen.h
src/gallium/drivers/v3d/v3d_uniforms.c
src/gallium/drivers/v3d/v3dx_draw.c

index 2b196324754aab36db52f6d4f7ddc025e666cef6..a7b3adb6c634d295ecead1be82e0c9c0b1fea619 100644 (file)
@@ -1806,7 +1806,7 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
         case nir_intrinsic_memory_barrier_atomic_counter:
         case nir_intrinsic_memory_barrier_buffer:
         case nir_intrinsic_memory_barrier_image:
-        case nir_intrinsic_memory_barrier_shared:
+        case nir_intrinsic_group_memory_barrier:
                 /* We don't do any instruction scheduling of these NIR
                  * instructions between each other, so we just need to make
                  * sure that the TMU operations before the barrier are flushed
@@ -1869,6 +1869,10 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
                                        vir_uniform_ui(c, 0xffff)));
                 break;
 
+        case nir_intrinsic_load_subgroup_id:
+                ntq_store_dest(c, &instr->dest, 0, vir_EIDX(c));
+                break;
+
         default:
                 fprintf(stderr, "Unknown intrinsic: ");
                 nir_print_instr(&instr->instr, stderr);
@@ -2444,6 +2448,8 @@ v3d_nir_to_vir(struct v3d_compile *c)
         case MESA_SHADER_VERTEX:
                 emit_vert_end(c);
                 break;
+        case MESA_SHADER_COMPUTE:
+                break;
         default:
                 unreachable("bad stage");
         }
index 94247860c680166d8887c0645914da913cae1461..b2bc40b10fe3df3ca089f96d563171d907e28934 100644 (file)
@@ -691,6 +691,12 @@ struct v3d_fs_prog_data {
         bool uses_center_w;
 };
 
+/* Compile results specific to compute shaders.  The common v3d_prog_data is
+ * embedded as the first member.
+ */
+struct v3d_compute_prog_data {
+        struct v3d_prog_data base;
+        /* Size in bytes of the workgroup's shared space. */
+        uint32_t shared_size;
+};
+
 static inline bool
 vir_has_uniform(struct qinst *inst)
 {
index b785b53c62c9cf72a7f1cb07467fc0b40ba6b04c..6655e5e73bcd11e40a7152b541a84a8d73d39984 100644 (file)
@@ -562,6 +562,21 @@ v3d_lower_nir(struct v3d_compile *c)
                 }
         }
 
+        /* CS textures may not have return_size reflecting the shadow state. */
+        nir_foreach_variable(var, &c->s->uniforms) {
+                const struct glsl_type *type = glsl_without_array(var->type);
+                unsigned array_len = MAX2(glsl_get_length(var->type), 1);
+
+                if (!glsl_type_is_sampler(type) ||
+                    !glsl_sampler_type_is_shadow(type))
+                        continue;
+
+                for (int i = 0; i < array_len; i++) {
+                        tex_options.lower_tex_packing[var->data.binding + i] =
+                                nir_lower_tex_packing_16;
+                }
+        }
+
         NIR_PASS_V(c->s, nir_lower_tex, &tex_options);
         NIR_PASS_V(c->s, nir_lower_system_values);
 }
@@ -669,6 +684,13 @@ v3d_fs_set_prog_data(struct v3d_compile *c,
         prog_data->uses_center_w = c->uses_center_w;
 }
 
+/* Fills in the compute-only part of the prog_data: the shared memory size
+ * recorded in the shader's info.
+ */
+static void
+v3d_cs_set_prog_data(struct v3d_compile *c,
+                     struct v3d_compute_prog_data *prog_data)
+{
+        const uint32_t shared_bytes = c->s->info.cs.shared_size;
+
+        prog_data->shared_size = shared_bytes;
+}
+
 static void
 v3d_set_prog_data(struct v3d_compile *c,
                   struct v3d_prog_data *prog_data)
@@ -679,7 +701,9 @@ v3d_set_prog_data(struct v3d_compile *c,
 
         v3d_set_prog_data_uniforms(c, prog_data);
 
-        if (c->s->info.stage == MESA_SHADER_VERTEX) {
+        if (c->s->info.stage == MESA_SHADER_COMPUTE) {
+                v3d_cs_set_prog_data(c, (struct v3d_compute_prog_data *)prog_data);
+        } else if (c->s->info.stage == MESA_SHADER_VERTEX) {
                 v3d_vs_set_prog_data(c, (struct v3d_vs_prog_data *)prog_data);
         } else {
                 assert(c->s->info.stage == MESA_SHADER_FRAGMENT);
@@ -865,13 +889,17 @@ uint64_t *v3d_compile(const struct v3d_compiler *compiler,
                 c->fs_key = (struct v3d_fs_key *)key;
                 prog_data = rzalloc_size(NULL, sizeof(struct v3d_fs_prog_data));
                 break;
+        case MESA_SHADER_COMPUTE:
+                prog_data = rzalloc_size(NULL,
+                                         sizeof(struct v3d_compute_prog_data));
+                break;
         default:
                 unreachable("unsupported shader stage");
         }
 
         if (c->s->info.stage == MESA_SHADER_VERTEX) {
                 v3d_nir_lower_vs_early(c);
-        } else {
+        } else if (c->s->info.stage != MESA_SHADER_COMPUTE) {
                 assert(c->s->info.stage == MESA_SHADER_FRAGMENT);
                 v3d_nir_lower_fs_early(c);
         }
@@ -880,7 +908,7 @@ uint64_t *v3d_compile(const struct v3d_compiler *compiler,
 
         if (c->s->info.stage == MESA_SHADER_VERTEX) {
                 v3d_nir_lower_vs_late(c);
-        } else {
+        } else if (c->s->info.stage != MESA_SHADER_COMPUTE)  {
                 assert(c->s->info.stage == MESA_SHADER_FRAGMENT);
                 v3d_nir_lower_fs_late(c);
         }
index 3b39d18145d23a9efd08de5af92f68d31e4ce869..225ebe2f5abf0308f3c383f0c3e150f37ad15186 100644 (file)
@@ -186,6 +186,7 @@ struct v3d_compiled_shader {
                 struct v3d_prog_data *base;
                 struct v3d_vs_prog_data *vs;
                 struct v3d_fs_prog_data *fs;
+                struct v3d_compute_prog_data *compute;
         } prog_data;
 
         /**
@@ -197,8 +198,10 @@ struct v3d_compiled_shader {
 };
 
 struct v3d_program_stateobj {
-        struct v3d_uncompiled_shader *bind_vs, *bind_fs;
-        struct v3d_compiled_shader *cs, *vs, *fs;
+        struct v3d_uncompiled_shader *bind_vs, *bind_fs, *bind_compute;
+        struct v3d_compiled_shader *cs, *vs, *fs, *compute;
+
+        struct hash_table *cache[MESA_SHADER_STAGES];
 
         struct v3d_bo *spill_bo;
         int spill_size_per_thread;
@@ -414,7 +417,6 @@ struct v3d_context {
 
         struct primconvert_context *primconvert;
 
-        struct hash_table *fs_cache, *vs_cache;
         uint32_t next_uncompiled_program_id;
         uint64_t next_compiled_program_id;
 
@@ -446,6 +448,8 @@ struct v3d_context {
         struct v3d_depth_stencil_alpha_state *zsa;
 
         struct v3d_program_stateobj prog;
+        uint32_t compute_num_workgroups[3];
+        struct v3d_bo *compute_shared_memory;
 
         struct v3d_vertex_stateobj *vtx;
 
@@ -584,6 +588,7 @@ void v3d_flush_jobs_writing_resource(struct v3d_context *v3d,
 void v3d_flush_jobs_reading_resource(struct v3d_context *v3d,
                                      struct pipe_resource *prsc);
 void v3d_update_compiled_shaders(struct v3d_context *v3d, uint8_t prim_mode);
+void v3d_update_compiled_cs(struct v3d_context *v3d);
 
 bool v3d_rt_format_supported(const struct v3d_device_info *devinfo,
                              enum pipe_format f);
index e3e491e9fd798fdc49adb1379aa0ee746b04c837..7805b808a010edaa712e4d9892e705de6abe5019 100644 (file)
@@ -38,7 +38,8 @@
 #include "broadcom/cle/v3d_packet_v33_pack.h"
 
 static struct v3d_compiled_shader *
-v3d_get_compiled_shader(struct v3d_context *v3d, struct v3d_key *key);
+v3d_get_compiled_shader(struct v3d_context *v3d,
+                        struct v3d_key *key, size_t key_size);
 static void
 v3d_setup_shared_precompile_key(struct v3d_uncompiled_shader *uncompiled,
                                 struct v3d_key *key);
@@ -200,7 +201,7 @@ v3d_shader_precompile(struct v3d_context *v3d,
                 }
 
                 v3d_setup_shared_precompile_key(so, &key.base);
-                v3d_get_compiled_shader(v3d, &key.base);
+                v3d_get_compiled_shader(v3d, &key.base, sizeof(key));
         } else {
                 struct v3d_vs_key key = {
                         .base.shader_state = so,
@@ -223,7 +224,7 @@ v3d_shader_precompile(struct v3d_context *v3d,
                         }
                 }
 
-                v3d_get_compiled_shader(v3d, &key.base);
+                v3d_get_compiled_shader(v3d, &key.base, sizeof(key));
 
                 /* Compile VS bin shader: only position (XXX: include TF) */
                 key.is_coord = true;
@@ -233,13 +234,13 @@ v3d_shader_precompile(struct v3d_context *v3d,
                                 v3d_slot_from_slot_and_component(VARYING_SLOT_POS,
                                                                  i);
                 }
-                v3d_get_compiled_shader(v3d, &key.base);
+                v3d_get_compiled_shader(v3d, &key.base, sizeof(key));
         }
 }
 
 static void *
-v3d_shader_state_create(struct pipe_context *pctx,
-                        const struct pipe_shader_state *cso)
+v3d_uncompiled_shader_create(struct pipe_context *pctx,
+                             enum pipe_shader_ir type, void *ir)
 {
         struct v3d_context *v3d = v3d_context(pctx);
         struct v3d_uncompiled_shader *so = CALLOC_STRUCT(v3d_uncompiled_shader);
@@ -250,21 +251,21 @@ v3d_shader_state_create(struct pipe_context *pctx,
 
         nir_shader *s;
 
-        if (cso->type == PIPE_SHADER_IR_NIR) {
+        if (type == PIPE_SHADER_IR_NIR) {
                 /* The backend takes ownership of the NIR shader on state
                  * creation.
                  */
-                s = cso->ir.nir;
+                s = ir;
         } else {
-                assert(cso->type == PIPE_SHADER_IR_TGSI);
+                assert(type == PIPE_SHADER_IR_TGSI);
 
                 if (V3D_DEBUG & V3D_DEBUG_TGSI) {
                         fprintf(stderr, "prog %d TGSI:\n",
                                 so->program_id);
-                        tgsi_dump(cso->tokens, 0);
+                        tgsi_dump(ir, 0);
                         fprintf(stderr, "\n");
                 }
-                s = tgsi_to_nir(cso->tokens, pctx->screen);
+                s = tgsi_to_nir(ir, pctx->screen);
         }
 
         nir_variable_mode lower_mode = nir_var_all & ~nir_var_uniform;
@@ -289,8 +290,6 @@ v3d_shader_state_create(struct pipe_context *pctx,
         so->base.type = PIPE_SHADER_IR_NIR;
         so->base.ir.nir = s;
 
-        v3d_set_transform_feedback_outputs(so, &cso->stream_output);
-
         if (V3D_DEBUG & (V3D_DEBUG_NIR |
                          v3d_debug_flag_for_shader_stage(s->info.stage))) {
                 fprintf(stderr, "%s prog %d NIR:\n",
@@ -314,22 +313,31 @@ v3d_shader_debug_output(const char *message, void *data)
         pipe_debug_message(&v3d->debug, SHADER_INFO, "%s", message);
 }
 
-static struct v3d_compiled_shader *
-v3d_get_compiled_shader(struct v3d_context *v3d, struct v3d_key *key)
+/**
+ * Creates the uncompiled shader object for a pipe_shader_state.
+ *
+ * The IR handed to v3d_uncompiled_shader_create() is the TGSI token stream
+ * when cso->type is PIPE_SHADER_IR_TGSI and the NIR shader otherwise.  The
+ * transform feedback outputs from cso->stream_output are then recorded on
+ * the new shader object.
+ *
+ * Returns the new shader object, or NULL if it could not be created.
+ */
+static void *
+v3d_shader_state_create(struct pipe_context *pctx,
+                        const struct pipe_shader_state *cso)
+{
+        void *ir = (cso->type == PIPE_SHADER_IR_TGSI ?
+                    (void *)cso->tokens :
+                    cso->ir.nir);
+        struct v3d_uncompiled_shader *so =
+                v3d_uncompiled_shader_create(pctx, cso->type, ir);
+
+        /* The helper allocates the shader object; don't pass a NULL result
+         * on to the transform feedback setup.
+         * NOTE(review): the helper's failure path isn't visible here --
+         * confirm it returns NULL when the allocation fails.
+         */
+        if (!so)
+                return NULL;
+
+        v3d_set_transform_feedback_outputs(so, &cso->stream_output);
+
+        return so;
+}
+
+struct v3d_compiled_shader *
+v3d_get_compiled_shader(struct v3d_context *v3d,
+                        struct v3d_key *key,
+                        size_t key_size)
 {
         struct v3d_uncompiled_shader *shader_state = key->shader_state;
         nir_shader *s = shader_state->base.ir.nir;
 
-        struct hash_table *ht;
-        uint32_t key_size;
-        if (s->info.stage == MESA_SHADER_FRAGMENT) {
-                ht = v3d->fs_cache;
-                key_size = sizeof(struct v3d_fs_key);
-        } else {
-                ht = v3d->vs_cache;
-                key_size = sizeof(struct v3d_vs_key);
-        }
-
+        struct hash_table *ht = v3d->prog.cache[s->info.stage];
         struct hash_entry *entry = _mesa_hash_table_search(ht, key);
         if (entry)
                 return entry->data;
@@ -359,10 +367,12 @@ v3d_get_compiled_shader(struct v3d_context *v3d, struct v3d_key *key)
 
         free(qpu_insts);
 
-        struct v3d_key *dup_key;
-        dup_key = ralloc_size(shader, key_size);
-        memcpy(dup_key, key, key_size);
-        _mesa_hash_table_insert(ht, dup_key, shader);
+        if (ht) {
+                struct v3d_key *dup_key;
+                dup_key = ralloc_size(shader, key_size);
+                memcpy(dup_key, key, key_size);
+                _mesa_hash_table_insert(ht, dup_key, shader);
+        }
 
         if (shader->prog_data.base->spill_size >
             v3d->prog.spill_size_per_thread) {
@@ -446,8 +456,6 @@ v3d_setup_shared_key(struct v3d_context *v3d, struct v3d_key *key,
                                 sampler_state->wrap_r == PIPE_TEX_WRAP_CLAMP;
                 }
         }
-
-        key->ucp_enables = v3d->rasterizer->base.clip_plane_enable;
 }
 
 static void
@@ -489,6 +497,7 @@ v3d_update_compiled_fs(struct v3d_context *v3d, uint8_t prim_mode)
         memset(key, 0, sizeof(*key));
         v3d_setup_shared_key(v3d, &key->base, &v3d->tex[PIPE_SHADER_FRAGMENT]);
         key->base.shader_state = v3d->prog.bind_fs;
+        key->base.ucp_enables = v3d->rasterizer->base.clip_plane_enable;
         key->is_points = (prim_mode == PIPE_PRIM_POINTS);
         key->is_lines = (prim_mode >= PIPE_PRIM_LINES &&
                          prim_mode <= PIPE_PRIM_LINE_STRIP);
@@ -554,7 +563,7 @@ v3d_update_compiled_fs(struct v3d_context *v3d, uint8_t prim_mode)
         key->shade_model_flat = v3d->rasterizer->base.flatshade;
 
         struct v3d_compiled_shader *old_fs = v3d->prog.fs;
-        v3d->prog.fs = v3d_get_compiled_shader(v3d, &key->base);
+        v3d->prog.fs = v3d_get_compiled_shader(v3d, &key->base, sizeof(*key));
         if (v3d->prog.fs == old_fs)
                 return;
 
@@ -602,6 +611,7 @@ v3d_update_compiled_vs(struct v3d_context *v3d, uint8_t prim_mode)
         memset(key, 0, sizeof(*key));
         v3d_setup_shared_key(v3d, &key->base, &v3d->tex[PIPE_SHADER_VERTEX]);
         key->base.shader_state = v3d->prog.bind_vs;
+        key->base.ucp_enables = v3d->rasterizer->base.clip_plane_enable;
         key->num_fs_inputs = v3d->prog.fs->prog_data.fs->num_inputs;
         STATIC_ASSERT(sizeof(key->fs_inputs) ==
                       sizeof(v3d->prog.fs->prog_data.fs->input_slots));
@@ -614,7 +624,7 @@ v3d_update_compiled_vs(struct v3d_context *v3d, uint8_t prim_mode)
                  v3d->rasterizer->base.point_size_per_vertex);
 
         struct v3d_compiled_shader *vs =
-                v3d_get_compiled_shader(v3d, &key->base);
+                v3d_get_compiled_shader(v3d, &key->base, sizeof(*key));
         if (vs != v3d->prog.vs) {
                 v3d->prog.vs = vs;
                 v3d->dirty |= VC5_DIRTY_COMPILED_VS;
@@ -634,7 +644,7 @@ v3d_update_compiled_vs(struct v3d_context *v3d, uint8_t prim_mode)
         key->num_fs_inputs = shader_state->num_tf_outputs;
 
         struct v3d_compiled_shader *cs =
-                v3d_get_compiled_shader(v3d, &key->base);
+                v3d_get_compiled_shader(v3d, &key->base, sizeof(*key));
         if (cs != v3d->prog.cs) {
                 v3d->prog.cs = cs;
                 v3d->dirty |= VC5_DIRTY_COMPILED_CS;
@@ -648,6 +658,30 @@ v3d_update_compiled_shaders(struct v3d_context *v3d, uint8_t prim_mode)
         v3d_update_compiled_vs(v3d, prim_mode);
 }
 
+/**
+ * Looks up (compiling if needed) the variant of the bound compute shader
+ * that matches the current compute texture state, and makes it the current
+ * compiled compute shader.
+ */
+void
+v3d_update_compiled_cs(struct v3d_context *v3d)
+{
+        /* XXX: the ~0 term makes this match whenever any dirty bit is set,
+         * so the two named flags are currently redundant.
+         */
+        if ((v3d->dirty & (~0 | /* XXX */
+                           VC5_DIRTY_VERTTEX |
+                           VC5_DIRTY_UNCOMPILED_FS)) == 0) {
+                return;
+        }
+
+        /* Clear the whole key with memset() before filling it in: the
+         * compute cache hashes and compares the key as raw bytes.
+         */
+        struct v3d_key cs_key;
+        memset(&cs_key, 0, sizeof(cs_key));
+        v3d_setup_shared_key(v3d, &cs_key, &v3d->tex[PIPE_SHADER_COMPUTE]);
+        cs_key.shader_state = v3d->prog.bind_compute;
+
+        struct v3d_compiled_shader *compiled =
+                v3d_get_compiled_shader(v3d, &cs_key, sizeof(cs_key));
+        if (compiled == v3d->prog.compute)
+                return;
+
+        v3d->prog.compute = compiled;
+        v3d->dirty |= VC5_DIRTY_COMPILED_CS; /* XXX */
+}
+
 static uint32_t
 fs_cache_hash(const void *key)
 {
@@ -660,6 +694,12 @@ vs_cache_hash(const void *key)
         return _mesa_hash_data(key, sizeof(struct v3d_vs_key));
 }
 
+/* Hash callback for the compute shader cache: hashes the key as the raw
+ * bytes of a struct v3d_key.
+ */
+static uint32_t
+cs_cache_hash(const void *key)
+{
+        const size_t key_bytes = sizeof(struct v3d_key);
+
+        return _mesa_hash_data(key, key_bytes);
+}
+
 static bool
 fs_cache_compare(const void *key1, const void *key2)
 {
@@ -672,23 +712,10 @@ vs_cache_compare(const void *key1, const void *key2)
         return memcmp(key1, key2, sizeof(struct v3d_vs_key)) == 0;
 }
 
-static void
-delete_from_cache_if_matches(struct hash_table *ht,
-                             struct v3d_compiled_shader **last_compile,
-                             struct hash_entry *entry,
-                             struct v3d_uncompiled_shader *so)
+static bool
+cs_cache_compare(const void *key1, const void *key2)
 {
-        const struct v3d_key *key = entry->key;
-
-        if (key->shader_state == so) {
-                struct v3d_compiled_shader *shader = entry->data;
-                _mesa_hash_table_remove(ht, entry);
-
-                if (shader == *last_compile)
-                        *last_compile = NULL;
-
-                v3d_free_compiled_shader(shader);
-        }
+        return memcmp(key1, key2, sizeof(struct v3d_key)) == 0;
 }
 
 static void
@@ -696,14 +723,26 @@ v3d_shader_state_delete(struct pipe_context *pctx, void *hwcso)
 {
         struct v3d_context *v3d = v3d_context(pctx);
         struct v3d_uncompiled_shader *so = hwcso;
+        nir_shader *s = so->base.ir.nir;
 
-        hash_table_foreach(v3d->fs_cache, entry) {
-                delete_from_cache_if_matches(v3d->fs_cache, &v3d->prog.fs,
-                                             entry, so);
-        }
-        hash_table_foreach(v3d->vs_cache, entry) {
-                delete_from_cache_if_matches(v3d->vs_cache, &v3d->prog.vs,
-                                             entry, so);
+        hash_table_foreach(v3d->prog.cache[s->info.stage], entry) {
+                const struct v3d_key *key = entry->key;
+                struct v3d_compiled_shader *shader = entry->data;
+
+                if (key->shader_state != so)
+                        continue;
+
+                if (v3d->prog.fs == shader)
+                        v3d->prog.fs = NULL;
+                if (v3d->prog.vs == shader)
+                        v3d->prog.vs = NULL;
+                if (v3d->prog.cs == shader)
+                        v3d->prog.cs = NULL;
+                if (v3d->prog.compute == shader)
+                        v3d->prog.compute = NULL;
+
+                _mesa_hash_table_remove(v3d->prog.cache[s->info.stage], entry);
+                v3d_free_compiled_shader(shader);
         }
 
         ralloc_free(so->base.ir.nir);
@@ -726,6 +765,22 @@ v3d_vp_state_bind(struct pipe_context *pctx, void *hwcso)
         v3d->dirty |= VC5_DIRTY_UNCOMPILED_VS;
 }
 
+/* Records the given compute shader state as the one currently bound on the
+ * context.
+ */
+static void
+v3d_compute_state_bind(struct pipe_context *pctx, void *state)
+{
+        struct v3d_uncompiled_shader *bound = state;
+
+        v3d_context(pctx)->prog.bind_compute = bound;
+}
+
+/* Creates the uncompiled shader object for a compute state, passing the
+ * state's IR type and program through to the shared creation helper.
+ */
+static void *
+v3d_create_compute_state(struct pipe_context *pctx,
+                         const struct pipe_compute_state *cso)
+{
+        /* The helper takes a non-const pointer, so drop the qualifier. */
+        void *ir = (void *)cso->prog;
+
+        return v3d_uncompiled_shader_create(pctx, cso->ir_type, ir);
+}
+
 void
 v3d_program_init(struct pipe_context *pctx)
 {
@@ -740,10 +795,18 @@ v3d_program_init(struct pipe_context *pctx)
         pctx->bind_fs_state = v3d_fp_state_bind;
         pctx->bind_vs_state = v3d_vp_state_bind;
 
-        v3d->fs_cache = _mesa_hash_table_create(pctx, fs_cache_hash,
-                                                fs_cache_compare);
-        v3d->vs_cache = _mesa_hash_table_create(pctx, vs_cache_hash,
-                                                vs_cache_compare);
+        if (v3d->screen->has_csd) {
+                pctx->create_compute_state = v3d_create_compute_state;
+                pctx->delete_compute_state = v3d_shader_state_delete;
+                pctx->bind_compute_state = v3d_compute_state_bind;
+        }
+
+        v3d->prog.cache[MESA_SHADER_VERTEX] =
+                _mesa_hash_table_create(pctx, vs_cache_hash, vs_cache_compare);
+        v3d->prog.cache[MESA_SHADER_FRAGMENT] =
+                _mesa_hash_table_create(pctx, fs_cache_hash, fs_cache_compare);
+        v3d->prog.cache[MESA_SHADER_COMPUTE] =
+                _mesa_hash_table_create(pctx, cs_cache_hash, cs_cache_compare);
 }
 
 void
@@ -751,16 +814,16 @@ v3d_program_fini(struct pipe_context *pctx)
 {
         struct v3d_context *v3d = v3d_context(pctx);
 
-        hash_table_foreach(v3d->fs_cache, entry) {
-                struct v3d_compiled_shader *shader = entry->data;
-                v3d_free_compiled_shader(shader);
-                _mesa_hash_table_remove(v3d->fs_cache, entry);
-        }
+        for (int i = 0; i < MESA_SHADER_STAGES; i++) {
+                struct hash_table *cache = v3d->prog.cache[i];
+                if (!cache)
+                        continue;
 
-        hash_table_foreach(v3d->vs_cache, entry) {
-                struct v3d_compiled_shader *shader = entry->data;
-                v3d_free_compiled_shader(shader);
-                _mesa_hash_table_remove(v3d->vs_cache, entry);
+                hash_table_foreach(cache, entry) {
+                        struct v3d_compiled_shader *shader = entry->data;
+                        v3d_free_compiled_shader(shader);
+                        _mesa_hash_table_remove(cache, entry);
+                }
         }
 
         v3d_bo_unreference(&v3d->prog.spill_bo);
index afac781725a651e886ee9af2353522592b4ccc44..b77e3d9060e4f7f98f40906ffbf626afa4f0efc8 100644 (file)
@@ -22,6 +22,8 @@
  * IN THE SOFTWARE.
  */
 
+#include <sys/sysinfo.h>
+
 #include "util/os_misc.h"
 #include "pipe/p_defines.h"
 #include "pipe/p_screen.h"
@@ -122,7 +124,6 @@ v3d_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
         case PIPE_CAP_OCCLUSION_QUERY:
         case PIPE_CAP_POINT_SPRITE:
         case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
-        case PIPE_CAP_COMPUTE:
         case PIPE_CAP_DRAW_INDIRECT:
         case PIPE_CAP_MULTI_DRAW_INDIRECT:
         case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
@@ -143,6 +144,9 @@ v3d_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
         case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
            return 0;
 
+        case PIPE_CAP_COMPUTE:
+                return screen->has_csd && screen->devinfo.ver >= 41;
+
         case PIPE_CAP_GENERATE_MIPMAP:
                 return v3d_has_feature(screen, DRM_V3D_PARAM_SUPPORTS_TFU);
 
@@ -260,8 +264,15 @@ v3d_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
 {
         struct v3d_screen *screen = v3d_screen(pscreen);
 
-        if (shader != PIPE_SHADER_VERTEX &&
-            shader != PIPE_SHADER_FRAGMENT) {
+        switch (shader) {
+        case PIPE_SHADER_VERTEX:
+        case PIPE_SHADER_FRAGMENT:
+                break;
+        case PIPE_SHADER_COMPUTE:
+                if (!screen->has_csd)
+                        return 0;
+                break;
+        default:
                 return 0;
         }
 
@@ -335,7 +346,7 @@ v3d_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
         case PIPE_SHADER_CAP_PREFERRED_IR:
                 return PIPE_SHADER_IR_NIR;
         case PIPE_SHADER_CAP_SUPPORTED_IRS:
-                return 0;
+                return 1 << PIPE_SHADER_IR_NIR;
         case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
                 return 32;
         case PIPE_SHADER_CAP_LOWER_IF_THRESHOLD:
@@ -348,6 +359,86 @@ v3d_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
         return 0;
 }
 
+/**
+ * Reports the value of one compute capability.
+ *
+ * When \p ret is non-NULL the value of \p param is copied into it; \p ret
+ * may be NULL when the caller only wants to know the size.
+ *
+ * Returns the size in bytes of the value, or 0 when the screen has no CSD
+ * support, \p param is not handled here, or the value could not be queried.
+ * \p ir_type is not consulted.
+ */
+static int
+v3d_get_compute_param(struct pipe_screen *pscreen, enum pipe_shader_ir ir_type,
+                      enum pipe_compute_cap param, void *ret)
+{
+        struct v3d_screen *screen = v3d_screen(pscreen);
+
+        if (!screen->has_csd)
+                return 0;
+
+/* Copies the array x into ret (if a buffer was supplied) and returns its
+ * size in bytes from the enclosing function.
+ */
+#define RET(x) do {                                     \
+                if (ret)                                \
+                        memcpy(ret, x, sizeof(x));      \
+                return sizeof(x);                       \
+        } while (0)
+
+        switch (param) {
+        case PIPE_COMPUTE_CAP_ADDRESS_BITS:
+                RET((uint32_t []) { 32 });
+
+        case PIPE_COMPUTE_CAP_IR_TARGET: {
+                /* Go through RET() so that a NULL ret is a pure size query
+                 * like every other cap, and so that the reported size
+                 * covers the terminating NUL that gets written.
+                 */
+                static const char target[] = "v3d";
+                RET(target);
+        }
+
+        case PIPE_COMPUTE_CAP_GRID_DIMENSION:
+                RET((uint64_t []) { 3 });
+
+        case PIPE_COMPUTE_CAP_MAX_GRID_SIZE:
+                /* GL_MAX_COMPUTE_SHADER_WORK_GROUP_COUNT: The CSD has a
+                 * 16-bit field for the number of workgroups in each
+                 * dimension.
+                 */
+                RET(((uint64_t []) { 65535, 65535, 65535 }));
+
+        case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
+                /* GL_MAX_COMPUTE_WORK_GROUP_SIZE */
+                RET(((uint64_t []) { 256, 256, 256 }));
+
+        case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
+        case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK:
+                /* GL_MAX_COMPUTE_WORK_GROUP_INVOCATIONS: This is
+                 * limited by WG_SIZE in the CSD.
+                 */
+                RET((uint64_t []) { 256 });
+
+        case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE:
+                RET((uint64_t []) { 1024 * 1024 * 1024 });
+
+        case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE:
+                /* GL_MAX_COMPUTE_SHARED_MEMORY_SIZE */
+                RET((uint64_t []) { 32768 });
+
+        case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE:
+        case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE:
+                RET((uint64_t []) { 4096 });
+
+        case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE: {
+                struct sysinfo si;
+
+                /* Don't report uninitialized stack contents if the query
+                 * fails.
+                 */
+                if (sysinfo(&si) != 0)
+                        return 0;
+
+                /* totalram is expressed in units of mem_unit bytes. */
+                RET((uint64_t []) { (uint64_t)si.totalram * si.mem_unit });
+        }
+
+        case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY:
+                /* OpenCL only */
+                RET((uint32_t []) { 0 });
+
+        case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS:
+                RET((uint32_t []) { 1 });
+
+        case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
+                RET((uint32_t []) { 1 });
+
+        case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
+                RET((uint32_t []) { 16 });
+
+        }
+
+        return 0;
+}
+
 static boolean
 v3d_screen_is_format_supported(struct pipe_screen *pscreen,
                                enum pipe_format format,
@@ -565,6 +656,7 @@ v3d_screen_create(int fd, struct renderonly *ro)
         pscreen->get_param = v3d_screen_get_param;
         pscreen->get_paramf = v3d_screen_get_paramf;
         pscreen->get_shader_param = v3d_screen_get_shader_param;
+        pscreen->get_compute_param = v3d_get_compute_param;
         pscreen->context_create = v3d_context_create;
         pscreen->is_format_supported = v3d_screen_is_format_supported;
 
@@ -590,6 +682,8 @@ v3d_screen_create(int fd, struct renderonly *ro)
 
         slab_create_parent(&screen->transfer_pool, sizeof(struct v3d_transfer), 16);
 
+        screen->has_csd = false; /* until the UABI is enabled. */
+
         v3d_fence_init(screen);
 
         v3d_process_debug_variable();
index 94ae8b30f3a7d1c3b5092d888899ee852509713f..6e90755e77ca12fc3b328de377aece0c1497dea0 100644 (file)
@@ -77,6 +77,8 @@ struct v3d_screen {
         uint32_t bo_size;
         uint32_t bo_count;
 
+        bool has_csd;
+
         struct v3d_simulator_file *sim_file;
 };
 
index a5532bdf2b4561d18f30312465b61939da2e74bc..77101947e2b31b4cd2dbc42ce173961df6c0dab4 100644 (file)
@@ -358,6 +358,16 @@ v3d_write_uniforms(struct v3d_context *v3d, struct v3d_compiled_shader *shader,
                                        v3d->prog.spill_size_per_thread);
                         break;
 
+                case QUNIFORM_NUM_WORK_GROUPS:
+                        cl_aligned_u32(&uniforms,
+                                       v3d->compute_num_workgroups[data]);
+                        break;
+
+                case QUNIFORM_SHARED_OFFSET:
+                        cl_aligned_reloc(&job->indirect, &uniforms,
+                                         v3d->compute_shared_memory, 0);
+                        break;
+
                 default:
                         assert(quniform_contents_is_texture_p0(uinfo->contents[i]));
 
@@ -444,6 +454,11 @@ v3d_set_shader_uniform_dirty_flags(struct v3d_compiled_shader *shader)
                         dirty |= VC5_DIRTY_ZSA;
                         break;
 
+                case QUNIFORM_NUM_WORK_GROUPS:
+                case QUNIFORM_SHARED_OFFSET:
+                        /* Compute always recalculates uniforms. */
+                        break;
+
                 default:
                         assert(quniform_contents_is_texture_p0(shader->prog_data.base->uniforms.contents[i]));
                         dirty |= VC5_DIRTY_FRAGTEX | VC5_DIRTY_VERTTEX;
index 14e85e784858a96996f71b867653eb1cabe08d51..14e95c71204c7c31ad6c411af33ccf81f6f7b40d 100644 (file)
@@ -489,7 +489,7 @@ v3d_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
         /* Before setting up the draw, flush anything writing to the textures
          * that we read from.
          */
-        for (int s = 0; s < PIPE_SHADER_TYPES; s++)
+        for (int s = 0; s < PIPE_SHADER_COMPUTE; s++)
                 v3d_predraw_check_stage_inputs(pctx, s);
 
         if (info->indirect)
@@ -514,7 +514,7 @@ v3d_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
         /* Mark SSBOs as being written.  We don't actually know which ones are
          * read vs written, so just assume the worst
          */
-        for (int s = 0; s < PIPE_SHADER_TYPES; s++) {
+        for (int s = 0; s < PIPE_SHADER_COMPUTE; s++) {
                 foreach_bit(i, v3d->ssbo[s].enabled_mask) {
                         v3d_job_add_write_resource(job,
                                                    v3d->ssbo[s].sb[i].buffer);