freedreno/ir3: add support for a650 tess shared storage
author Jonathan Marek <jonathan@marek.ca>
Mon, 6 Jul 2020 02:53:39 +0000 (22:53 -0400)
committer Marge Bot <eric+marge@anholt.net>
Wed, 8 Jul 2020 02:30:23 +0000 (02:30 +0000)
On a650, vertex shader <-> tess ctrl I/O uses LDL/STL (instead of LDLW/STLW),
and the "local_primitive_id" in the tess ctrl shader comes from bits 16-21 of
the header instead of bits 0-5.

Signed-off-by: Jonathan Marek <jonathan@marek.ca>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5764>

src/freedreno/ir3/ir3_compiler.c
src/freedreno/ir3/ir3_compiler.h
src/freedreno/ir3/ir3_compiler_nir.c
src/freedreno/ir3/ir3_nir.c
src/freedreno/ir3/ir3_nir.h
src/freedreno/ir3/ir3_nir_lower_tess.c

index 9fc00d8f91f3247319b5984de1f47abb60c6f628..342282ca7db9d82aeaf315eed8960c81bc90605e 100644 (file)
@@ -93,6 +93,9 @@ ir3_compiler_create(struct fd_device *dev, uint32_t gpu_id)
                 * TODO: is this true on earlier gen's?
                 */
                compiler->max_const_compute = 256;
+
+               if (compiler->gpu_id == 650)
+                       compiler->tess_use_shared = true;
        } else {
                compiler->max_const_pipeline = 512;
                compiler->max_const_geom = 512;
index 54a1afd5a257ca41e75416d319eab0124f1f9108..663e0c531e031522196e64c69b75c9b09d00a0d7 100644 (file)
@@ -74,6 +74,9 @@ struct ir3_compiler {
         */
        bool samgq_workaround;
 
+       /* on a650, vertex shader <-> tess control io uses LDL/STL */
+       bool tess_use_shared;
+
        /* The maximum number of constants, in vec4's, across the entire graphics
         * pipeline.
         */
index ad0124a8adbf4c80dbb460c654d28c0121378fde..342f5e3ce17fda8187a0c54ab811570f134f71a9 100644 (file)
@@ -928,6 +928,10 @@ emit_intrinsic_load_shared_ir3(struct ir3_context *ctx, nir_intrinsic_instr *int
                        create_immed(b, intr->num_components), 0,
                        create_immed(b, base), 0);
 
+       /* for a650, use LDL for tess ctrl inputs: */
+       if (ctx->so->type == MESA_SHADER_TESS_CTRL && ctx->compiler->tess_use_shared)
+               load->opc = OPC_LDL;
+
        load->cat6.type = utype_dst(intr->dest);
        load->regs[0]->wrmask = MASK(intr->num_components);
 
@@ -952,6 +956,11 @@ emit_intrinsic_store_shared_ir3(struct ir3_context *ctx, nir_intrinsic_instr *in
                ir3_create_collect(ctx, value, intr->num_components), 0,
                create_immed(b, intr->num_components), 0);
 
+       /* for a650, use STL for vertex outputs used by tess ctrl shader: */
+       if (ctx->so->type == MESA_SHADER_VERTEX && ctx->so->key.tessellation &&
+               ctx->compiler->tess_use_shared)
+               store->opc = OPC_STL;
+
        store->cat6.dst_offset = nir_intrinsic_base(intr);
        store->cat6.type = utype_src(intr->src[0]);
        store->barrier_class = IR3_BARRIER_SHARED_W;
index f37d29947997e5349ada7e5b2ca2c34e7562772a..903f136860b26548a4f47629777625e05cda5ffc 100644 (file)
@@ -379,7 +379,7 @@ ir3_nir_lower_variant(struct ir3_shader_variant *so, nir_shader *s)
                        break;
                case MESA_SHADER_TESS_CTRL:
                        NIR_PASS_V(s, ir3_nir_lower_tess_ctrl, so, so->key.tessellation);
-                       NIR_PASS_V(s, ir3_nir_lower_to_explicit_input);
+                       NIR_PASS_V(s, ir3_nir_lower_to_explicit_input, so->shader->compiler);
                        progress = true;
                        break;
                case MESA_SHADER_TESS_EVAL:
@@ -389,7 +389,7 @@ ir3_nir_lower_variant(struct ir3_shader_variant *so, nir_shader *s)
                        progress = true;
                        break;
                case MESA_SHADER_GEOMETRY:
-                       NIR_PASS_V(s, ir3_nir_lower_to_explicit_input);
+                       NIR_PASS_V(s, ir3_nir_lower_to_explicit_input, so->shader->compiler);
                        progress = true;
                        break;
                default:
index 8bc6d342fe0b16875a5f7c2e8e37197da7c81f71..4126d4e48686107673271acebe50e1aeedfd7e8c 100644 (file)
@@ -46,7 +46,7 @@ bool ir3_nir_lower_tex_prefetch(nir_shader *shader);
 
 void ir3_nir_lower_to_explicit_output(nir_shader *shader,
                struct ir3_shader_variant *v, unsigned topology);
-void ir3_nir_lower_to_explicit_input(nir_shader *shader);
+void ir3_nir_lower_to_explicit_input(nir_shader *shader, struct ir3_compiler *compiler);
 void ir3_nir_lower_tess_ctrl(nir_shader *shader, struct ir3_shader_variant *v, unsigned topology);
 void ir3_nir_lower_tess_eval(nir_shader *shader, unsigned topology);
 void ir3_nir_lower_gs(nir_shader *shader);
index 4c06b458665f1484b5668b37722c3bb76e77c211..e084f99402de8e92b82766eeb1947ab2a7cba68d 100644 (file)
@@ -42,6 +42,9 @@ struct state {
 
        struct exec_list old_outputs;
        struct exec_list emit_outputs;
+
+       /* start bit of the local primitive id in the header (16 when tess ctrl uses shared storage, i.e. a650; else 0): */
+       unsigned local_primitive_id_start;
 };
 
 static nir_ssa_def *
@@ -66,7 +69,7 @@ build_vertex_id(nir_builder *b, struct state *state)
 static nir_ssa_def *
 build_local_primitive_id(nir_builder *b, struct state *state)
 {
-       return bitfield_extract(b, state->header, 0, 63);
+       return bitfield_extract(b, state->header, state->local_primitive_id_start, 63);
 }
 
 static nir_variable *
@@ -301,10 +304,16 @@ lower_block_to_explicit_input(nir_block *block, nir_builder *b, struct state *st
 }
 
 void
-ir3_nir_lower_to_explicit_input(nir_shader *shader)
+ir3_nir_lower_to_explicit_input(nir_shader *shader, struct ir3_compiler *compiler)
 {
        struct state state = { };
 
+       /* when using stl/ldl (instead of stlw/ldlw) for linking VS and HS,
+        * HS uses a different primitive id, which starts at bit 16 in the header
+        */
+       if (shader->info.stage == MESA_SHADER_TESS_CTRL && compiler->tess_use_shared)
+               state.local_primitive_id_start = 16;
+
        nir_function_impl *impl = nir_shader_get_entrypoint(shader);
        assert(impl);