freedreno/ir3: Implement primitive layout intrinsics
author Kristian H. Kristensen <hoegsberg@google.com>
Fri, 11 Oct 2019 04:02:45 +0000 (21:02 -0700)
committer Kristian H. Kristensen <hoegsberg@google.com>
Thu, 17 Oct 2019 20:43:53 +0000 (13:43 -0700)
This implements the load_vs_primitive_stride_ir3,
load_vs_vertex_stride_ir3 and load_primitive_location_ir3 intrinsics,
used for getting the primitive layout strides and locations.

Signed-off-by: Kristian H. Kristensen <hoegsberg@google.com>
src/freedreno/ir3/ir3_compiler_nir.c
src/freedreno/ir3/ir3_nir.c
src/freedreno/ir3/ir3_shader.h
src/gallium/drivers/freedreno/a6xx/fd6_emit.c
src/gallium/drivers/freedreno/a6xx/fd6_emit.h
src/gallium/drivers/freedreno/a6xx/fd6_program.c
src/gallium/drivers/freedreno/ir3/ir3_gallium.c
src/gallium/drivers/freedreno/ir3/ir3_gallium.h

index 2cc720652fc4f8d73aacfdccd3979d8f9962031c..04c44b90dfea8a7515a887a91137cec21af45acf 100644 (file)
@@ -1320,6 +1320,9 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
                dst = NULL;
        }
 
+       const unsigned primitive_param = ctx->so->shader->const_state.offsets.primitive_param * 4;
+       const unsigned primitive_map = ctx->so->shader->const_state.offsets.primitive_map * 4;
+
        switch (intr->intrinsic) {
        case nir_intrinsic_load_uniform:
                idx = nir_intrinsic_base(intr);
@@ -1344,6 +1347,18 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
                                        ctx->so->shader->ubo_state.size / 16);
                }
                break;
+
+       case nir_intrinsic_load_vs_primitive_stride_ir3:
+               dst[0] = create_uniform(b, primitive_param + 0);
+               break;
+       case nir_intrinsic_load_vs_vertex_stride_ir3:
+               dst[0] = create_uniform(b, primitive_param + 1);
+               break;
+       case nir_intrinsic_load_primitive_location_ir3:
+               idx = nir_intrinsic_driver_location(intr);
+               dst[0] = create_uniform(b, primitive_map + idx);
+               break;
+
        case nir_intrinsic_load_ubo:
                emit_intrinsic_load_ubo(ctx, intr, dst);
                break;
index 103821cd6b359ba8bec78f34097b485489a0b13f..f9d059254fda1dd3666c775dec9a050590bf5818 100644 (file)
@@ -445,5 +445,19 @@ ir3_setup_const_state(struct ir3_shader *shader, nir_shader *nir)
                constoff += align(IR3_MAX_SO_BUFFERS * ptrsz, 4) / 4;
        }
 
+       switch (shader->type) {
+       case MESA_SHADER_VERTEX:
+               const_state->offsets.primitive_param = constoff;
+               constoff += 1;
+               break;
+       case MESA_SHADER_GEOMETRY:
+               const_state->offsets.primitive_param = constoff;
+               const_state->offsets.primitive_map = constoff + 1;
+               constoff += 1 + DIV_ROUND_UP(nir->num_inputs, 4);
+               break;
+       default:
+               break;
+       }
+
        const_state->offsets.immediate = constoff;
 }
index ce25886565897eced1863a89cdb34a264ea4e125..91c992eb8d863ce88075a8cda9457ffe98161a83 100644 (file)
@@ -122,6 +122,8 @@ struct ir3_const_state {
                unsigned image_dims;
                unsigned driver_param;
                unsigned tfbo;
+               unsigned primitive_param;
+               unsigned primitive_map;
                unsigned immediate;
        } offsets;
 
index b732372f055afa5d954ae62cd1a4b8a4a11ff478..6d27b3419358e2fb126dd1bb3c05c606cc45dedc 100644 (file)
@@ -790,6 +790,28 @@ fd6_emit_streamout(struct fd_ringbuffer *ring, struct fd6_emit *emit, struct ir3
        }
 }
 
+static void
+fd6_emit_tess_const(struct fd6_emit *emit) /* emit the VS primitive/vertex stride params to both the VS and GS constants */
+{
+       struct fd_context *ctx = emit->ctx;
+       const unsigned vs_regid = emit->vs->shader->const_state.offsets.primitive_param;
+       const unsigned gs_regid = emit->gs->shader->const_state.offsets.primitive_param; /* emit->gs is dereferenced unconditionally; the caller checks key.has_gs first */
+       uint32_t num_vertices = emit->gs->shader->nir->info.gs.vertices_in;
+
+       uint32_t params[4] = {
+               emit->vs->shader->output_size * num_vertices * 4,       /* vs primitive stride: vertex stride times GS input vertex count */
+               emit->vs->shader->output_size * 4,                                      /* vs vertex stride; NOTE(review): * 4 presumably converts dwords to bytes - confirm */
+               0, 0, /* remaining two entries are left zero */
+       };
+
+       struct fd_ringbuffer *constobj = fd_submit_new_ringbuffer( /* new streaming ringbuffer that holds both uploads */
+               ctx->batch->submit, 0x1000, FD_RINGBUFFER_STREAMING);
+
+       fd6_emit_const(constobj, emit->vs->type, vs_regid * 4, 0, ARRAY_SIZE(params), params, NULL); /* offset is kept in 4-component units, hence * 4 */
+       fd6_emit_const(constobj, emit->gs->type, gs_regid * 4, 0, ARRAY_SIZE(params), params, NULL); /* same params[], at the GS's own primitive_param offset */
+
+       fd6_emit_take_group(emit, constobj, FD6_GROUP_PRIMITIVE_PARAMS, 0x7); /* NOTE(review): 0x7 is presumably an enable mask, same value as passed for FD6_GROUP_GS_CONST - confirm */
+}
 
 static void
 fd6_emit_consts(struct fd6_emit *emit, const struct ir3_shader_variant *v,
@@ -968,6 +990,9 @@ fd6_emit_state(struct fd_ringbuffer *ring, struct fd6_emit *emit)
        fd6_emit_consts(emit, gs, PIPE_SHADER_GEOMETRY, FD6_GROUP_GS_CONST, 0x7);
        fd6_emit_consts(emit, fs, PIPE_SHADER_FRAGMENT, FD6_GROUP_FS_CONST, 0x6);
 
+       if (emit->key.key.has_gs)
+               fd6_emit_tess_const(emit);
+
        /* if driver-params are needed, emit each time: */
        if (ir3_needs_vs_driver_params(vs)) {
                struct fd_ringbuffer *dpconstobj = fd_submit_new_ringbuffer(
index 107723fb07649e29885a9087c318e6d85884d441..f2f1a7f808508017e9b4b7d4b305e017dea34ecb 100644 (file)
@@ -55,6 +55,7 @@ enum fd6_state_id {
        FD6_GROUP_GS_CONST,
        FD6_GROUP_FS_CONST,
        FD6_GROUP_VS_DRIVER_PARAMS,
+       FD6_GROUP_PRIMITIVE_PARAMS,
        FD6_GROUP_VS_TEX,
        FD6_GROUP_HS_TEX,
        FD6_GROUP_DS_TEX,
index 045658ce2c0dfed115671e001f7bdf0003157e8a..f64cb982cd76fc5a245589b6855af2b6cab1efd6 100644 (file)
@@ -537,6 +537,11 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_screen *screen,
                         A6XX_VPC_PACK_PSIZELOC(psize_loc) |
                         A6XX_VPC_PACK_STRIDE_IN_VPC(l.max_loc));
 
+       if (gs) {
+               ir3_emit_immediates(screen, gs, ring);
+               ir3_emit_link_map(screen, vs, gs, ring);
+       }
+
        if (!binning_pass) {
                /* figure out VARYING_INTERP / VARYING_PS_REPL register values: */
                for (j = -1; (j = ir3_next_varying(fs, j)) < (int)fs->inputs_count; ) {
@@ -589,10 +594,6 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_screen *screen,
                ir3_emit_immediates(screen, ds, ring);
        }
 
-       if (gs) {
-               ir3_emit_immediates(screen, gs, ring);
-       }
-
        if (!binning_pass)
                ir3_emit_immediates(screen, fs, ring);
 }
index 11a4dc78161a15f59417af459af7f8001c93630d..e5ea424b221dca28c2d0822b001678c8d77f5202 100644 (file)
@@ -396,6 +396,54 @@ ir3_emit_immediates(struct fd_screen *screen, const struct ir3_shader_variant *v
        }
 }
 
+static uint32_t
+link_geometry_stages(const struct ir3_shader_variant *producer, /* fills locs[] for every consumer input matching a producer output; returns the entry count */
+               const struct ir3_shader_variant *consumer,
+               uint32_t *locs) /* out: indexed by consumer input driver_location; no bounds check is done here */
+{
+       uint32_t num_loc = 0; /* last matched input driver_location + 1; stays 0 when nothing matches */
+
+       nir_foreach_variable(in_var, &consumer->shader->nir->inputs) {
+               nir_foreach_variable(out_var, &producer->shader->nir->outputs) {
+                       if (in_var->data.location == out_var->data.location) { /* variables are paired by data.location */
+                               locs[in_var->data.driver_location] =
+                                       producer->shader->output_loc[out_var->data.driver_location] * 4; /* NOTE(review): * 4 is presumably a unit conversion - confirm */
+
+                               debug_assert(num_loc <= in_var->data.driver_location + 1); /* matches must arrive in non-decreasing driver_location order */
+                               num_loc = in_var->data.driver_location + 1;
+                       }
+               }
+       }
+
+       return num_loc;
+}
+
+void
+ir3_emit_link_map(struct fd_screen *screen, /* emit the producer-to-v location map at v's primitive_map constant offset */
+               const struct ir3_shader_variant *producer, /* stage whose outputs are looked up */
+               const struct ir3_shader_variant *v, struct fd_ringbuffer *ring) /* v: consuming variant; ring: passed through to emit_const() */
+{
+       const struct ir3_const_state *const_state = &v->shader->const_state;
+       uint32_t base = const_state->offsets.primitive_map; /* in vec4 units until converted below */
+       uint32_t patch_locs[MAX_VARYING] = { }, num_loc; /* zero-initialised; handed to link_geometry_stages() as locs[] */
+
+       num_loc = link_geometry_stages(producer, v, patch_locs);
+
+       int size = DIV_ROUND_UP(num_loc, 4); /* number of vec4s needed for num_loc entries */
+
+       /* truncate size to avoid writing constants that shader
+        * does not use:
+        */
+       size = MIN2(size + base, v->constlen) - base; /* NOTE(review): mixes int and uint32_t; if constlen < base the size > 0 check below relies on the result converting to a negative int - confirm */
+
+       /* convert out of vec4: */
+       base *= 4;
+       size *= 4;
+
+       if (size > 0) /* nothing is emitted when no location was linked or none fits below constlen */
+               emit_const(screen, ring, v, base, 0, size, patch_locs, NULL);
+}
+
 /* emit stream-out buffers: */
 static void
 emit_tfbos(struct fd_context *ctx, const struct ir3_shader_variant *v,
index 5352df5199080e24f2e916126a490454ffd594d7..33a94a6a9f8ab14c0c558f42c92181780270e4b1 100644 (file)
@@ -63,6 +63,9 @@ void ir3_emit_image_dims(struct fd_screen *screen, const struct ir3_shader_varia
                struct fd_ringbuffer *ring, struct fd_shaderimg_stateobj *si);
 void ir3_emit_immediates(struct fd_screen *screen, const struct ir3_shader_variant *v,
                struct fd_ringbuffer *ring);
+void ir3_emit_link_map(struct fd_screen *screen,
+               const struct ir3_shader_variant *producer,
+               const struct ir3_shader_variant *v, struct fd_ringbuffer *ring); /* emits the producer-to-v location map at v's primitive_map constant offset */
 
 static inline bool
 ir3_needs_vs_driver_params(const struct ir3_shader_variant *v)