freedreno: Split ir3_const's user buffer and indirect upload APIs.
[mesa.git] / src / gallium / drivers / freedreno / ir3 / ir3_const.h
index 64cd39684ad3045ee6a4246c5cad3287dee10464..f840e5ee1d7a98da59a7695707da74638b787aa7 100644 (file)
 
 static bool is_stateobj(struct fd_ringbuffer *ring);
 
-static void emit_const(struct fd_ringbuffer *ring,
-               const struct ir3_shader_variant *v, uint32_t dst_offset,
-               uint32_t offset, uint32_t size,
-               const void *user_buffer, struct pipe_resource *buffer);
+static void emit_const_user(struct fd_ringbuffer *ring,
+               const struct ir3_shader_variant *v, uint32_t regid,
+               uint32_t size, const uint32_t *user_buffer);
 
 static void emit_const_bo(struct fd_ringbuffer *ring,
+               const struct ir3_shader_variant *v, uint32_t regid,
+               uint32_t offset, uint32_t size,
+               struct fd_bo *bo);
+
+static void emit_const_prsc(struct fd_ringbuffer *ring,
+               const struct ir3_shader_variant *v, uint32_t regid,
+               uint32_t offset, uint32_t size,
+               struct pipe_resource *buffer)
+{      /* pipe_resource front end for emit_const_bo(): same upload, resource given as prsc */
+       struct fd_resource *rsc = fd_resource(buffer);  /* fd_resource wrapping this pipe_resource */
+       emit_const_bo(ring, v, regid, offset, size, rsc->bo);  /* ring/v/regid/offset/size forwarded unchanged */
+}
+
+static void emit_const_ptrs(struct fd_ringbuffer *ring,
                const struct ir3_shader_variant *v, uint32_t dst_offset,
                uint32_t num, struct pipe_resource **prscs, uint32_t *offsets);
 
+static void
+emit_const_asserts(struct fd_ringbuffer *ring,
+               const struct ir3_shader_variant *v,
+               uint32_t regid, uint32_t sizedwords)
+{      /* assert-only sanity checks on a const upload range; 'ring' is not used here */
+       assert((regid % 4) == 0);  /* destination must start on a multiple of 4 (presumably a vec4 boundary, in dwords) */
+       assert((sizedwords % 4) == 0);  /* length must be a multiple of 4 as well */
+       assert(regid + sizedwords <= v->constlen * 4);  /* range must end within constlen * 4 (constlen presumably counted in vec4s) */
+}
 
 static void
 ring_wfi(struct fd_batch *batch, struct fd_ringbuffer *ring)
@@ -93,12 +115,12 @@ static inline void
 ir3_emit_user_consts(struct fd_screen *screen, const struct ir3_shader_variant *v,
                struct fd_ringbuffer *ring, struct fd_constbuf_stateobj *constbuf)
 {
-       struct ir3_ubo_analysis_state *state;
-       state = &v->shader->ubo_state;
+       const struct ir3_const_state *const_state = ir3_const_state(v);
+       const struct ir3_ubo_analysis_state *state = &const_state->ubo_state;
 
        for (unsigned i = 0; i < state->num_enabled; i++) {
-               assert(!state->range[i].bindless);
-               unsigned ubo = state->range[i].block;
+               assert(!state->range[i].ubo.bindless);
+               unsigned ubo = state->range[i].ubo.block;
                if (!(constbuf->enabled_mask & (1 << ubo)))
                        continue;
                struct pipe_constant_buffer *cb = &constbuf->cb[ubo];
@@ -125,28 +147,52 @@ ir3_emit_user_consts(struct fd_screen *screen, const struct ir3_shader_variant *
                debug_assert((size % 16) == 0);
                debug_assert((offset % 16) == 0);
 
-               emit_const(ring, v, state->range[i].offset / 4,
-                               offset, size / 4, cb->user_buffer, cb->buffer);
+               if (cb->user_buffer) {
+                       emit_const_user(ring, v, state->range[i].offset / 4,
+                               size / 4, cb->user_buffer + state->range[i].start);
+               } else {
+                       emit_const_prsc(ring, v, state->range[i].offset / 4,
+                                       offset, size / 4, cb->buffer);
+               }
        }
 }
 
 static inline void
-ir3_emit_ubos(struct fd_screen *screen, const struct ir3_shader_variant *v,
+ir3_emit_ubos(struct fd_context *ctx, const struct ir3_shader_variant *v,
                struct fd_ringbuffer *ring, struct fd_constbuf_stateobj *constbuf)
 {
-       const struct ir3_const_state *const_state = &v->shader->const_state;
+       const struct ir3_const_state *const_state = ir3_const_state(v);
        uint32_t offset = const_state->offsets.ubo;
+
+       /* a6xx+ uses UBO state and ldc instead of pointers emitted in
+        * const state and ldg:
+        */
+       if (ctx->screen->gpu_id >= 600)
+               return;
+
        if (v->constlen > offset) {
                uint32_t params = const_state->num_ubos;
                uint32_t offsets[params];
                struct pipe_resource *prscs[params];
 
                for (uint32_t i = 0; i < params; i++) {
-                       const uint32_t index = i + 1;   /* UBOs start at index 1 */
-                       struct pipe_constant_buffer *cb = &constbuf->cb[index];
-                       assert(!cb->user_buffer);
+                       struct pipe_constant_buffer *cb = &constbuf->cb[i];
 
-                       if ((constbuf->enabled_mask & (1 << index)) && cb->buffer) {
+                       /* If we have user pointers (constbuf 0, aka GL uniforms), upload
+                        * them to a buffer now, and save it in the constbuf so that we
+                        * don't have to reupload until they get changed.
+                        */
+                       if (cb->user_buffer) {
+                               struct pipe_context *pctx = &ctx->base;
+                               u_upload_data(pctx->stream_uploader, 0,
+                                               cb->buffer_size,
+                                               64,
+                                               cb->user_buffer,
+                                               &cb->buffer_offset, &cb->buffer);
+                               cb->user_buffer = NULL;
+                       }
+
+                       if ((constbuf->enabled_mask & (1 << i)) && cb->buffer) {
                                offsets[i] = cb->buffer_offset;
                                prscs[i] = cb->buffer;
                        } else {
@@ -157,7 +203,7 @@ ir3_emit_ubos(struct fd_screen *screen, const struct ir3_shader_variant *v,
 
                assert(offset * 4 + params <= v->constlen * 4);
 
-               emit_const_bo(ring, v, offset * 4, params, prscs, offsets);
+               emit_const_ptrs(ring, v, offset * 4, params, prscs, offsets);
        }
 }
 
@@ -165,7 +211,7 @@ static inline void
 ir3_emit_ssbo_sizes(struct fd_screen *screen, const struct ir3_shader_variant *v,
                struct fd_ringbuffer *ring, struct fd_shaderbuf_stateobj *sb)
 {
-       const struct ir3_const_state *const_state = &v->shader->const_state;
+       const struct ir3_const_state *const_state = ir3_const_state(v);
        uint32_t offset = const_state->offsets.ssbo_sizes;
        if (v->constlen > offset) {
                uint32_t sizes[align(const_state->ssbo_size.count, 4)];
@@ -177,7 +223,7 @@ ir3_emit_ssbo_sizes(struct fd_screen *screen, const struct ir3_shader_variant *v
                        sizes[off] = sb->sb[index].buffer_size;
                }
 
-               emit_const(ring, v, offset * 4, 0, ARRAY_SIZE(sizes), sizes, NULL);
+               emit_const_user(ring, v, offset * 4, ARRAY_SIZE(sizes), sizes);
        }
 }
 
@@ -185,7 +231,7 @@ static inline void
 ir3_emit_image_dims(struct fd_screen *screen, const struct ir3_shader_variant *v,
                struct fd_ringbuffer *ring, struct fd_shaderimg_stateobj *si)
 {
-       const struct ir3_const_state *const_state = &v->shader->const_state;
+       const struct ir3_const_state *const_state = ir3_const_state(v);
        uint32_t offset = const_state->offsets.image_dims;
        if (v->constlen > offset) {
                uint32_t dims[align(const_state->image_dims.count, 4)];
@@ -209,7 +255,7 @@ ir3_emit_image_dims(struct fd_screen *screen, const struct ir3_shader_variant *v
                                 * be the same, so use original dimensions for y and z
                                 * stride:
                                 */
-                               dims[off + 1] = slice->pitch;
+                               dims[off + 1] = fd_resource_pitch(rsc, img->u.tex.level);
                                /* see corresponding logic in fd_resource_offset(): */
                                if (rsc->layout.layer_first) {
                                        dims[off + 2] = rsc->layout.layer_size;
@@ -230,7 +276,7 @@ ir3_emit_image_dims(struct fd_screen *screen, const struct ir3_shader_variant *v
                }
                uint32_t size = MIN2(ARRAY_SIZE(dims), v->constlen * 4 - offset * 4);
 
-               emit_const(ring, v, offset * 4, 0, size, dims, NULL);
+               emit_const_user(ring, v, offset * 4, size, dims);
        }
 }
 
@@ -238,9 +284,9 @@ static inline void
 ir3_emit_immediates(struct fd_screen *screen, const struct ir3_shader_variant *v,
                struct fd_ringbuffer *ring)
 {
-       const struct ir3_const_state *const_state = &v->shader->const_state;
+       const struct ir3_const_state *const_state = ir3_const_state(v);
        uint32_t base = const_state->offsets.immediate;
-       int size = const_state->immediates_count;
+       int size = DIV_ROUND_UP(const_state->immediates_count, 4);
 
        /* truncate size to avoid writing constants that shader
         * does not use:
@@ -252,7 +298,7 @@ ir3_emit_immediates(struct fd_screen *screen, const struct ir3_shader_variant *v
        size *= 4;
 
        if (size > 0)
-               emit_const(ring, v, base, 0, size, const_state->immediates[0].val, NULL);
+               emit_const_user(ring, v, base, size, const_state->immediates);
 }
 
 static inline void
@@ -260,7 +306,7 @@ ir3_emit_link_map(struct fd_screen *screen,
                const struct ir3_shader_variant *producer,
                const struct ir3_shader_variant *v, struct fd_ringbuffer *ring)
 {
-       const struct ir3_const_state *const_state = &v->shader->const_state;
+       const struct ir3_const_state *const_state = ir3_const_state(v);
        uint32_t base = const_state->offsets.primitive_map;
        uint32_t patch_locs[MAX_VARYING] = { }, num_loc;
 
@@ -278,7 +324,7 @@ ir3_emit_link_map(struct fd_screen *screen,
        size *= 4;
 
        if (size > 0)
-               emit_const(ring, v, base, 0, size, patch_locs, NULL);
+               emit_const_user(ring, v, base, size, patch_locs);
 }
 
 /* emit stream-out buffers: */
@@ -287,7 +333,7 @@ emit_tfbos(struct fd_context *ctx, const struct ir3_shader_variant *v,
                struct fd_ringbuffer *ring)
 {
        /* streamout addresses after driver-params: */
-       const struct ir3_const_state *const_state = &v->shader->const_state;
+       const struct ir3_const_state *const_state = ir3_const_state(v);
        uint32_t offset = const_state->offsets.tfbo;
        if (v->constlen > offset) {
                struct fd_streamout_stateobj *so = &ctx->streamout;
@@ -311,7 +357,7 @@ emit_tfbos(struct fd_context *ctx, const struct ir3_shader_variant *v,
 
                assert(offset * 4 + params <= v->constlen * 4);
 
-               emit_const_bo(ring, v, offset * 4, params, prscs, offsets);
+               emit_const_ptrs(ring, v, offset * 4, params, prscs, offsets);
        }
 }
 
@@ -391,7 +437,7 @@ emit_common_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *rin
                ring_wfi(ctx->batch, ring);
 
                ir3_emit_user_consts(ctx->screen, v, ring, constbuf);
-               ir3_emit_ubos(ctx->screen, v, ring, constbuf);
+               ir3_emit_ubos(ctx, v, ring, constbuf);
                if (shader_dirty)
                        ir3_emit_immediates(ctx->screen, v, ring);
        }
@@ -412,7 +458,7 @@ emit_common_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *rin
 static inline bool
 ir3_needs_vs_driver_params(const struct ir3_shader_variant *v)
 {
-       const struct ir3_const_state *const_state = &v->shader->const_state;
+       const struct ir3_const_state *const_state = ir3_const_state(v);
        uint32_t offset = const_state->offsets.driver_param;
 
        return v->constlen > offset;
@@ -425,18 +471,15 @@ ir3_emit_vs_driver_params(const struct ir3_shader_variant *v,
 {
        debug_assert(ir3_needs_vs_driver_params(v));
 
-       const struct ir3_const_state *const_state = &v->shader->const_state;
+       const struct ir3_const_state *const_state = ir3_const_state(v);
        uint32_t offset = const_state->offsets.driver_param;
        uint32_t vertex_params[IR3_DP_VS_COUNT] = {
-                       [IR3_DP_VTXID_BASE] = info->index_size ?
+                       [IR3_DP_DRAWID]      = 0,  /* filled by hw (CP_DRAW_INDIRECT_MULTI) */
+                       [IR3_DP_VTXID_BASE]  = info->index_size ?
                                        info->index_bias : info->start,
-                                       [IR3_DP_VTXCNT_MAX] = max_tf_vtx(ctx, v),
+                       [IR3_DP_INSTID_BASE] = info->start_instance,
+                       [IR3_DP_VTXCNT_MAX]  = max_tf_vtx(ctx, v),
        };
-       /* if no user-clip-planes, we don't need to emit the
-        * entire thing:
-        */
-       uint32_t vertex_params_size = 4;
-
        if (v->key.ucp_enables) {
                struct pipe_clip_state *ucp = &ctx->ucp;
                unsigned pos = IR3_DP_UCP0_X;
@@ -446,10 +489,16 @@ ir3_emit_vs_driver_params(const struct ir3_shader_variant *v,
                                pos++;
                        }
                }
-               vertex_params_size = ARRAY_SIZE(vertex_params);
        }
 
-       vertex_params_size = MAX2(vertex_params_size, const_state->num_driver_params);
+       /* Only emit as many params as needed, i.e. up to the highest enabled UCP
+        * plane. However a binning pass may drop even some of these, so limit to
+        * program max.
+        */
+       const uint32_t vertex_params_size = MIN2(
+                       const_state->num_driver_params,
+                       (v->constlen - offset) * 4);
+       assert(vertex_params_size <= IR3_DP_VS_COUNT);
 
        bool needs_vtxid_base =
                ir3_find_sysval_regid(v, SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) != regid(63, 0);
@@ -483,13 +532,13 @@ ir3_emit_vs_driver_params(const struct ir3_shader_variant *v,
                ctx->screen->mem_to_mem(ring, vertex_params_rsc, 0,
                                indirect->buffer, src_off, 1);
 
-               emit_const(ring, v, offset * 4, 0,
-                               vertex_params_size, NULL, vertex_params_rsc);
+               emit_const_prsc(ring, v, offset * 4, 0,
+                               vertex_params_size, vertex_params_rsc);
 
                pipe_resource_reference(&vertex_params_rsc, NULL);
        } else {
-               emit_const(ring, v, offset * 4, 0,
-                               vertex_params_size, vertex_params, NULL);
+               emit_const_user(ring, v, offset * 4,
+                               vertex_params_size, vertex_params);
        }
 
        /* if needed, emit stream-out buffer addresses: */
@@ -532,7 +581,7 @@ ir3_emit_cs_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *rin
        emit_common_consts(v, ring, ctx, PIPE_SHADER_COMPUTE);
 
        /* emit compute-shader driver-params: */
-       const struct ir3_const_state *const_state = &v->shader->const_state;
+       const struct ir3_const_state *const_state = ir3_const_state(v);
        uint32_t offset = const_state->offsets.driver_param;
        if (v->constlen > offset) {
                ring_wfi(ctx->batch, ring);
@@ -562,7 +611,7 @@ ir3_emit_cs_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *rin
                                indirect_offset = info->indirect_offset;
                        }
 
-                       emit_const(ring, v, offset * 4, indirect_offset, 4, NULL, indirect);
+                       emit_const_prsc(ring, v, offset * 4, indirect_offset, 4, indirect);
 
                        pipe_resource_reference(&indirect, NULL);
                } else {
@@ -577,7 +626,7 @@ ir3_emit_cs_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *rin
                        uint32_t size = MIN2(const_state->num_driver_params,
                                        v->constlen * 4 - offset * 4);
 
-                       emit_const(ring, v, offset * 4, 0, size, compute_params, NULL);
+                       emit_const_user(ring, v, offset * 4, size, compute_params);
                }
        }
 }