return vs->writes_point_size && ctx->payloads[PIPE_SHADER_FRAGMENT].prefix.draw_mode == MALI_POINTS;
}
-/* Stage the attribute descriptors so we can adjust src_offset
- * to let BOs align nicely */
-
-static void
-panfrost_stage_attributes(struct panfrost_context *ctx)
+void
+panfrost_vertex_state_upd_attr_offs(struct panfrost_context *ctx,
+ struct midgard_payload_vertex_tiler *vp)
{
- struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
- struct panfrost_vertex_state *so = ctx->vertex;
-
- size_t sz = sizeof(struct mali_attr_meta) * PAN_MAX_ATTRIBUTE;
- struct panfrost_transfer transfer = panfrost_allocate_transient(batch, sz);
- struct mali_attr_meta *target = (struct mali_attr_meta *) transfer.cpu;
+ if (!ctx->vertex)
+ return;
- /* Copy as-is for the first pass */
- memcpy(target, so->hw, sz);
+ struct panfrost_vertex_state *so = ctx->vertex;
/* Fixup offsets for the second pass. Recall that the hardware
* calculates attribute addresses as:
* QED.
*/
- unsigned start = ctx->payloads[PIPE_SHADER_VERTEX].offset_start;
+ unsigned start = vp->offset_start;
for (unsigned i = 0; i < so->num_elements; ++i) {
unsigned vbi = so->pipe[i].vertex_buffer_index;
struct pipe_vertex_buffer *buf = &ctx->vertex_buffers[vbi];
- struct panfrost_resource *rsrc = (struct panfrost_resource *) (buf->buffer.resource);
- mali_ptr addr = rsrc->bo->gpu + buf->buffer_offset;
/* Adjust by the masked off bits of the offset. Make sure we
* read src_offset from so->hw (which is not GPU visible)
* rather than target (which is) due to caching effects */
- unsigned src_offset = so->hw[i].src_offset;
- src_offset += (addr & 63);
+ unsigned src_offset = so->pipe[i].src_offset;
+
+ /* BOs aligned to 4k so guaranteed aligned to 64 */
+ src_offset += (buf->buffer_offset & 63);
/* Also, somewhat obscurely per-instance data needs to be
* offset in response to a delayed start in an indexed draw */
if (so->pipe[i].instance_divisor && ctx->instance_count > 1 && start)
src_offset -= buf->stride * start;
- target[i].src_offset = src_offset;
+ so->hw[i].src_offset = src_offset;
}
-
- /* Let's also include vertex builtins */
-
- struct mali_attr_meta builtin = {
- .format = MALI_R32UI,
- .swizzle = panfrost_get_default_swizzle(1)
- };
-
- /* See mali_attr_meta specification for the magic number */
-
- builtin.index = so->vertexid_index;
- memcpy(&target[PAN_VERTEX_ID], &builtin, 4);
-
- builtin.index = so->vertexid_index + 1;
- memcpy(&target[PAN_INSTANCE_ID], &builtin, 4);
-
- ctx->payloads[PIPE_SHADER_VERTEX].postfix.attribute_meta = transfer.gpu;
}
/* Compute number of UBOs active (more specifically, compute the highest UBO
panfrost_emit_shader_meta(batch, PIPE_SHADER_FRAGMENT,
&ctx->payloads[PIPE_SHADER_FRAGMENT]);
- /* We stage to transient, so always dirty.. */
- if (ctx->vertex)
- panfrost_stage_attributes(ctx);
+ panfrost_emit_vertex_attr_meta(batch,
+ &ctx->payloads[PIPE_SHADER_VERTEX]);
for (int i = 0; i <= PIPE_SHADER_FRAGMENT; ++i) {
panfrost_emit_sampler_descriptors(batch, i, &ctx->payloads[i]);
/* Handle dirty flags now */
panfrost_emit_for_draw(ctx);
- /* If rasterizer discard is enable, only submit the vertex */
-
- bool rasterizer_discard = ctx->rasterizer
- && ctx->rasterizer->base.rasterizer_discard;
-
-
- struct midgard_payload_vertex_tiler *vertex_payload = &ctx->payloads[PIPE_SHADER_VERTEX];
- struct midgard_payload_vertex_tiler *tiler_payload = &ctx->payloads[PIPE_SHADER_FRAGMENT];
-
struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
- bool wallpapering = ctx->wallpaper_batch && batch->tiler_dep;
-
- if (wallpapering) {
- /* Inject in reverse order, with "predicted" job indices. THIS IS A HACK XXX */
- panfrost_new_job(batch, JOB_TYPE_TILER, false, batch->job_index + 2, tiler_payload, sizeof(*tiler_payload), true);
- panfrost_new_job(batch, JOB_TYPE_VERTEX, false, 0, vertex_payload, sizeof(*vertex_payload), true);
- } else {
- unsigned vertex = panfrost_new_job(batch, JOB_TYPE_VERTEX, false, 0, vertex_payload, sizeof(*vertex_payload), false);
-
- if (!rasterizer_discard)
- panfrost_new_job(batch, JOB_TYPE_TILER, false, vertex, tiler_payload, sizeof(*tiler_payload), false);
- }
+ panfrost_emit_vertex_tiler_jobs(batch,
+ &ctx->payloads[PIPE_SHADER_VERTEX],
+ &ctx->payloads[PIPE_SHADER_FRAGMENT]);
panfrost_batch_adjust_stack_size(batch);
}
#undef DEFINE_CASE
-static unsigned
-panfrost_translate_index_size(unsigned size)
-{
- switch (size) {
- case 1:
- return MALI_DRAW_INDEXED_UINT8;
-
- case 2:
- return MALI_DRAW_INDEXED_UINT16;
-
- case 4:
- return MALI_DRAW_INDEXED_UINT32;
-
- default:
- unreachable("Invalid index size");
- }
-}
-
-/* Gets a GPU address for the associated index buffer. Only gauranteed to be
- * good for the duration of the draw (transient), could last longer. Also get
- * the bounds on the index buffer for the range accessed by the draw. We do
- * these operations together because there are natural optimizations which
- * require them to be together. */
-
-static mali_ptr
-panfrost_get_index_buffer_bounded(struct panfrost_context *ctx, const struct pipe_draw_info *info, unsigned *min_index, unsigned *max_index)
-{
- struct panfrost_resource *rsrc = (struct panfrost_resource *) (info->index.resource);
-
- off_t offset = info->start * info->index_size;
- struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
- mali_ptr out = 0;
-
- bool needs_indices = true;
-
- if (info->max_index != ~0u) {
- *min_index = info->min_index;
- *max_index = info->max_index;
- needs_indices = false;
- }
-
- if (!info->has_user_indices) {
- /* Only resources can be directly mapped */
- panfrost_batch_add_bo(batch, rsrc->bo,
- PAN_BO_ACCESS_SHARED |
- PAN_BO_ACCESS_READ |
- PAN_BO_ACCESS_VERTEX_TILER);
- out = rsrc->bo->gpu + offset;
-
- /* Check the cache */
- needs_indices = !panfrost_minmax_cache_get(rsrc->index_cache, info->start, info->count,
- min_index, max_index);
- } else {
- /* Otherwise, we need to upload to transient memory */
- const uint8_t *ibuf8 = (const uint8_t *) info->index.user;
- out = panfrost_upload_transient(batch, ibuf8 + offset, info->count * info->index_size);
- }
-
- if (needs_indices) {
- /* Fallback */
- u_vbuf_get_minmax_index(&ctx->base, info, min_index, max_index);
-
- if (!info->has_user_indices) {
- panfrost_minmax_cache_add(rsrc->index_cache, info->start, info->count,
- *min_index, *max_index);
- }
- }
-
-
- return out;
-}
-
static bool
panfrost_scissor_culls_everything(struct panfrost_context *ctx)
{
ctx->tf_prims_generated += prims;
}
+/* Advance the running offset of every stream-output target in
+ * ctx->streamout after a draw. Each target's offset grows by the value
+ * u_stream_outputs_for_vertices() reports for the current primitive type
+ * (ctx->active_prim) and vertex count (ctx->vertex_count). */
+
+static void
+panfrost_update_streamout_offsets(struct panfrost_context *ctx)
+{
+        for (unsigned t = 0; t < ctx->streamout.num_targets; t++) {
+                unsigned emitted =
+                        u_stream_outputs_for_vertices(ctx->active_prim,
+                                                      ctx->vertex_count);
+
+                ctx->streamout.offsets[t] += emitted;
+        }
+}
+
static void
panfrost_draw_vbo(
struct pipe_context *pipe,
}
}
- ctx->payloads[PIPE_SHADER_VERTEX].offset_start = info->start;
- ctx->payloads[PIPE_SHADER_FRAGMENT].offset_start = info->start;
-
/* Now that we have a guaranteed terminating path, find the job.
* Assignment commented out to prevent unused warning */
/* struct panfrost_batch *batch = */ panfrost_get_batch_for_fbo(ctx);
- ctx->payloads[PIPE_SHADER_FRAGMENT].prefix.draw_mode = g2m_draw_mode(mode);
-
/* Take into account a negative bias */
ctx->vertex_count = info->count + abs(info->index_bias);
ctx->instance_count = info->instance_count;
ctx->active_prim = info->mode;
- /* For non-indexed draws, they're the same */
- unsigned vertex_count = ctx->vertex_count;
-
- unsigned draw_flags = 0;
-
- /* The draw flags interpret how primitive size is interpreted */
-
- if (panfrost_writes_point_size(ctx))
- draw_flags |= MALI_DRAW_VARYING_SIZE;
+ unsigned vertex_count;
- if (info->primitive_restart)
- draw_flags |= MALI_DRAW_PRIMITIVE_RESTART_FIXED_INDEX;
-
- /* These doesn't make much sense */
-
- draw_flags |= 0x3000;
-
- if (ctx->rasterizer && ctx->rasterizer->base.flatshade_first)
- draw_flags |= MALI_DRAW_FLATSHADE_FIRST;
+ panfrost_vt_set_draw_info(ctx, info, g2m_draw_mode(mode),
+ &ctx->payloads[PIPE_SHADER_VERTEX],
+ &ctx->payloads[PIPE_SHADER_FRAGMENT],
+ &vertex_count, &ctx->padded_count);
panfrost_statistics_record(ctx, info);
- if (info->index_size) {
- unsigned min_index = 0, max_index = 0;
- ctx->payloads[PIPE_SHADER_FRAGMENT].prefix.indices =
- panfrost_get_index_buffer_bounded(ctx, info, &min_index, &max_index);
-
- /* Use the corresponding values */
- vertex_count = max_index - min_index + 1;
- ctx->payloads[PIPE_SHADER_VERTEX].offset_start = min_index + info->index_bias;
- ctx->payloads[PIPE_SHADER_FRAGMENT].offset_start = min_index + info->index_bias;
-
- ctx->payloads[PIPE_SHADER_FRAGMENT].prefix.offset_bias_correction = -min_index;
- ctx->payloads[PIPE_SHADER_FRAGMENT].prefix.index_count = MALI_POSITIVE(info->count);
-
- draw_flags |= panfrost_translate_index_size(info->index_size);
- } else {
- /* Index count == vertex count, if no indexing is applied, as
- * if it is internally indexed in the expected order */
-
- ctx->payloads[PIPE_SHADER_FRAGMENT].prefix.offset_bias_correction = 0;
- ctx->payloads[PIPE_SHADER_FRAGMENT].prefix.index_count = MALI_POSITIVE(ctx->vertex_count);
-
- /* Reverse index state */
- ctx->payloads[PIPE_SHADER_FRAGMENT].prefix.indices = (mali_ptr) 0;
- }
-
/* Dispatch "compute jobs" for the vertex/tiler pair as (1,
* vertex_count, 1) */
1, vertex_count, info->instance_count,
1, 1, 1);
- ctx->payloads[PIPE_SHADER_FRAGMENT].prefix.unknown_draw = draw_flags;
-
- /* Encode the padded vertex count */
-
- if (info->instance_count > 1) {
- ctx->padded_count = panfrost_padded_vertex_count(vertex_count);
-
- unsigned shift = __builtin_ctz(ctx->padded_count);
- unsigned k = ctx->padded_count >> (shift + 1);
-
- ctx->payloads[PIPE_SHADER_VERTEX].instance_shift = shift;
- ctx->payloads[PIPE_SHADER_FRAGMENT].instance_shift = shift;
-
- ctx->payloads[PIPE_SHADER_VERTEX].instance_odd = k;
- ctx->payloads[PIPE_SHADER_FRAGMENT].instance_odd = k;
- } else {
- ctx->padded_count = vertex_count;
-
- /* Reset instancing state */
- ctx->payloads[PIPE_SHADER_VERTEX].instance_shift = 0;
- ctx->payloads[PIPE_SHADER_VERTEX].instance_odd = 0;
- ctx->payloads[PIPE_SHADER_FRAGMENT].instance_shift = 0;
- ctx->payloads[PIPE_SHADER_FRAGMENT].instance_odd = 0;
- }
-
/* Fire off the draw itself */
panfrost_queue_draw(ctx);
/* Increment transform feedback offsets */
-
- for (unsigned i = 0; i < ctx->streamout.num_targets; ++i) {
- unsigned output_count = u_stream_outputs_for_vertices(
- ctx->active_prim, ctx->vertex_count);
-
- ctx->streamout.offsets[i] += output_count;
- }
+ panfrost_update_streamout_offsets(ctx);
}
/* CSO state */
so->hw[i].swizzle = panfrost_get_default_swizzle(desc->nr_channels);
so->hw[i].format = panfrost_find_format(desc);
-
- /* The field itself should probably be shifted over */
- so->hw[i].src_offset = elements[i].src_offset;
}
+ /* Let's also prepare vertex builtins */
+ so->hw[PAN_VERTEX_ID].format = MALI_R32UI;
+ so->hw[PAN_VERTEX_ID].swizzle = panfrost_get_default_swizzle(1);
+ so->hw[PAN_INSTANCE_ID].format = MALI_R32UI;
+ so->hw[PAN_INSTANCE_ID].swizzle = panfrost_get_default_swizzle(1);
+
return so;
}