*/
#include "pan_context.h"
+#include "util/u_prim.h"
static mali_ptr
panfrost_emit_varyings(
return transfer.gpu;
}
+/* Fill out the attribute record `slot` so that it describes a stream out
+ * target, and attach the target's backing BO to the job returned by
+ * panfrost_get_job_for_fbo().
+ *
+ * ctx:    context used to look up the current job
+ * slot:   record to fill in (stride, shift, extra_flags, size, elements)
+ * stride: per-vertex stride; multiplied by 4 before being stored
+ *         (presumably given in dwords -- confirm against the caller)
+ * offset: number of strides to skip from the start of the target
+ * count:  number of vertices expected to be written
+ * target: stream out target supplying buffer, buffer_offset and buffer_size
+ */
+static void
+panfrost_emit_streamout(
+        struct panfrost_context *ctx,
+        union mali_attr *slot,
+        unsigned stride,
+        unsigned offset,
+        unsigned count,
+        struct pipe_stream_output_target *target)
+{
+        /* Descriptor header */
+        slot->stride = stride * 4;
+        slot->extra_flags = 0;
+        slot->shift = 0;
+
+        /* Never advertise more bytes than the target says it holds */
+        unsigned capacity = target->buffer_size;
+        unsigned wanted = slot->stride * count;
+
+        slot->size = (capacity < wanted) ? capacity : wanted;
+
+        /* Look up the job and the backing BO, then tie them together */
+        struct panfrost_job *job = panfrost_get_job_for_fbo(ctx);
+        struct panfrost_bo *backing = pan_resource(target->buffer)->bo;
+        panfrost_job_add_bo(job, backing);
+
+        /* Start address: BO base + target offset + strides to skip */
+        slot->elements = backing->gpu + target->buffer_offset +
+                         (offset * slot->stride);
+}
+
static void
panfrost_emit_point_coord(union mali_attr *slot)
{
}
}
+/* Report whether the varying at `loc` has its bit set in `mask`.
+ * VARYING_SLOT_TEX0..VARYING_SLOT_TEX7 map to bits 0..7 and
+ * VARYING_SLOT_PNTC maps to bit 8; every other slot yields false. */
+static bool
+has_point_coord(unsigned mask, gl_varying_slot loc)
+{
+        unsigned bit;
+
+        if ((loc >= VARYING_SLOT_TEX0) && (loc <= VARYING_SLOT_TEX7))
+                bit = loc - VARYING_SLOT_TEX0;
+        else if (loc == VARYING_SLOT_PNTC)
+                bit = 8;
+        else
+                return false;
+
+        return (mask & (1 << bit)) != 0;
+}
+
+/* Helpers for manipulating stream out information so we can pack varyings
+ * accordingly. Look up the stream output record for a given captured varying;
+ * callers derive the varying's src_offset from its dst_offset */
+
+/* Return a copy of the first entry in info.output[] whose register_index
+ * equals `loc`. Reaching the end without a match is treated as unreachable,
+ * so callers must only ask about varyings that are captured. */
+static struct pipe_stream_output
+pan_get_so(struct pipe_stream_output_info info, gl_varying_slot loc)
+{
+        const struct pipe_stream_output *entry = info.output;
+        const struct pipe_stream_output *end = info.output + info.num_outputs;
+
+        while (entry != end) {
+                if (entry->register_index == loc)
+                        return *entry;
+
+                ++entry;
+        }
+
+        unreachable("Varying not captured");
+}
+
+/* Map a component count of 1..4 to the matching 32-bit float format
+ * (MALI_R32F .. MALI_RGBA32F). Any other count is treated as unreachable.
+ * TODO: Integers */
+static enum mali_format
+pan_xfb_format(unsigned nr_components)
+{
+        static const enum mali_format float_formats[] = {
+                MALI_R32F,
+                MALI_RG32F,
+                MALI_RGB32F,
+                MALI_RGBA32F
+        };
+
+        if ((nr_components < 1) || (nr_components > 4))
+                unreachable("Invalid format");
+
+        return float_formats[nr_components - 1];
+}
+
void
panfrost_emit_varying_descriptor(
struct panfrost_context *ctx,
struct panfrost_transfer trans = panfrost_allocate_transient(ctx,
vs_size + fs_size);
- for (unsigned i = 0; i < vs->tripipe->varying_count; i++) {
- if (!is_special_varying(vs->varyings_loc[i]))
- vs->varyings[i].src_offset = 16 * (num_gen_varyings++);
- }
-
- for (unsigned i = 0; i < fs->tripipe->varying_count; i++) {
- unsigned j;
+ struct pipe_stream_output_info so = vs->stream_output;
- /* If we have a point sprite replacement, handle that here. We
- * have to translate location first. TODO: Flip y in shader.
- * We're already keying ... just time crunch .. */
+ /* Check if this varying is linked by us. This is the case for
+ * general-purpose, non-captured varyings. If it is, link it. If it's
+ * not, use the provided stream out information to determine the
+ * offset, since it was already linked for us. */
- unsigned loc = fs->varyings_loc[i];
- unsigned pnt_loc =
- (loc >= VARYING_SLOT_TEX0) ? (loc - VARYING_SLOT_TEX0) :
- (loc == VARYING_SLOT_PNTC) ? 8 :
- ~0;
+ for (unsigned i = 0; i < vs->tripipe->varying_count; i++) {
+ gl_varying_slot loc = vs->varyings_loc[i];
- if (~pnt_loc && fs->point_sprite_mask & (1 << pnt_loc)) {
- /* gl_PointCoord index by convention */
- fs->varyings[i].index = 3;
- fs->reads_point_coord = true;
+ bool special = is_special_varying(loc);
+ bool captured = ((vs->so_mask & (1ll << loc)) ? true : false);
- /* Swizzle out the z/w to 0/1 */
- fs->varyings[i].format = MALI_RG16F;
- fs->varyings[i].swizzle =
- panfrost_get_default_swizzle(2);
+ if (captured) {
+ struct pipe_stream_output o = pan_get_so(so, loc);
- continue;
+ unsigned dst_offset = o.dst_offset * 4; /* dwords */
+ vs->varyings[i].src_offset = dst_offset;
+ } else if (!special) {
+ vs->varyings[i].src_offset = 16 * (num_gen_varyings++);
}
+ }
- if (fs->varyings[i].index)
- continue;
+ /* The captured varyings already had src_offset set in the loop above;
+ * there, the layout is defined by the stream out info, not us */
+
+ /* Link up with fragment varyings */
+ bool reads_point_coord = fs->reads_point_coord;
+
+ for (unsigned i = 0; i < fs->tripipe->varying_count; i++) {
+ gl_varying_slot loc = fs->varyings_loc[i];
+ signed vs_idx = -1;
- /*
- * Re-use the VS general purpose varying pos if it exists,
- * create a new one otherwise.
- */
- for (j = 0; j < vs->tripipe->varying_count; j++) {
- if (fs->varyings_loc[i] == vs->varyings_loc[j])
+ /* Link up */
+ for (unsigned j = 0; j < vs->tripipe->varying_count; ++j) {
+ if (vs->varyings_loc[j] == loc) {
+ vs_idx = j;
break;
+ }
}
- if (j < vs->tripipe->varying_count)
- fs->varyings[i].src_offset = vs->varyings[j].src_offset;
+ /* Either assign or reuse */
+ if (vs_idx >= 0)
+ fs->varyings[i].src_offset = vs->varyings[vs_idx].src_offset;
else
fs->varyings[i].src_offset = 16 * (num_gen_varyings++);
+
+ if (has_point_coord(fs->point_sprite_mask, loc))
+ reads_point_coord |= true;
}
memcpy(trans.cpu, vs->varyings, vs_size);
union mali_attr varyings[PIPE_MAX_ATTRIBS];
- unsigned idx = 0;
+ /* Figure out how many streamout buffers could be bound */
+ unsigned so_count = ctx->streamout.num_targets;
+ for (unsigned i = 0; i < vs->tripipe->varying_count; i++) {
+ gl_varying_slot loc = vs->varyings_loc[i];
+
+ bool captured = ((vs->so_mask & (1ll << loc)) ? true : false);
+ if (!captured) continue;
+
+ struct pipe_stream_output o = pan_get_so(so, loc);
+ so_count = MAX2(so_count, o.output_buffer + 1);
+ }
+
+ signed idx = so_count;
signed general = idx++;
signed gl_Position = idx++;
signed gl_PointSize = vs->writes_point_size ? (idx++) : -1;
- signed gl_PointCoord = fs->reads_point_coord ? (idx++) : -1;
+ signed gl_PointCoord = reads_point_coord ? (idx++) : -1;
signed gl_FrontFacing = fs->reads_face ? (idx++) : -1;
+ /* Emit the stream out buffers */
+
+ unsigned output_count = u_stream_outputs_for_vertices(
+ ctx->active_prim, ctx->vertex_count);
+
+ for (unsigned i = 0; i < so_count; ++i) {
+ struct pipe_stream_output_target *target =
+ (i < ctx->streamout.num_targets) ? ctx->streamout.targets[i] : NULL;
+
+ if (target) {
+ panfrost_emit_streamout(ctx, &varyings[i], so.stride[i], ctx->streamout.offsets[i], output_count, target);
+ } else {
+ /* Emit a dummy buffer */
+ panfrost_emit_varyings(ctx, &varyings[i], so.stride[i] * 4, output_count);
+
+ /* Clear the attribute type */
+ varyings[i].elements &= ~0xF;
+ }
+ }
+
panfrost_emit_varyings(ctx, &varyings[general], num_gen_varyings * 16,
vertex_count);
panfrost_emit_varyings(ctx, &varyings[gl_PointSize],
2, vertex_count);
- if (fs->reads_point_coord)
+ if (reads_point_coord)
panfrost_emit_point_coord(&varyings[gl_PointCoord]);
if (fs->reads_face)
general, gl_Position, gl_PointSize,
gl_PointCoord, gl_FrontFacing);
+ /* Replace streamout */
+
+ struct mali_attr_meta *ovs = (struct mali_attr_meta *) (trans.cpu);
+ struct mali_attr_meta *ofs = (struct mali_attr_meta *) (trans.cpu + vs_size);
+
+ for (unsigned i = 0; i < vs->tripipe->varying_count; i++) {
+ gl_varying_slot loc = vs->varyings_loc[i];
+
+ bool captured = ((vs->so_mask & (1ll << loc)) ? true : false);
+ if (!captured) continue;
+
+ struct pipe_stream_output o = pan_get_so(so, loc);
+ ovs[i].index = o.output_buffer;
+
+ /* Set the type appropriately. TODO: Integer varyings XXX */
+ assert(o.stream == 0);
+ ovs[i].format = pan_xfb_format(o.num_components);
+ ovs[i].swizzle = panfrost_get_default_swizzle(o.num_components);
+
+ /* Link to the fragment */
+ signed fs_idx = -1;
+
+ /* Link up */
+ for (unsigned j = 0; j < fs->tripipe->varying_count; ++j) {
+ if (fs->varyings_loc[j] == loc) {
+ fs_idx = j;
+ break;
+ }
+ }
+
+ if (fs_idx >= 0) {
+ ofs[fs_idx].index = ovs[i].index;
+ ofs[fs_idx].format = ovs[i].format;
+ ofs[fs_idx].swizzle = ovs[i].swizzle;
+ }
+ }
+
+ /* Replace point sprite */
+ for (unsigned i = 0; i < fs->tripipe->varying_count; i++) {
+ /* If we have a point sprite replacement, handle that here. We
+ * have to translate location first. TODO: Flip y in shader.
+ * We're already keying ... just time crunch .. */
+
+ if (has_point_coord(fs->point_sprite_mask, fs->varyings_loc[i])) {
+ ofs[i].index = gl_PointCoord;
+
+ /* Swizzle out the z/w to 0/1 */
+ ofs[i].format = MALI_RG16F;
+ ofs[i].swizzle =
+ panfrost_get_default_swizzle(2);
+ }
+ }
+
+ /* Fix up unaligned addresses */
+ for (unsigned i = 0; i < so_count; ++i) {
+ unsigned align = (varyings[i].elements & 63);
+
+ /* While we're at it, the SO buffers are linear */
+
+ if (!align) {
+ varyings[i].elements |= MALI_ATTR_LINEAR;
+ continue;
+ }
+
+ /* We need to adjust alignment */
+ varyings[i].elements &= ~63;
+ varyings[i].elements |= MALI_ATTR_LINEAR;
+ varyings[i].size += align;
+
+ for (unsigned v = 0; v < vs->tripipe->varying_count; ++v) {
+ if (ovs[v].index == i)
+ ovs[v].src_offset = vs->varyings[v].src_offset + align;
+ }
+
+ for (unsigned f = 0; f < fs->tripipe->varying_count; ++f) {
+ if (ofs[f].index == i)
+ ofs[f].src_offset = fs->varyings[f].src_offset + align;
+ }
+ }
+
mali_ptr varyings_p = panfrost_upload_transient(ctx, &varyings, idx * sizeof(union mali_attr));
ctx->payloads[PIPE_SHADER_VERTEX].postfix.varyings = varyings_p;
ctx->payloads[PIPE_SHADER_FRAGMENT].postfix.varyings = varyings_p;