From 836686daf36cd8d7c17c909d98f5c9e07549d674 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Fri, 6 Mar 2020 09:45:31 +0100 Subject: [PATCH] panfrost: Move panfrost_emit_varying_descriptor() to pan_cmdstream.c Move panfrost_emit_varying_descriptor() to pan_cmdstream.c where other emit functions live and adjust the prototype to be consistent with other emit helpers. Signed-off-by: Boris Brezillon Reviewed-by: Alyssa Rosenzweig Part-of: --- src/gallium/drivers/panfrost/Makefile.sources | 3 +- src/gallium/drivers/panfrost/meson.build | 1 - src/gallium/drivers/panfrost/pan_cmdstream.c | 400 +++++++++++++++++ src/gallium/drivers/panfrost/pan_cmdstream.h | 6 + src/gallium/drivers/panfrost/pan_context.c | 6 +- src/gallium/drivers/panfrost/pan_context.h | 6 - src/gallium/drivers/panfrost/pan_varyings.c | 418 ------------------ 7 files changed, 411 insertions(+), 429 deletions(-) delete mode 100644 src/gallium/drivers/panfrost/pan_varyings.c diff --git a/src/gallium/drivers/panfrost/Makefile.sources b/src/gallium/drivers/panfrost/Makefile.sources index 78bd155a3d6..eea6605d040 100644 --- a/src/gallium/drivers/panfrost/Makefile.sources +++ b/src/gallium/drivers/panfrost/Makefile.sources @@ -30,5 +30,4 @@ C_SOURCES := \ pan_screen.c \ pan_screen.h \ pan_sfbd.c \ - pan_util.h \ - pan_varyings.c + pan_util.h diff --git a/src/gallium/drivers/panfrost/meson.build b/src/gallium/drivers/panfrost/meson.build index eb5d62715b4..2d0789db802 100644 --- a/src/gallium/drivers/panfrost/meson.build +++ b/src/gallium/drivers/panfrost/meson.build @@ -44,7 +44,6 @@ files_panfrost = files( 'pan_scoreboard.c', 'pan_sfbd.c', 'pan_mfbd.c', - 'pan_varyings.c', ) panfrost_includes = [ diff --git a/src/gallium/drivers/panfrost/pan_cmdstream.c b/src/gallium/drivers/panfrost/pan_cmdstream.c index d49bdd95ec2..02d3cfde068 100644 --- a/src/gallium/drivers/panfrost/pan_cmdstream.c +++ b/src/gallium/drivers/panfrost/pan_cmdstream.c @@ -23,6 +23,7 @@ */ #include "util/macros.h" +#include "util/u_prim.h" #include "util/u_vbuf.h" #include "panfrost-quirks.h" @@ -1314,6 +1315,405 @@ panfrost_emit_vertex_data(struct panfrost_batch *batch, k * sizeof(*attrs)); } +static mali_ptr +panfrost_emit_varyings(struct panfrost_batch *batch, union mali_attr *slot, + unsigned stride, unsigned count) +{ + /* Fill out the descriptor */ + slot->stride = stride; + slot->size = stride * count; + slot->shift = slot->extra_flags = 0; + + struct panfrost_transfer transfer = panfrost_allocate_transient(batch, + slot->size); + + slot->elements = transfer.gpu | MALI_ATTR_LINEAR; + + return transfer.gpu; +} + +static void +panfrost_emit_streamout(struct panfrost_batch *batch, union mali_attr *slot, + unsigned stride, unsigned offset, unsigned count, + struct pipe_stream_output_target *target) +{ + /* Fill out the descriptor */ + slot->stride = stride * 4; + slot->shift = slot->extra_flags = 0; + + unsigned max_size = target->buffer_size; + unsigned expected_size = slot->stride * count; + + slot->size = MIN2(max_size, expected_size); + + /* Grab the BO and bind it to the batch */ + struct panfrost_bo *bo = pan_resource(target->buffer)->bo; + + /* Varyings are WRITE from the perspective of the VERTEX but READ from + * the perspective of the TILER and FRAGMENT. + */ + panfrost_batch_add_bo(batch, bo, + PAN_BO_ACCESS_SHARED | + PAN_BO_ACCESS_RW | + PAN_BO_ACCESS_VERTEX_TILER | + PAN_BO_ACCESS_FRAGMENT); + + mali_ptr addr = bo->gpu + target->buffer_offset + (offset * slot->stride); + slot->elements = addr; +} + +/* Given a shader and buffer indices, link varying metadata together */ + +static bool +is_special_varying(gl_varying_slot loc) +{ + switch (loc) { + case VARYING_SLOT_POS: + case VARYING_SLOT_PSIZ: + case VARYING_SLOT_PNTC: + case VARYING_SLOT_FACE: + return true; + default: + return false; + } +} + +static void +panfrost_emit_varying_meta(void *outptr, struct panfrost_shader_state *ss, + signed general, signed gl_Position, + signed gl_PointSize, signed gl_PointCoord, + signed gl_FrontFacing) +{ + struct mali_attr_meta *out = (struct mali_attr_meta *) outptr; + + for (unsigned i = 0; i < ss->varying_count; ++i) { + gl_varying_slot location = ss->varyings_loc[i]; + int index = -1; + + switch (location) { + case VARYING_SLOT_POS: + index = gl_Position; + break; + case VARYING_SLOT_PSIZ: + index = gl_PointSize; + break; + case VARYING_SLOT_PNTC: + index = gl_PointCoord; + break; + case VARYING_SLOT_FACE: + index = gl_FrontFacing; + break; + default: + index = general; + break; + } + + assert(index >= 0); + out[i].index = index; + } +} + +static bool +has_point_coord(unsigned mask, gl_varying_slot loc) +{ + if ((loc >= VARYING_SLOT_TEX0) && (loc <= VARYING_SLOT_TEX7)) + return (mask & (1 << (loc - VARYING_SLOT_TEX0))); + else if (loc == VARYING_SLOT_PNTC) + return (mask & (1 << 8)); + else + return false; +} + +/* Helpers for manipulating stream out information so we can pack varyings + * accordingly. Compute the src_offset for a given captured varying */ + +static struct pipe_stream_output * +pan_get_so(struct pipe_stream_output_info *info, gl_varying_slot loc) +{ + for (unsigned i = 0; i < info->num_outputs; ++i) { + if (info->output[i].register_index == loc) + return &info->output[i]; + } + + unreachable("Varying not captured"); +} + +/* TODO: Integers */ +static enum mali_format +pan_xfb_format(unsigned nr_components) +{ + switch (nr_components) { + case 1: return MALI_R32F; + case 2: return MALI_RG32F; + case 3: return MALI_RGB32F; + case 4: return MALI_RGBA32F; + default: unreachable("Invalid format"); + } +} + +void +panfrost_emit_varying_descriptor(struct panfrost_batch *batch, + unsigned vertex_count, + struct midgard_payload_vertex_tiler *vp, + struct midgard_payload_vertex_tiler *tp) +{ + /* Load the shaders */ + struct panfrost_context *ctx = batch->ctx; + struct panfrost_shader_state *vs, *fs; + unsigned int num_gen_varyings = 0; + size_t vs_size, fs_size; + + /* Allocate the varying descriptor */ + + vs = panfrost_get_shader_state(ctx, PIPE_SHADER_VERTEX); + fs = panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT); + vs_size = sizeof(struct mali_attr_meta) * vs->varying_count; + fs_size = sizeof(struct mali_attr_meta) * fs->varying_count; + + struct panfrost_transfer trans = panfrost_allocate_transient(batch, + vs_size + + fs_size); + + struct pipe_stream_output_info *so = &vs->stream_output; + + /* Check if this varying is linked by us. This is the case for + * general-purpose, non-captured varyings. If it is, link it. If it's + * not, use the provided stream out information to determine the + * offset, since it was already linked for us. */ + + for (unsigned i = 0; i < vs->varying_count; i++) { + gl_varying_slot loc = vs->varyings_loc[i]; + + bool special = is_special_varying(loc); + bool captured = ((vs->so_mask & (1ll << loc)) ? true : false); + + if (captured) { + struct pipe_stream_output *o = pan_get_so(so, loc); + + unsigned dst_offset = o->dst_offset * 4; /* dwords */ + vs->varyings[i].src_offset = dst_offset; + } else if (!special) { + vs->varyings[i].src_offset = 16 * (num_gen_varyings++); + } + } + + /* Conversely, we need to set src_offset for the captured varyings. + * Here, the layout is defined by the stream out info, not us */ + + /* Link up with fragment varyings */ + bool reads_point_coord = fs->reads_point_coord; + + for (unsigned i = 0; i < fs->varying_count; i++) { + gl_varying_slot loc = fs->varyings_loc[i]; + unsigned src_offset; + signed vs_idx = -1; + + /* Link up */ + for (unsigned j = 0; j < vs->varying_count; ++j) { + if (vs->varyings_loc[j] == loc) { + vs_idx = j; + break; + } + } + + /* Either assign or reuse */ + if (vs_idx >= 0) + src_offset = vs->varyings[vs_idx].src_offset; + else + src_offset = 16 * (num_gen_varyings++); + + fs->varyings[i].src_offset = src_offset; + + if (has_point_coord(fs->point_sprite_mask, loc)) + reads_point_coord = true; + } + + memcpy(trans.cpu, vs->varyings, vs_size); + memcpy(trans.cpu + vs_size, fs->varyings, fs_size); + + union mali_attr varyings[PIPE_MAX_ATTRIBS] = { }; + + /* Figure out how many streamout buffers could be bound */ + unsigned so_count = ctx->streamout.num_targets; + for (unsigned i = 0; i < vs->varying_count; i++) { + gl_varying_slot loc = vs->varyings_loc[i]; + + bool captured = ((vs->so_mask & (1ll << loc)) ? true : false); + if (!captured) continue; + + struct pipe_stream_output *o = pan_get_so(so, loc); + so_count = MAX2(so_count, o->output_buffer + 1); + } + + signed idx = so_count; + signed general = idx++; + signed gl_Position = idx++; + signed gl_PointSize = vs->writes_point_size ? (idx++) : -1; + signed gl_PointCoord = reads_point_coord ? (idx++) : -1; + signed gl_FrontFacing = fs->reads_face ? (idx++) : -1; + signed gl_FragCoord = fs->reads_frag_coord ? (idx++) : -1; + + /* Emit the stream out buffers */ + + unsigned out_count = u_stream_outputs_for_vertices(ctx->active_prim, + ctx->vertex_count); + + for (unsigned i = 0; i < so_count; ++i) { + if (i < ctx->streamout.num_targets) { + panfrost_emit_streamout(batch, &varyings[i], + so->stride[i], + ctx->streamout.offsets[i], + out_count, + ctx->streamout.targets[i]); + } else { + /* Emit a dummy buffer */ + panfrost_emit_varyings(batch, &varyings[i], + so->stride[i] * 4, + out_count); + + /* Clear the attribute type */ + varyings[i].elements &= ~0xF; + } + } + + panfrost_emit_varyings(batch, &varyings[general], + num_gen_varyings * 16, + vertex_count); + + mali_ptr varyings_p; + + /* fp32 vec4 gl_Position */ + varyings_p = panfrost_emit_varyings(batch, &varyings[gl_Position], + sizeof(float) * 4, vertex_count); + tp->postfix.position_varying = varyings_p; + + + if (panfrost_writes_point_size(ctx)) { + varyings_p = panfrost_emit_varyings(batch, + &varyings[gl_PointSize], + 2, vertex_count); + tp->primitive_size.pointer = varyings_p; + } + + if (reads_point_coord) + varyings[gl_PointCoord].elements = MALI_VARYING_POINT_COORD; + + if (fs->reads_face) + varyings[gl_FrontFacing].elements = MALI_VARYING_FRONT_FACING; + + if (fs->reads_frag_coord) + varyings[gl_FragCoord].elements = MALI_VARYING_FRAG_COORD; + + /* Let's go ahead and link varying meta to the buffer in question, now + * that that information is available. VARYING_SLOT_POS is mapped to + * gl_FragCoord for fragment shaders but gl_Positionf or vertex shaders + * */ + + panfrost_emit_varying_meta(trans.cpu, vs, general, gl_Position, + gl_PointSize, gl_PointCoord, + gl_FrontFacing); + + panfrost_emit_varying_meta(trans.cpu + vs_size, fs, general, + gl_FragCoord, gl_PointSize, + gl_PointCoord, gl_FrontFacing); + + /* Replace streamout */ + + struct mali_attr_meta *ovs = (struct mali_attr_meta *)trans.cpu; + struct mali_attr_meta *ofs = ovs + vs->varying_count; + + for (unsigned i = 0; i < vs->varying_count; i++) { + gl_varying_slot loc = vs->varyings_loc[i]; + + bool captured = ((vs->so_mask & (1ll << loc)) ? true : false); + if (!captured) + continue; + + struct pipe_stream_output *o = pan_get_so(so, loc); + ovs[i].index = o->output_buffer; + + /* Set the type appropriately. TODO: Integer varyings XXX */ + assert(o->stream == 0); + ovs[i].format = pan_xfb_format(o->num_components); + ovs[i].swizzle = panfrost_get_default_swizzle(o->num_components); + + /* Link to the fragment */ + signed fs_idx = -1; + + /* Link up */ + for (unsigned j = 0; j < fs->varying_count; ++j) { + if (fs->varyings_loc[j] == loc) { + fs_idx = j; + break; + } + } + + if (fs_idx >= 0) { + ofs[fs_idx].index = ovs[i].index; + ofs[fs_idx].format = ovs[i].format; + ofs[fs_idx].swizzle = ovs[i].swizzle; + } + } + + /* Replace point sprite */ + for (unsigned i = 0; i < fs->varying_count; i++) { + /* If we have a point sprite replacement, handle that here. We + * have to translate location first. TODO: Flip y in shader. + * We're already keying ... just time crunch .. */ + + if (has_point_coord(fs->point_sprite_mask, + fs->varyings_loc[i])) { + ofs[i].index = gl_PointCoord; + + /* Swizzle out the z/w to 0/1 */ + ofs[i].format = MALI_RG16F; + ofs[i].swizzle = panfrost_get_default_swizzle(2); + } + } + + /* Fix up unaligned addresses */ + for (unsigned i = 0; i < so_count; ++i) { + if (varyings[i].elements < MALI_RECORD_SPECIAL) + continue; + + unsigned align = (varyings[i].elements & 63); + + /* While we're at it, the SO buffers are linear */ + + if (!align) { + varyings[i].elements |= MALI_ATTR_LINEAR; + continue; + } + + /* We need to adjust alignment */ + varyings[i].elements &= ~63; + varyings[i].elements |= MALI_ATTR_LINEAR; + varyings[i].size += align; + + for (unsigned v = 0; v < vs->varying_count; ++v) { + if (ovs[v].index != i) + continue; + + ovs[v].src_offset = vs->varyings[v].src_offset + align; + } + + for (unsigned f = 0; f < fs->varying_count; ++f) { + if (ofs[f].index != i) + continue; + + ofs[f].src_offset = fs->varyings[f].src_offset + align; + } + } + + varyings_p = panfrost_upload_transient(batch, varyings, + idx * sizeof(*varyings)); + vp->postfix.varyings = varyings_p; + tp->postfix.varyings = varyings_p; + + vp->postfix.varying_meta = trans.gpu; + tp->postfix.varying_meta = trans.gpu + vs_size; +} + void panfrost_emit_vertex_tiler_jobs(struct panfrost_batch *batch, struct midgard_payload_vertex_tiler *vp, diff --git a/src/gallium/drivers/panfrost/pan_cmdstream.h b/src/gallium/drivers/panfrost/pan_cmdstream.h index 4574750af06..655110243cd 100644 --- a/src/gallium/drivers/panfrost/pan_cmdstream.h +++ b/src/gallium/drivers/panfrost/pan_cmdstream.h @@ -93,6 +93,12 @@ void panfrost_emit_vertex_data(struct panfrost_batch *batch, struct midgard_payload_vertex_tiler *vp); +void +panfrost_emit_varying_descriptor(struct panfrost_batch *batch, + unsigned vertex_count, + struct midgard_payload_vertex_tiler *vp, + struct midgard_payload_vertex_tiler *tp); + void panfrost_emit_vertex_tiler_jobs(struct panfrost_batch *batch, struct midgard_payload_vertex_tiler *vp, diff --git a/src/gallium/drivers/panfrost/pan_context.c b/src/gallium/drivers/panfrost/pan_context.c index d938e43d2f4..db9f96a0185 100644 --- a/src/gallium/drivers/panfrost/pan_context.c +++ b/src/gallium/drivers/panfrost/pan_context.c @@ -446,9 +446,11 @@ panfrost_draw_vbo( /* Emit all sort of descriptors. */ panfrost_emit_vertex_data(batch, &ctx->payloads[PIPE_SHADER_VERTEX]); - panfrost_emit_varying_descriptor(ctx, + panfrost_emit_varying_descriptor(batch, ctx->padded_count * - ctx->instance_count); + ctx->instance_count, + &ctx->payloads[PIPE_SHADER_VERTEX], + &ctx->payloads[PIPE_SHADER_FRAGMENT]); panfrost_emit_shader_meta(batch, PIPE_SHADER_VERTEX, &ctx->payloads[PIPE_SHADER_VERTEX]); panfrost_emit_shader_meta(batch, PIPE_SHADER_FRAGMENT, diff --git a/src/gallium/drivers/panfrost/pan_context.h b/src/gallium/drivers/panfrost/pan_context.h index 3139309a94c..624a2d76554 100644 --- a/src/gallium/drivers/panfrost/pan_context.h +++ b/src/gallium/drivers/panfrost/pan_context.h @@ -337,11 +337,5 @@ panfrost_vertex_buffer_address(struct panfrost_context *ctx, unsigned i); void panfrost_compute_context_init(struct pipe_context *pctx); -/* Varyings */ - -void -panfrost_emit_varying_descriptor( - struct panfrost_context *ctx, - unsigned vertex_count); #endif diff --git a/src/gallium/drivers/panfrost/pan_varyings.c b/src/gallium/drivers/panfrost/pan_varyings.c deleted file mode 100644 index 365237eaf8c..00000000000 --- a/src/gallium/drivers/panfrost/pan_varyings.c +++ /dev/null @@ -1,418 +0,0 @@ -/* - * Copyright (C) 2018-2019 Alyssa Rosenzweig - * Copyright (C) 2019 Collabora, Ltd. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -#include "pan_bo.h" -#include "pan_context.h" -#include "util/u_prim.h" - -static mali_ptr -panfrost_emit_varyings( - struct panfrost_context *ctx, - union mali_attr *slot, - unsigned stride, - unsigned count) -{ - /* Fill out the descriptor */ - slot->stride = stride; - slot->size = stride * count; - slot->shift = slot->extra_flags = 0; - - struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx); - struct panfrost_transfer transfer = - panfrost_allocate_transient(batch, slot->size); - - slot->elements = transfer.gpu | MALI_ATTR_LINEAR; - - return transfer.gpu; -} - -static void -panfrost_emit_streamout( - struct panfrost_context *ctx, - union mali_attr *slot, - unsigned stride, - unsigned offset, - unsigned count, - struct pipe_stream_output_target *target) -{ - /* Fill out the descriptor */ - slot->stride = stride * 4; - slot->shift = slot->extra_flags = 0; - - unsigned max_size = target->buffer_size; - unsigned expected_size = slot->stride * count; - - slot->size = MIN2(max_size, expected_size); - - /* Grab the BO and bind it to the batch */ - struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx); - struct panfrost_bo *bo = pan_resource(target->buffer)->bo; - - /* Varyings are WRITE from the perspective of the VERTEX but READ from - * the perspective of the TILER and FRAGMENT. - */ - panfrost_batch_add_bo(batch, bo, - PAN_BO_ACCESS_SHARED | - PAN_BO_ACCESS_RW | - PAN_BO_ACCESS_VERTEX_TILER | - PAN_BO_ACCESS_FRAGMENT); - - mali_ptr addr = bo->gpu + target->buffer_offset + (offset * slot->stride); - slot->elements = addr; -} - -/* Given a shader and buffer indices, link varying metadata together */ - -static bool -is_special_varying(gl_varying_slot loc) -{ - switch (loc) { - case VARYING_SLOT_POS: - case VARYING_SLOT_PSIZ: - case VARYING_SLOT_PNTC: - case VARYING_SLOT_FACE: - return true; - default: - return false; - } -} - -static void -panfrost_emit_varying_meta( - void *outptr, struct panfrost_shader_state *ss, - signed general, signed gl_Position, - signed gl_PointSize, signed gl_PointCoord, - signed gl_FrontFacing) -{ - struct mali_attr_meta *out = (struct mali_attr_meta *) outptr; - - for (unsigned i = 0; i < ss->varying_count; ++i) { - gl_varying_slot location = ss->varyings_loc[i]; - int index = -1; - - switch (location) { - case VARYING_SLOT_POS: - index = gl_Position; - break; - case VARYING_SLOT_PSIZ: - index = gl_PointSize; - break; - case VARYING_SLOT_PNTC: - index = gl_PointCoord; - break; - case VARYING_SLOT_FACE: - index = gl_FrontFacing; - break; - default: - index = general; - break; - } - - assert(index >= 0); - out[i].index = index; - } -} - -static bool -has_point_coord(unsigned mask, gl_varying_slot loc) -{ - if ((loc >= VARYING_SLOT_TEX0) && (loc <= VARYING_SLOT_TEX7)) - return (mask & (1 << (loc - VARYING_SLOT_TEX0))); - else if (loc == VARYING_SLOT_PNTC) - return (mask & (1 << 8)); - else - return false; -} - -/* Helpers for manipulating stream out information so we can pack varyings - * accordingly. Compute the src_offset for a given captured varying */ - -static struct pipe_stream_output -pan_get_so(struct pipe_stream_output_info *info, gl_varying_slot loc) -{ - for (unsigned i = 0; i < info->num_outputs; ++i) { - if (info->output[i].register_index == loc) - return info->output[i]; - } - - unreachable("Varying not captured"); -} - -/* TODO: Integers */ -static enum mali_format -pan_xfb_format(unsigned nr_components) -{ - switch (nr_components) { - case 1: return MALI_R32F; - case 2: return MALI_RG32F; - case 3: return MALI_RGB32F; - case 4: return MALI_RGBA32F; - default: unreachable("Invalid format"); - } -} - -void -panfrost_emit_varying_descriptor( - struct panfrost_context *ctx, - unsigned vertex_count) -{ - /* Load the shaders */ - - struct panfrost_shader_state *vs = panfrost_get_shader_state(ctx, PIPE_SHADER_VERTEX); - struct panfrost_shader_state *fs = panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT); - unsigned int num_gen_varyings = 0; - - /* Allocate the varying descriptor */ - - size_t vs_size = sizeof(struct mali_attr_meta) * vs->varying_count; - size_t fs_size = sizeof(struct mali_attr_meta) * fs->varying_count; - - struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx); - struct panfrost_transfer trans = panfrost_allocate_transient(batch, - vs_size + fs_size); - - struct pipe_stream_output_info so = vs->stream_output; - - /* Check if this varying is linked by us. This is the case for - * general-purpose, non-captured varyings. If it is, link it. If it's - * not, use the provided stream out information to determine the - * offset, since it was already linked for us. */ - - for (unsigned i = 0; i < vs->varying_count; i++) { - gl_varying_slot loc = vs->varyings_loc[i]; - - bool special = is_special_varying(loc); - bool captured = ((vs->so_mask & (1ll << loc)) ? true : false); - - if (captured) { - struct pipe_stream_output o = pan_get_so(&so, loc); - - unsigned dst_offset = o.dst_offset * 4; /* dwords */ - vs->varyings[i].src_offset = dst_offset; - } else if (!special) { - vs->varyings[i].src_offset = 16 * (num_gen_varyings++); - } - } - - /* Conversely, we need to set src_offset for the captured varyings. - * Here, the layout is defined by the stream out info, not us */ - - /* Link up with fragment varyings */ - bool reads_point_coord = fs->reads_point_coord; - - for (unsigned i = 0; i < fs->varying_count; i++) { - gl_varying_slot loc = fs->varyings_loc[i]; - signed vs_idx = -1; - - /* Link up */ - for (unsigned j = 0; j < vs->varying_count; ++j) { - if (vs->varyings_loc[j] == loc) { - vs_idx = j; - break; - } - } - - /* Either assign or reuse */ - if (vs_idx >= 0) - fs->varyings[i].src_offset = vs->varyings[vs_idx].src_offset; - else - fs->varyings[i].src_offset = 16 * (num_gen_varyings++); - - if (has_point_coord(fs->point_sprite_mask, loc)) - reads_point_coord = true; - } - - memcpy(trans.cpu, vs->varyings, vs_size); - memcpy(trans.cpu + vs_size, fs->varyings, fs_size); - - union mali_attr varyings[PIPE_MAX_ATTRIBS]; - memset(varyings, 0, sizeof(varyings)); - - /* Figure out how many streamout buffers could be bound */ - unsigned so_count = ctx->streamout.num_targets; - for (unsigned i = 0; i < vs->varying_count; i++) { - gl_varying_slot loc = vs->varyings_loc[i]; - - bool captured = ((vs->so_mask & (1ll << loc)) ? true : false); - if (!captured) continue; - - struct pipe_stream_output o = pan_get_so(&so, loc); - so_count = MAX2(so_count, o.output_buffer + 1); - } - - signed idx = so_count; - signed general = idx++; - signed gl_Position = idx++; - signed gl_PointSize = vs->writes_point_size ? (idx++) : -1; - signed gl_PointCoord = reads_point_coord ? (idx++) : -1; - signed gl_FrontFacing = fs->reads_face ? (idx++) : -1; - signed gl_FragCoord = fs->reads_frag_coord ? (idx++) : -1; - - /* Emit the stream out buffers */ - - unsigned output_count = u_stream_outputs_for_vertices( - ctx->active_prim, ctx->vertex_count); - - for (unsigned i = 0; i < so_count; ++i) { - struct pipe_stream_output_target *target = - (i < ctx->streamout.num_targets) ? ctx->streamout.targets[i] : NULL; - - if (target) { - panfrost_emit_streamout(ctx, &varyings[i], so.stride[i], ctx->streamout.offsets[i], output_count, target); - } else { - /* Emit a dummy buffer */ - panfrost_emit_varyings(ctx, &varyings[i], so.stride[i] * 4, output_count); - - /* Clear the attribute type */ - varyings[i].elements &= ~0xF; - } - } - - panfrost_emit_varyings(ctx, &varyings[general], num_gen_varyings * 16, - vertex_count); - - /* fp32 vec4 gl_Position */ - ctx->payloads[PIPE_SHADER_FRAGMENT].postfix.position_varying = - panfrost_emit_varyings(ctx, &varyings[gl_Position], - sizeof(float) * 4, vertex_count); - - - if (panfrost_writes_point_size(ctx)) - ctx->payloads[PIPE_SHADER_FRAGMENT].primitive_size.pointer = - panfrost_emit_varyings(ctx, &varyings[gl_PointSize], - 2, vertex_count); - - if (reads_point_coord) - varyings[gl_PointCoord].elements = MALI_VARYING_POINT_COORD; - - if (fs->reads_face) - varyings[gl_FrontFacing].elements = MALI_VARYING_FRONT_FACING; - - if (fs->reads_frag_coord) - varyings[gl_FragCoord].elements = MALI_VARYING_FRAG_COORD; - - /* Let's go ahead and link varying meta to the buffer in question, now - * that that information is available. VARYING_SLOT_POS is mapped to - * gl_FragCoord for fragment shaders but gl_Positionf or vertex shaders - * */ - - panfrost_emit_varying_meta(trans.cpu, vs, - general, gl_Position, gl_PointSize, - gl_PointCoord, gl_FrontFacing); - - panfrost_emit_varying_meta(trans.cpu + vs_size, fs, - general, gl_FragCoord, gl_PointSize, - gl_PointCoord, gl_FrontFacing); - - /* Replace streamout */ - - struct mali_attr_meta *ovs = (struct mali_attr_meta *) (trans.cpu); - struct mali_attr_meta *ofs = (struct mali_attr_meta *) (trans.cpu + vs_size); - - for (unsigned i = 0; i < vs->varying_count; i++) { - gl_varying_slot loc = vs->varyings_loc[i]; - - bool captured = ((vs->so_mask & (1ll << loc)) ? true : false); - if (!captured) continue; - - struct pipe_stream_output o = pan_get_so(&so, loc); - ovs[i].index = o.output_buffer; - - /* Set the type appropriately. TODO: Integer varyings XXX */ - assert(o.stream == 0); - ovs[i].format = pan_xfb_format(o.num_components); - ovs[i].swizzle = panfrost_get_default_swizzle(o.num_components); - - /* Link to the fragment */ - signed fs_idx = -1; - - /* Link up */ - for (unsigned j = 0; j < fs->varying_count; ++j) { - if (fs->varyings_loc[j] == loc) { - fs_idx = j; - break; - } - } - - if (fs_idx >= 0) { - ofs[fs_idx].index = ovs[i].index; - ofs[fs_idx].format = ovs[i].format; - ofs[fs_idx].swizzle = ovs[i].swizzle; - } - } - - /* Replace point sprite */ - for (unsigned i = 0; i < fs->varying_count; i++) { - /* If we have a point sprite replacement, handle that here. We - * have to translate location first. TODO: Flip y in shader. - * We're already keying ... just time crunch .. */ - - if (has_point_coord(fs->point_sprite_mask, fs->varyings_loc[i])) { - ofs[i].index = gl_PointCoord; - - /* Swizzle out the z/w to 0/1 */ - ofs[i].format = MALI_RG16F; - ofs[i].swizzle = - panfrost_get_default_swizzle(2); - } - } - - /* Fix up unaligned addresses */ - for (unsigned i = 0; i < so_count; ++i) { - if (varyings[i].elements < MALI_RECORD_SPECIAL) - continue; - - unsigned align = (varyings[i].elements & 63); - - /* While we're at it, the SO buffers are linear */ - - if (!align) { - varyings[i].elements |= MALI_ATTR_LINEAR; - continue; - } - - /* We need to adjust alignment */ - varyings[i].elements &= ~63; - varyings[i].elements |= MALI_ATTR_LINEAR; - varyings[i].size += align; - - for (unsigned v = 0; v < vs->varying_count; ++v) { - if (ovs[v].index == i) - ovs[v].src_offset = vs->varyings[v].src_offset + align; - } - - for (unsigned f = 0; f < fs->varying_count; ++f) { - if (ofs[f].index == i) - ofs[f].src_offset = fs->varyings[f].src_offset + align; - } - } - - mali_ptr varyings_p = panfrost_upload_transient(batch, &varyings, idx * sizeof(union mali_attr)); - ctx->payloads[PIPE_SHADER_VERTEX].postfix.varyings = varyings_p; - ctx->payloads[PIPE_SHADER_FRAGMENT].postfix.varyings = varyings_p; - - ctx->payloads[PIPE_SHADER_VERTEX].postfix.varying_meta = trans.gpu; - ctx->payloads[PIPE_SHADER_FRAGMENT].postfix.varying_meta = trans.gpu + vs_size; -} -- 2.30.2