From: Alyssa Rosenzweig
Date: Thu, 19 Dec 2019 21:04:59 +0000 (-0500)
Subject: panfrost: Rename pan_instancing.c -> pan_attributes.c
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=ddcd68f52b487cd71e08e2120ab5d962a0116cc8;p=mesa.git

panfrost: Rename pan_instancing.c -> pan_attributes.c

Let's follow the naming convention that panfrost command stream code is
organized by command stream structure.

Signed-off-by: Alyssa Rosenzweig
---

diff --git a/src/gallium/drivers/panfrost/Makefile.sources b/src/gallium/drivers/panfrost/Makefile.sources
index acca7d652e8..2fd578877ef 100644
--- a/src/gallium/drivers/panfrost/Makefile.sources
+++ b/src/gallium/drivers/panfrost/Makefile.sources
@@ -22,7 +22,7 @@ C_SOURCES := \
 pan_format.c \
 pan_format.h \
 pan_fragment.c \
- pan_instancing.c \
+ pan_attributes.c \
 pan_job.c \
 pan_job.h \
 pan_mfbd.c \
diff --git a/src/gallium/drivers/panfrost/meson.build b/src/gallium/drivers/panfrost/meson.build
index feecbbdb363..87eb74b5305 100644
--- a/src/gallium/drivers/panfrost/meson.build
+++ b/src/gallium/drivers/panfrost/meson.build
@@ -42,7 +42,7 @@ files_panfrost = files(
 'pan_blend_cso.c',
 'pan_compute.c',
 'pan_fragment.c',
- 'pan_instancing.c',
+ 'pan_attributes.c',
 'pan_scoreboard.c',
 'pan_sfbd.c',
 'pan_mfbd.c',
diff --git a/src/gallium/drivers/panfrost/pan_attributes.c b/src/gallium/drivers/panfrost/pan_attributes.c
new file mode 100644
index 00000000000..473026cb8a0
--- /dev/null
+++ b/src/gallium/drivers/panfrost/pan_attributes.c
@@ -0,0 +1,351 @@
+/*
+ * Copyright (C) 2018-2019 Alyssa Rosenzweig
+ * Copyright (C) 2019 Collabora, Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include "pan_bo.h"
+#include "pan_context.h"
+
+/* See mali_job for notes on how this works. But basically, for small vertex
+ * counts, we have a lookup table, and for large vertex counts, we look at the
+ * high bits as a heuristic. This has to match exactly how the hardware
+ * calculates this (which is why the algorithm is so weird) or else instancing
+ * will break. */
+
+/* Given an odd number (of the form 2k + 1), compute k */
+#define ODD(odd) ((odd - 1) >> 1)
+
+/* Given the shift/odd pair, recover the original padded integer */
+
+unsigned
+pan_expand_shift_odd(struct pan_shift_odd o)
+{
+        unsigned odd = 2*o.odd + 1;
+        unsigned shift = 1 << o.shift;
+        return odd * shift;
+}
+
+static inline struct pan_shift_odd
+pan_factored(unsigned pot, unsigned odd)
+{
+        struct pan_shift_odd out;
+
+        assert(util_is_power_of_two_or_zero(pot));
+        assert(odd & 1);
+
+        /* Odd is of the form (2k + 1) = (k << 1) + 1 = (k << 1) | 1.
+         *
+         * So (odd >> 1) = ((k << 1) | 1) >> 1 = ((k << 1) >> 1) | (1 >> 1)
+         *               = k | 0 = k */
+
+        out.odd = (odd >> 1);
+
+        /* POT is of the form (1 << shift) */
+        out.shift = __builtin_ctz(pot);
+
+        return out;
+}
+
+
+/* For small vertex counts, we use a lookup table. The `pot` argument to
+ * panfrost_padded_vertex_count() below is whether the primitive takes a
+ * power-of-two argument, which determines how rounding works. True for POINTS
+ * and LINES, false for TRIANGLES. Presumably true for QUADS, but you'd be
+ * crazy to try instanced quads on ES-class hardware <3 */
+
+static struct {
+        unsigned pot;
+        unsigned odd;
+} small_lut[] = {
+        { 0, 1 },
+        { 1, 1 },
+        { 2, 1 },
+        { 1, 3 },
+        { 4, 1 },
+        { 1, 5 },
+        { 2, 3 },
+        { 1, 7 },
+        { 8, 1 },
+        { 1, 9 },
+        { 2, 5 },
+        { 4, 3 }, /* 11 */
+        { 4, 3 },
+        { 2, 7 }, /* 13 */
+        { 2, 7 },
+        { 16, 1 }, /* 15 */
+        { 16, 1 },
+        { 2, 9 },
+        { 4, 5 }, /* 20 */
+        { 4, 5 }
+};
+
+static struct pan_shift_odd
+panfrost_small_padded_vertex_count(unsigned idx)
+{
+        return pan_factored(
+                        small_lut[idx].pot,
+                        small_lut[idx].odd);
+}
+
+static struct pan_shift_odd
+panfrost_large_padded_vertex_count(uint32_t vertex_count)
+{
+        struct pan_shift_odd out = { 0 };
+
+        /* First, we have to find the highest set bit */
+        unsigned highest = 32 - __builtin_clz(vertex_count);
+
+        /* Using that, we extract the highest 4 bits */
+        unsigned n = highest - 4;
+        unsigned nibble = (vertex_count >> n) & 0xF;
+
+        /* Great, we have the nibble. Now we can just try possibilities. Note
+         * that we don't care about the bottommost bit in most cases, and we
+         * know the top bit must be 1 */
+
+        unsigned middle_two = (nibble >> 1) & 0x3;
+
+        switch (middle_two) {
+        case 0b00:
+                if (nibble & 1)
+                        return pan_factored(1 << n, 9);
+                else
+                        return pan_factored(1 << (n + 1), 5);
+        case 0b01:
+                return pan_factored(1 << (n + 2), 3);
+        case 0b10:
+                return pan_factored(1 << (n + 1), 7);
+        case 0b11:
+                return pan_factored(1 << (n + 4), 1);
+        default:
+                unreachable("Invalid two bits");
+        }
+
+        return out;
+}
+
+struct pan_shift_odd
+panfrost_padded_vertex_count(
+                unsigned vertex_count,
+                bool pot)
+{
+        assert(vertex_count > 0);
+
+        if (vertex_count < 20) {
+                /* Add an off-by-one if it won't align naturally (quirk of the hardware) */
+                //if (!pot)
+                //        vertex_count++;
+
+                return panfrost_small_padded_vertex_count(vertex_count);
+        } else
+                return panfrost_large_padded_vertex_count(vertex_count);
+}
+
+/* The much, much more irritating case -- instancing is enabled. See
+ * panfrost_job.h for notes on how this works */
+
+static unsigned
+panfrost_vertex_instanced(
+                struct panfrost_batch *batch,
+                struct panfrost_resource *rsrc,
+                unsigned divisor,
+                union mali_attr *attrs,
+                mali_ptr addr,
+                unsigned vertex_count,
+                unsigned instance_count)
+{
+        /* First, grab the padded vertex count */
+
+        struct pan_shift_odd o = {
+                .shift = batch->ctx->payloads[PIPE_SHADER_FRAGMENT].instance_shift,
+                .odd = batch->ctx->payloads[PIPE_SHADER_FRAGMENT].instance_odd,
+        };
+
+        unsigned padded_count = batch->ctx->padded_count;
+
+        /* Packing varies depending on whether there is an instance divisor.
+         * When there is a divisor, the hardware-level divisor is actually the
+         * product of the instance divisor and the padded count */
+
+        unsigned hw_divisor = padded_count * divisor;
+
+        if (divisor == 0) {
+                /* Per-vertex attributes use the MODULO mode. First, compute
+                 * the modulus */
+
+                attrs->elements |= MALI_ATTR_MODULO;
+                attrs->shift = o.shift;
+                attrs->extra_flags = o.odd;
+
+                return 1;
+        } else if (util_is_power_of_two_or_zero(hw_divisor)) {
+                /* If there is a divisor but the hardware divisor works out to
+                 * a power of two (not terribly exceptional), we can use an
+                 * easy path (just shifting) */
+
+                attrs->elements |= MALI_ATTR_POT_DIVIDE;
+                attrs->shift = __builtin_ctz(hw_divisor);
+
+                return 1;
+        } else {
+                /* We have an NPOT divisor. Here's the fun one (multiplying by
+                 * the inverse and shifting) */
+
+                /* floor(log2(d)) */
+                unsigned shift = util_logbase2(hw_divisor);
+
+                /* m = ceil(2^(32 + shift) / d) */
+                uint64_t shift_hi = 32 + shift;
+                uint64_t t = 1ll << shift_hi;
+                double t_f = t;
+                double hw_divisor_d = hw_divisor;
+                double m_f = ceil(t_f / hw_divisor_d);
+                unsigned m = m_f;
+
+                /* Default case */
+                uint32_t magic_divisor = m, extra_flags = 0;
+
+                /* e = 2^(shift + 32) % d */
+                uint64_t e = t % hw_divisor;
+
+                /* Apply the round-down algorithm? e <= 2^shift? XXX: The blob
+                 * seems to use a different condition */
+                if (e <= (1ll << shift)) {
+                        magic_divisor = m - 1;
+                        extra_flags = 1;
+                }
+
+                /* Top flag implicitly set */
+                assert(magic_divisor & (1u << 31));
+                magic_divisor &= ~(1u << 31);
+
+                /* Upload to two different slots */
+
+                attrs[0].elements |= MALI_ATTR_NPOT_DIVIDE;
+                attrs[0].shift = shift;
+                attrs[0].extra_flags = extra_flags;
+
+                attrs[1].unk = 0x20;
+                attrs[1].magic_divisor = magic_divisor;
+                attrs[1].zero = 0;
+                attrs[1].divisor = divisor;
+
+                return 2;
+        }
+}
+
+void
+panfrost_emit_vertex_data(struct panfrost_batch *batch)
+{
+        struct panfrost_context *ctx = batch->ctx;
+        struct panfrost_vertex_state *so = ctx->vertex;
+
+        /* Staged mali_attr, and index into them. i != k, depending on the
+         * vertex buffer mask and instancing. Twice as much room is allocated,
+         * for a worst case of NPOT_DIVIDEs, which take up an extra slot */
+        union mali_attr attrs[PIPE_MAX_ATTRIBS * 2];
+        unsigned k = 0;
+
+        unsigned vertex_count = ctx->vertex_count;
+        unsigned instanced_count = ctx->instance_count;
+
+        for (unsigned i = 0; i < so->num_elements; ++i) {
+                /* We map a mali_attr to be 1:1 with the mali_attr_meta, which
+                 * means duplicating some vertex buffers (who cares? aside from
+                 * maybe some caching implications but I somehow doubt that
+                 * matters) */
+
+                struct pipe_vertex_element *elem = &so->pipe[i];
+                unsigned vbi = elem->vertex_buffer_index;
+
+                /* The exception to the 1:1 mapping is that we can have
+                 * multiple entries (NPOT divisors), so we fix up anyway */
+
+                so->hw[i].index = k;
+
+                if (!(ctx->vb_mask & (1 << vbi))) continue;
+
+                struct pipe_vertex_buffer *buf = &ctx->vertex_buffers[vbi];
+                struct panfrost_resource *rsrc = (struct panfrost_resource *) (buf->buffer.resource);
+
+                if (!rsrc) continue;
+
+                /* Align to 64 bytes by masking off the lower bits. This
+                 * will be adjusted back when we fix up the src_offset in
+                 * mali_attr_meta */
+
+                mali_ptr raw_addr = rsrc->bo->gpu + buf->buffer_offset;
+                mali_ptr addr = raw_addr & ~63;
+                unsigned chopped_addr = raw_addr - addr;
+
+                /* Add a dependency of the batch on the vertex buffer */
+                panfrost_batch_add_bo(batch, rsrc->bo,
+                                      PAN_BO_ACCESS_SHARED |
+                                      PAN_BO_ACCESS_READ |
+                                      PAN_BO_ACCESS_VERTEX_TILER);
+
+                /* Set common fields */
+                attrs[k].elements = addr;
+                attrs[k].stride = buf->stride;
+
+                /* Since we advanced the base pointer, we shrink the buffer
+                 * size */
+                attrs[k].size = rsrc->base.width0 - buf->buffer_offset;
+
+                /* We need to add back the extra size we masked off (for
+                 * correctness) so the data doesn't get clamped away */
+                attrs[k].size += chopped_addr;
+
+                /* For the non-instanced case, make sure we initialize */
+                attrs[k].shift = attrs[k].extra_flags = 0;
+
+                /* Instancing uses a dramatically different code path from
+                 * linear, so dispatch for the actual emission now that the
+                 * common code is finished */
+
+                unsigned divisor = elem->instance_divisor;
+
+                if (divisor && instanced_count == 1) {
+                        /* Silly corner case where there's a divisor but only
+                         * a single instance, so there's no legitimate
+                         * instancing. We want *every* attribute to be the
+                         * same, so set stride to zero so we don't go
+                         * anywhere. */
+
+                        attrs[k].size = attrs[k].stride + chopped_addr;
+                        attrs[k].stride = 0;
+                        attrs[k++].elements |= MALI_ATTR_LINEAR;
+                } else if (instanced_count <= 1) {
+                        /* Normal, non-instanced attributes */
+                        attrs[k++].elements |= MALI_ATTR_LINEAR;
+                } else {
+                        k += panfrost_vertex_instanced(
+                                     batch, rsrc, divisor, &attrs[k], addr, vertex_count, instanced_count);
+                }
+        }
+
+        /* Upload whatever we emitted and go */
+
+        ctx->payloads[PIPE_SHADER_VERTEX].postfix.attributes =
+                panfrost_upload_transient(batch, attrs, k * sizeof(union mali_attr));
+}
diff --git a/src/gallium/drivers/panfrost/pan_instancing.c b/src/gallium/drivers/panfrost/pan_instancing.c
deleted file mode 100644
index 473026cb8a0..00000000000
--- a/src/gallium/drivers/panfrost/pan_instancing.c
+++ /dev/null
@@ -1,351 +0,0 @@
[The 351 removed lines are byte-for-byte identical to pan_attributes.c as
added above (same blob 473026cb8a0), so they are not repeated here.]
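---

Below are standalone sketches of the arithmetic tricks used above. They are
illustrations only, not part of the patch; struct and function names are
local stand-ins, and all of them assume a C toolchain with GCC-style
builtins (__builtin_ctz and friends).

The shift/odd encoding stores a padded vertex count n = (2k + 1) << shift
as the pair (shift, odd = k). A minimal sketch of the round trip:

#include <assert.h>
#include <stdio.h>

/* Local stand-in for the driver's struct pan_shift_odd */
struct shift_odd { unsigned shift, odd; };

/* Recover n = (2*odd + 1) << shift, as pan_expand_shift_odd() does */
static unsigned expand(struct shift_odd o)
{
        return (2 * o.odd + 1) << o.shift;
}

/* Split n into its power-of-two and odd factors; this fuses the
 * __builtin_ctz() and (odd >> 1) steps of pan_factored() into one helper */
static struct shift_odd factor(unsigned n)
{
        struct shift_odd o;
        o.shift = __builtin_ctz(n);
        o.odd = (n >> o.shift) >> 1; /* (2k + 1) >> 1 == k */
        return o;
}

int main(void)
{
        /* Every positive integer round-trips through the encoding */
        for (unsigned n = 1; n < 100000; ++n)
                assert(expand(factor(n)) == n);

        struct shift_odd o = factor(24); /* 24 = 3 << 3 */
        printf("24 -> shift=%u odd=%u\n", o.shift, o.odd);
        return 0;
}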
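The large-count heuristic keys off the top four bits of the count. Here is
a standalone copy of that logic (same arithmetic, local names) printing
what a few counts pad to; per the driver, only counts of 20 or more take
this path:

#include <stdio.h>

struct shift_odd { unsigned shift, odd; };

static struct shift_odd factored(unsigned pot, unsigned odd)
{
        struct shift_odd o = { __builtin_ctz(pot), odd >> 1 };
        return o;
}

/* Same bit manipulation as panfrost_large_padded_vertex_count() */
static struct shift_odd large_padded(unsigned count)
{
        unsigned highest = 32 - __builtin_clz(count);
        unsigned n = highest - 4;
        unsigned nibble = (count >> n) & 0xF;
        unsigned middle_two = (nibble >> 1) & 0x3;

        switch (middle_two) {
        case 0: return (nibble & 1) ? factored(1 << n, 9)
                                    : factored(1 << (n + 1), 5);
        case 1: return factored(1 << (n + 2), 3);
        case 2: return factored(1 << (n + 1), 7);
        default: return factored(1 << (n + 4), 1);
        }
}

int main(void)
{
        const unsigned counts[] = { 21, 27, 34, 50, 100 };

        for (unsigned i = 0; i < 5; ++i) {
                struct shift_odd o = large_padded(counts[i]);
                /* Prints 21 -> 24, 27 -> 28, 34 -> 40, 50 -> 56, 100 -> 112 */
                printf("%3u -> %3u\n", counts[i], (2 * o.odd + 1) << o.shift);
        }
        return 0;
}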
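The NPOT path is division by an invariant integer via multiply-and-shift
(Granlund/Montgomery style). A standalone check for one divisor, d = 11,
chosen because the unadjusted magic happens to be exact there; the
e <= 2^shift round-down branch (and whatever condition the blob really
uses) is out of scope for this sketch:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
        const uint32_t d = 11;                        /* an NPOT divisor */
        const unsigned shift = 31 - __builtin_clz(d); /* floor(log2(11)) = 3 */
        const uint64_t t = 1ull << (32 + shift);      /* 2^(32 + shift) */

        /* m = ceil(2^(32 + shift) / d), in pure integer arithmetic */
        const uint32_t m = (uint32_t)((t + d - 1) / d);
        assert(m == 0xBA2E8BA3u); /* the classic divide-by-11 magic */

        /* Here e = 2^(32 + shift) % d = 10 > 2^shift = 8, so the driver's
         * round-down adjustment would not fire and m is used as-is (with
         * the implicit top bit masked off before upload) */
        assert(t % d > (1ull << shift));

        /* (x * m) >> (32 + shift) reproduces x / d across the 32-bit range */
        for (uint64_t x = 0; x <= 0xFFFFFFFFull; x += 97)
                assert((uint32_t)((x * m) >> (32 + shift)) == (uint32_t)x / d);
        assert((0xFFFFFFFFull * m) >> (32 + shift) == 0xFFFFFFFFu / d);

        printf("d=%u shift=%u m=0x%08x: ok\n", d, shift, m);
        return 0;
}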
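Finally, the 64-byte alignment dance in panfrost_emit_vertex_data(): the
base pointer is rounded down, the record grows by the chopped-off amount,
and src_offset is bumped back to match elsewhere in the driver. A sketch
with a made-up address and buffer size:

#include <assert.h>
#include <inttypes.h>
#include <stdio.h>

int main(void)
{
        /* Hypothetical GPU VA of a vertex buffer (BO base + buffer_offset) */
        uint64_t raw_addr = 0x100002eull;

        /* Align down to 64 bytes by masking off the low bits */
        uint64_t addr = raw_addr & ~63ull;
        unsigned chopped_addr = (unsigned)(raw_addr - addr);

        assert((addr & 63) == 0);
        assert(addr + chopped_addr == raw_addr);

        /* Grow the record by the chopped amount so data near the end of
         * the buffer isn't clamped away, mirroring attrs[k].size +=
         * chopped_addr above; 4096 is a made-up buffer size */
        unsigned size = 4096 + chopped_addr;

        printf("raw=0x%" PRIx64 " base=0x%" PRIx64 " chopped=%u size=%u\n",
               raw_addr, addr, chopped_addr, size);
        return 0;
}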