From d36ca7c0a38dcae2e63296b38558844084e21d5d Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Thu, 19 Dec 2019 16:46:43 -0500 Subject: [PATCH] panfrost: Remove pan_shift_odd Padded counts are numbers of the form: n = (2k + 1) * 2^s for k, s integers. Rather than explicitly store k and s separately and then compute this formula on demand, it's much cleaner to store the padded number itself, which is what you manipulate most of the time. When you do need k and s, it is easy to factor by noticing the bitwise representation: s = ctz(n) and k = n >> (s + 1) Signed-off-by: Alyssa Rosenzweig --- src/gallium/drivers/panfrost/pan_context.c | 14 ++-- src/panfrost/encoder/pan_attributes.c | 93 ++++------------------ src/panfrost/encoder/pan_encoder.h | 10 +-- 3 files changed, 22 insertions(+), 95 deletions(-) diff --git a/src/gallium/drivers/panfrost/pan_context.c b/src/gallium/drivers/panfrost/pan_context.c index ea2bb442047..27b6cd7be2f 100644 --- a/src/gallium/drivers/panfrost/pan_context.c +++ b/src/gallium/drivers/panfrost/pan_context.c @@ -1551,16 +1551,16 @@ panfrost_draw_vbo( /* Encode the padded vertex count */ if (info->instance_count > 1) { - struct pan_shift_odd so = - panfrost_padded_vertex_count(vertex_count); + ctx->padded_count = panfrost_padded_vertex_count(vertex_count); - ctx->payloads[PIPE_SHADER_VERTEX].instance_shift = so.shift; - ctx->payloads[PIPE_SHADER_FRAGMENT].instance_shift = so.shift; + unsigned shift = __builtin_ctz(ctx->padded_count); + unsigned k = ctx->padded_count >> (shift + 1); - ctx->payloads[PIPE_SHADER_VERTEX].instance_odd = so.odd; - ctx->payloads[PIPE_SHADER_FRAGMENT].instance_odd = so.odd; + ctx->payloads[PIPE_SHADER_VERTEX].instance_shift = shift; + ctx->payloads[PIPE_SHADER_FRAGMENT].instance_shift = shift; - ctx->padded_count = pan_expand_shift_odd(so); + ctx->payloads[PIPE_SHADER_VERTEX].instance_odd = k; + ctx->payloads[PIPE_SHADER_FRAGMENT].instance_odd = k; } else { ctx->padded_count = vertex_count; diff --git 
a/src/panfrost/encoder/pan_attributes.c b/src/panfrost/encoder/pan_attributes.c index 8c102f2cd96..9899bf8b169 100644 --- a/src/panfrost/encoder/pan_attributes.c +++ b/src/panfrost/encoder/pan_attributes.c @@ -37,82 +37,18 @@ /* Given an odd number (of the form 2k + 1), compute k */ #define ODD(odd) ((odd - 1) >> 1) -/* Given the shift/odd pair, recover the original padded integer */ - -unsigned -pan_expand_shift_odd(struct pan_shift_odd o) -{ - unsigned odd = 2*o.odd + 1; - unsigned shift = 1 << o.shift; - return odd * shift; -} - -static inline struct pan_shift_odd -pan_factored(unsigned pot, unsigned odd) -{ - struct pan_shift_odd out; - - assert(util_is_power_of_two_or_zero(pot)); - assert(odd & 1); - - /* Odd is of the form (2k + 1) = (k << 1) + 1 = (k << 1) | 1. - * - * So (odd >> 1) = ((k << 1) | 1) >> 1 = ((k << 1) >> 1) | (1 >> 1) - * = k | 0 = k */ - - out.odd = (odd >> 1); - - /* POT is the form (1 << shift) */ - out.shift = __builtin_ctz(pot); - - return out; -} - - -/* For small vertices. Second argument is whether the primitive takes a - * power-of-two argument, which determines how rounding works. True for POINTS - * and LINES, false for TRIANGLES. 
Presumably true for QUADS but you'd be crazy - * to try instanced quads on ES class hardware <3 */ - -static struct { - unsigned pot; - unsigned odd; -} small_lut[] = { - { 0, 1 }, - { 1, 1 }, - { 2, 1 }, - { 1, 3 }, - { 4, 1 }, - { 1, 5 }, - { 2, 3 }, - { 1, 7 }, - { 8, 1 }, - { 1, 9 }, - { 2, 5 }, - { 4, 3 }, /* 11 */ - { 4, 3 }, - { 2, 7 }, /* 13 */ - { 2, 7 }, - { 16, 1 }, /* 15 */ - { 16, 1 }, - { 2, 9 }, - { 4, 5 }, /* 20 */ - { 4, 5 } -}; - -static struct pan_shift_odd +static unsigned panfrost_small_padded_vertex_count(unsigned idx) { - return pan_factored( - small_lut[idx].pot, - small_lut[idx].odd); + if (idx == 11 || idx == 13 || idx == 15 || idx == 19) + return idx + 1; + else + return idx; } -static struct pan_shift_odd +static unsigned panfrost_large_padded_vertex_count(uint32_t vertex_count) { - struct pan_shift_odd out = { 0 }; - /* First, we have to find the highest set one */ unsigned highest = 32 - __builtin_clz(vertex_count); @@ -129,22 +65,21 @@ panfrost_large_padded_vertex_count(uint32_t vertex_count) switch (middle_two) { case 0b00: if (nibble & 1) - return pan_factored(1 << n, 9); + return (1 << n) * 9; else - return pan_factored(1 << (n + 1), 5); + return (1 << (n + 1)) * 5; case 0b01: - return pan_factored(1 << (n + 2), 3); + return (1 << (n + 2)) * 3; case 0b10: - return pan_factored(1 << (n + 1), 7); + return (1 << (n + 1)) * 7; case 0b11: - default: /* unreachable */ - return pan_factored(1 << (n + 4), 1); + return (1 << (n + 4)); + default: + return 0; /* unreachable */ } - - return out; } -struct pan_shift_odd +unsigned panfrost_padded_vertex_count(unsigned vertex_count) { if (vertex_count < 20) diff --git a/src/panfrost/encoder/pan_encoder.h b/src/panfrost/encoder/pan_encoder.h index d3eb5fdc3b1..07033ac32c2 100644 --- a/src/panfrost/encoder/pan_encoder.h +++ b/src/panfrost/encoder/pan_encoder.h @@ -88,12 +88,7 @@ const char * panfrost_model_name(unsigned gpu_id); /* Attributes / instancing */ -struct pan_shift_odd { - unsigned 
shift; - unsigned odd; -}; - -struct pan_shift_odd +unsigned panfrost_padded_vertex_count(unsigned vertex_count); unsigned @@ -103,7 +98,4 @@ panfrost_vertex_instanced( unsigned divisor, union mali_attr *attrs); -unsigned -pan_expand_shift_odd(struct pan_shift_odd o); - #endif -- 2.30.2