From 5a7688fdecd76c7d9cd87f6f6c93eb32870a2146 Mon Sep 17 00:00:00 2001 From: Tomeu Vizoso Date: Thu, 11 Jul 2019 08:06:41 +0200 Subject: [PATCH] panfrost: Use 64-bit descriptors globally Midgard supports two modes of operation, 32-bit mode and 64-bit mode. The GPU is natively 64-bit, but job descriptors can be submitted in 32-bit mode. Among other changes, 32-bit mode shortens pointer sizes to use 32-bit pointers rather than the full 64-bit range. The blob decides which mode to use based on the CPU bitness, so an armhf system uses 32-bit descriptors and an aarch64 system uses 64-bit descriptors. For a while, we mimicked this, bu inevitably this caused the 32-bit support to lag behind as our reference platform is 64-bit. To combat the code staleness, we traced an older GPU paired with a 64-bit CPU (the Midgard T720 on-board the sunxi H64). From there, we could tell which fields were really about hardware and which fields were simply reflections of the descriptor bitness. From there, we decided to remove support for 32-bit descriptors entirely, using 64-bit descriptors unconditionally. There is minimal performance penalty for this in practice, and it allows us to unify these disparate code paths. This fixes: - T860 + armhf - T820 + armhf - T760 + aarch64 And will help bringup of 1st/2nd generation Midgard regardless of CPU. [Work done by Tomeu. Commit message written by Alyssa.] v2: Add comments preserving information about the old behaviour for future reference. Fix a compiler warning. (Alyssa) Signed-off-by: Alyssa Rosenzweig --- src/gallium/drivers/panfrost/pan_context.c | 27 ++++--------- src/gallium/drivers/panfrost/pan_fragment.c | 2 - src/gallium/drivers/panfrost/pan_screen.c | 13 +------ src/panfrost/include/panfrost-job.h | 42 ++++++++------------- src/panfrost/pandecode/decode.c | 9 ----- 5 files changed, 26 insertions(+), 67 deletions(-) diff --git a/src/gallium/drivers/panfrost/pan_context.c b/src/gallium/drivers/panfrost/pan_context.c index d720f086361..49ca1f570f1 100644 --- a/src/gallium/drivers/panfrost/pan_context.c +++ b/src/gallium/drivers/panfrost/pan_context.c @@ -272,8 +272,10 @@ panfrost_invalidate_frame(struct panfrost_context *ctx) static void panfrost_emit_vertex_payload(struct panfrost_context *ctx) { + /* 0x2 bit clear on 32-bit T6XX */ + struct midgard_payload_vertex_tiler payload = { - .gl_enables = 0x4 | (ctx->is_t6xx ? 0 : 0x2), + .gl_enables = 0x4 | 0x2, }; memcpy(&ctx->payload_vertex, &payload, sizeof(payload)); @@ -454,9 +456,7 @@ panfrost_default_shader_backend(struct panfrost_context *ctx) .unknown2_4 = MALI_NO_MSAA | 0x4e0, }; - if (ctx->is_t6xx) { - shader.unknown2_4 |= 0x10; - } + /* unknown2_4 has 0x10 bit set on 32-bit T6XX */ struct pipe_stencil_state default_stencil = { .enabled = 0, @@ -491,24 +491,14 @@ panfrost_vertex_tiler_job(struct panfrost_context *ctx, bool is_tiler) { struct mali_job_descriptor_header job = { .job_type = is_tiler ? JOB_TYPE_TILER : JOB_TYPE_VERTEX, -#ifdef __LP64__ .job_descriptor_size = 1, -#endif }; struct midgard_payload_vertex_tiler *payload = is_tiler ? &ctx->payload_tiler : &ctx->payload_vertex; - /* There's some padding hacks on 32-bit */ - -#ifdef __LP64__ - int offset = 0; -#else - int offset = 4; -#endif struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, sizeof(job) + sizeof(*payload)); - memcpy(transfer.cpu, &job, sizeof(job)); - memcpy(transfer.cpu + sizeof(job) - offset, payload, sizeof(*payload)); + memcpy(transfer.cpu + sizeof(job), payload, sizeof(*payload)); return transfer; } @@ -1753,7 +1743,7 @@ panfrost_draw_vbo( ctx->payload_tiler.prefix.index_count = MALI_POSITIVE(ctx->vertex_count); /* Reverse index state */ - ctx->payload_tiler.prefix.indices = (uintptr_t) NULL; + ctx->payload_tiler.prefix.indices = (u64) NULL; } /* Dispatch "compute jobs" for the vertex/tiler pair as (1, @@ -1815,13 +1805,12 @@ panfrost_create_rasterizer_state( struct pipe_context *pctx, const struct pipe_rasterizer_state *cso) { - struct panfrost_context *ctx = pan_context(pctx); struct panfrost_rasterizer *so = CALLOC_STRUCT(panfrost_rasterizer); so->base = *cso; - /* Bitmask, unknown meaning of the start value */ - so->tiler_gl_enables = ctx->is_t6xx ? 0x105 : 0x7; + /* Bitmask, unknown meaning of the start value. 0x105 on 32-bit T6XX */ + so->tiler_gl_enables = 0x7; if (cso->front_ccw) so->tiler_gl_enables |= MALI_FRONT_CCW_TOP; diff --git a/src/gallium/drivers/panfrost/pan_fragment.c b/src/gallium/drivers/panfrost/pan_fragment.c index ab42e763be7..7ffb9db0a05 100644 --- a/src/gallium/drivers/panfrost/pan_fragment.c +++ b/src/gallium/drivers/panfrost/pan_fragment.c @@ -70,9 +70,7 @@ panfrost_fragment_job(struct panfrost_context *ctx, bool has_draws) struct mali_job_descriptor_header header = { .job_type = JOB_TYPE_FRAGMENT, .job_index = 1, -#ifdef __LP64__ .job_descriptor_size = 1 -#endif }; struct panfrost_job *job = panfrost_get_job_for_fbo(ctx); diff --git a/src/gallium/drivers/panfrost/pan_screen.c b/src/gallium/drivers/panfrost/pan_screen.c index ffd2de4734e..6fe9e60f69a 100644 --- a/src/gallium/drivers/panfrost/pan_screen.c +++ b/src/gallium/drivers/panfrost/pan_screen.c @@ -511,22 +511,13 @@ panfrost_create_screen(int fd, struct renderonly *ro) screen->gpu_id = panfrost_drm_query_gpu_version(screen); - /* Check if we're loading against a supported GPU model - * paired with a supported CPU (differences from - * armhf/aarch64 break models on incompatible CPUs at the - * moment -- this is a TODO). In other words, we whitelist - * RK3288, RK3399, and S912, which are verified to work. */ + /* Check if we're loading against a supported GPU model. */ switch (screen->gpu_id) { -#ifdef __LP64__ + case 0x750: /* T760 */ case 0x820: /* T820 */ case 0x860: /* T860 */ break; -#else - case 0x750: /* T760 */ - break; -#endif - default: /* Fail to load against untested models */ debug_printf("panfrost: Unsupported model %X", diff --git a/src/panfrost/include/panfrost-job.h b/src/panfrost/include/panfrost-job.h index 0fbe0a2fa10..297d0806adc 100644 --- a/src/panfrost/include/panfrost-job.h +++ b/src/panfrost/include/panfrost-job.h @@ -31,7 +31,7 @@ #include #include -#define MALI_SHORT_PTR_BITS (sizeof(uintptr_t)*8) +#define MALI_SHORT_PTR_BITS (sizeof(u64)*8) #define MALI_FBD_HIERARCHY_WEIGHTS 8 @@ -935,7 +935,7 @@ struct mali_vertex_tiler_prefix { * indices (width depends on flags). Thanks, guys, for not making my * life insane for once! NULL for non-indexed draws. */ - uintptr_t indices; + u64 indices; } __attribute__((packed)); /* Point size / line width can either be specified as a 32-bit float (for @@ -947,7 +947,7 @@ struct mali_vertex_tiler_prefix { union midgard_primitive_size { float constant; - uintptr_t pointer; + u64 pointer; }; struct bifrost_vertex_only { @@ -1011,34 +1011,34 @@ struct mali_vertex_tiler_postfix { * output from the vertex shader for tiler jobs. */ - uintptr_t position_varying; + u64 position_varying; /* An array of mali_uniform_buffer_meta's. The size is given by the * shader_meta. */ - uintptr_t uniform_buffers; + u64 uniform_buffers; /* This is a pointer to an array of pointers to the texture * descriptors, number of pointers bounded by number of textures. The * indirection is needed to accomodate varying numbers and sizes of * texture descriptors */ - uintptr_t texture_trampoline; + u64 texture_trampoline; /* For OpenGL, from what I've seen, this is intimately connected to * texture_meta. cwabbott says this is not the case under Vulkan, hence * why this field is seperate (Midgard is Vulkan capable). Pointer to * array of sampler descriptors (which are uniform in size) */ - uintptr_t sampler_descriptor; + u64 sampler_descriptor; - uintptr_t uniforms; + u64 uniforms; u8 flags : 4; - uintptr_t _shader_upper : MALI_SHORT_PTR_BITS - 4; /* struct shader_meta */ - uintptr_t attributes; /* struct attribute_buffer[] */ - uintptr_t attribute_meta; /* attribute_meta[] */ - uintptr_t varyings; /* struct attr */ - uintptr_t varying_meta; /* pointer */ - uintptr_t viewport; - uintptr_t occlusion_counter; /* A single bit as far as I can tell */ + u64 _shader_upper : MALI_SHORT_PTR_BITS - 4; /* struct shader_meta */ + u64 attributes; /* struct attribute_buffer[] */ + u64 attribute_meta; /* attribute_meta[] */ + u64 varyings; /* struct attr */ + u64 varying_meta; /* pointer */ + u64 viewport; + u64 occlusion_counter; /* A single bit as far as I can tell */ /* Note: on Bifrost, this isn't actually the FBD. It points to * bifrost_scratchpad instead. However, it does point to the same thing @@ -1048,16 +1048,8 @@ struct mali_vertex_tiler_postfix { } __attribute__((packed)); struct midgard_payload_vertex_tiler { -#ifndef __LP64__ - union midgard_primitive_size primitive_size; -#endif - struct mali_vertex_tiler_prefix prefix; -#ifndef __LP64__ - u32 zero3; -#endif - u16 gl_enables; // 0x5 /* Both zero for non-instanced draws. For instanced draws, a @@ -1072,13 +1064,11 @@ struct midgard_payload_vertex_tiler { /* Offset for first vertex in buffer */ u32 draw_start; - uintptr_t zero5; + u64 zero5; struct mali_vertex_tiler_postfix postfix; -#ifdef __LP64__ union midgard_primitive_size primitive_size; -#endif } __attribute__((packed)); struct bifrost_payload_vertex { diff --git a/src/panfrost/pandecode/decode.c b/src/panfrost/pandecode/decode.c index 5d38ec22b19..7e7fed81f99 100644 --- a/src/panfrost/pandecode/decode.c +++ b/src/panfrost/pandecode/decode.c @@ -2163,15 +2163,6 @@ pandecode_vertex_or_tiler_job_mdg(const struct mali_job_descriptor_header *h, if (v->draw_start) pandecode_prop("draw_start = %d", v->draw_start); -#ifndef __LP64__ - - if (v->zero3) { - pandecode_msg("Zero tripped\n"); - pandecode_prop("zero3 = 0x%" PRIx32, v->zero3); - } - -#endif - if (v->zero5) { pandecode_msg("Zero tripped\n"); pandecode_prop("zero5 = 0x%" PRIx64, v->zero5); -- 2.30.2