From: Alyssa Rosenzweig Date: Fri, 7 Jun 2019 21:25:28 +0000 (-0700) Subject: panfrost: Refactor texture/sampler upload X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=416fc3b5ef8606408fd28ff0a2326e9e58a92632;p=mesa.git panfrost: Refactor texture/sampler upload We move some code packing the texture/sampler descriptors into dedicated functions (out of the terrifyingly long emit_for_draw monolith), cleaning them up as we go. The discovery triggering the cleanup is the format for including manual strides in the presence of mipmaps/cubemaps. Rather than being placed at the end like previously assumed, they are interleaved after each address. This difference is relevant when handling NPOT linear mipmaps. Signed-off-by: Alyssa Rosenzweig --- diff --git a/src/gallium/drivers/panfrost/include/panfrost-job.h b/src/gallium/drivers/panfrost/include/panfrost-job.h index 8a4a7644070..fd23499a00c 100644 --- a/src/gallium/drivers/panfrost/include/panfrost-job.h +++ b/src/gallium/drivers/panfrost/include/panfrost-job.h @@ -1112,6 +1112,9 @@ enum mali_wrap_mode { /* Cubemap bloats everything up */ #define MAX_FACES (6) +/* For each pointer, there is an address and optionally also a stride */ +#define MAX_ELEMENTS (2) + /* Corresponds to the type passed to glTexImage2D and so forth */ /* Flags for usage2 */ @@ -1155,7 +1158,7 @@ struct mali_texture_descriptor { uint32_t unknown6; uint32_t unknown7; - mali_ptr swizzled_bitmaps[MAX_MIP_LEVELS * MAX_FACES]; + mali_ptr payload[MAX_MIP_LEVELS * MAX_FACES * MAX_ELEMENTS]; } __attribute__((packed)); /* Used as part of filter_mode */ diff --git a/src/gallium/drivers/panfrost/pan_context.c b/src/gallium/drivers/panfrost/pan_context.c index 94fcae5fe4e..b08f50291fb 100644 --- a/src/gallium/drivers/panfrost/pan_context.c +++ b/src/gallium/drivers/panfrost/pan_context.c @@ -848,6 +848,109 @@ panfrost_stage_attributes(struct panfrost_context *ctx) ctx->payload_vertex.postfix.attribute_meta = transfer.gpu; } +static void 
+panfrost_upload_sampler_descriptors(struct panfrost_context *ctx) +{ + size_t desc_size = sizeof(struct mali_sampler_descriptor); + + for (int t = 0; t <= PIPE_SHADER_FRAGMENT; ++t) { + if (!ctx->sampler_count[t]) continue; + + size_t transfer_size = desc_size * ctx->sampler_count[t]; + + struct panfrost_transfer transfer = + panfrost_allocate_transient(ctx, transfer_size); + + struct mali_sampler_descriptor *desc = + (struct mali_sampler_descriptor *) transfer.cpu; + + for (int i = 0; i < ctx->sampler_count[t]; ++i) + desc[i] = ctx->samplers[t][i]->hw; + + if (t == PIPE_SHADER_FRAGMENT) + ctx->payload_tiler.postfix.sampler_descriptor = transfer.gpu; + else if (t == PIPE_SHADER_VERTEX) + ctx->payload_vertex.postfix.sampler_descriptor = transfer.gpu; + else + assert(0); + } +} + +/* Computes the address to a texture at a particular slice */ + +static mali_ptr +panfrost_get_texture_address( + struct panfrost_resource *rsrc, + unsigned level, unsigned face) +{ + unsigned level_offset = rsrc->bo->slices[level].offset; + unsigned face_offset = face * rsrc->bo->cubemap_stride; + + return rsrc->bo->gpu + level_offset + face_offset; + +} + +static mali_ptr +panfrost_upload_tex( + struct panfrost_context *ctx, + struct panfrost_sampler_view *view) +{ + if (!view) + return (mali_ptr) NULL; + + struct pipe_resource *tex_rsrc = view->base.texture; + struct panfrost_resource *rsrc = (struct panfrost_resource *) tex_rsrc; + + /* Do we interleave an explicit stride with every element? 
*/ + + bool has_manual_stride = + view->hw.format.usage2 & MALI_TEX_MANUAL_STRIDE; + + /* Inject the addresses in, interleaving mip levels, cube faces, and + * strides in that order */ + + unsigned idx = 0; + + for (unsigned l = 0; l <= tex_rsrc->last_level; ++l) { + for (unsigned f = 0; f < tex_rsrc->array_size; ++f) { + view->hw.payload[idx++] = + panfrost_get_texture_address(rsrc, l, f); + + if (has_manual_stride) { + view->hw.payload[idx++] = + rsrc->bo->slices[l].stride; + } + } + } + + return panfrost_upload_transient(ctx, &view->hw, + sizeof(struct mali_texture_descriptor)); +} + +static void +panfrost_upload_texture_descriptors(struct panfrost_context *ctx) +{ + for (int t = 0; t <= PIPE_SHADER_FRAGMENT; ++t) { + /* Shortcircuit */ + if (!ctx->sampler_view_count[t]) continue; + + uint64_t trampolines[PIPE_MAX_SHADER_SAMPLER_VIEWS]; + + for (int i = 0; i < ctx->sampler_view_count[t]; ++i) + trampolines[i] = + panfrost_upload_tex(ctx, ctx->sampler_views[t][i]); + + mali_ptr trampoline = panfrost_upload_transient(ctx, trampolines, sizeof(uint64_t) * ctx->sampler_view_count[t]); + + if (t == PIPE_SHADER_FRAGMENT) + ctx->payload_tiler.postfix.texture_trampoline = trampoline; + else if (t == PIPE_SHADER_VERTEX) + ctx->payload_vertex.postfix.texture_trampoline = trampoline; + else + assert(0); + } +} + /* Go through dirty flags and actualise them in the cmdstream. */ void @@ -1040,80 +1143,11 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) /* We stage to transient, so always dirty.. 
*/ panfrost_stage_attributes(ctx); - if (ctx->dirty & PAN_DIRTY_SAMPLERS) { - /* Upload samplers back to back, no padding */ - - for (int t = 0; t <= PIPE_SHADER_FRAGMENT; ++t) { - if (!ctx->sampler_count[t]) continue; - - struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, sizeof(struct mali_sampler_descriptor) * ctx->sampler_count[t]); - struct mali_sampler_descriptor *desc = (struct mali_sampler_descriptor *) transfer.cpu; - - for (int i = 0; i < ctx->sampler_count[t]; ++i) { - desc[i] = ctx->samplers[t][i]->hw; - } - - if (t == PIPE_SHADER_FRAGMENT) - ctx->payload_tiler.postfix.sampler_descriptor = transfer.gpu; - else if (t == PIPE_SHADER_VERTEX) - ctx->payload_vertex.postfix.sampler_descriptor = transfer.gpu; - else - assert(0); - } - } + if (ctx->dirty & PAN_DIRTY_SAMPLERS) + panfrost_upload_sampler_descriptors(ctx); - if (ctx->dirty & PAN_DIRTY_TEXTURES) { - for (int t = 0; t <= PIPE_SHADER_FRAGMENT; ++t) { - /* Shortcircuit */ - if (!ctx->sampler_view_count[t]) continue; - - uint64_t trampolines[PIPE_MAX_SHADER_SAMPLER_VIEWS]; - - for (int i = 0; i < ctx->sampler_view_count[t]; ++i) { - if (!ctx->sampler_views[t][i]) - continue; - - struct pipe_resource *tex_rsrc = ctx->sampler_views[t][i]->base.texture; - struct panfrost_resource *rsrc = (struct panfrost_resource *) tex_rsrc; - - /* Inject the addresses in, interleaving cube - * faces and mip levels appropriately. 
*/ - - for (int l = 0; l <= tex_rsrc->last_level; ++l) { - for (int f = 0; f < tex_rsrc->array_size; ++f) { - unsigned idx = (l * tex_rsrc->array_size) + f; - - ctx->sampler_views[t][i]->hw.swizzled_bitmaps[idx] = - rsrc->bo->gpu + - rsrc->bo->slices[l].offset + - f * rsrc->bo->cubemap_stride; - } - } - - /* Inject the strides */ - unsigned usage2 = ctx->sampler_views[t][i]->hw.format.usage2; - - if (usage2 & MALI_TEX_MANUAL_STRIDE) { - unsigned idx = tex_rsrc->last_level * tex_rsrc->array_size; - idx += tex_rsrc->array_size; - - ctx->sampler_views[t][i]->hw.swizzled_bitmaps[idx] = - rsrc->bo->slices[0].stride; - } - - trampolines[i] = panfrost_upload_transient(ctx, &ctx->sampler_views[t][i]->hw, sizeof(struct mali_texture_descriptor)); - } - - mali_ptr trampoline = panfrost_upload_transient(ctx, trampolines, sizeof(uint64_t) * ctx->sampler_view_count[t]); - - if (t == PIPE_SHADER_FRAGMENT) - ctx->payload_tiler.postfix.texture_trampoline = trampoline; - else if (t == PIPE_SHADER_VERTEX) - ctx->payload_vertex.postfix.texture_trampoline = trampoline; - else - assert(0); - } - } + if (ctx->dirty & PAN_DIRTY_TEXTURES) + panfrost_upload_texture_descriptors(ctx); const struct pipe_viewport_state *vp = &ctx->pipe_viewport; diff --git a/src/gallium/drivers/panfrost/pandecode/decode.c b/src/gallium/drivers/panfrost/pandecode/decode.c index dac27c36684..00678a4c5ed 100644 --- a/src/gallium/drivers/panfrost/pandecode/decode.c +++ b/src/gallium/drivers/panfrost/pandecode/decode.c @@ -1508,7 +1508,7 @@ pandecode_replay_vertex_tiler_postfix_pre(const struct mali_vertex_tiler_postfix pandecode_prop("unknown6 = 0x%" PRIx32, t->unknown6); pandecode_prop("unknown7 = 0x%" PRIx32, t->unknown7); - pandecode_log(".swizzled_bitmaps = {\n"); + pandecode_log(".payload = {\n"); pandecode_indent++; /* A bunch of bitmap pointers follow. 
@@ -1518,32 +1518,19 @@ pandecode_replay_vertex_tiler_postfix_pre(const struct mali_vertex_tiler_postfix * possibilities to futureproof */ int bitmap_count = MALI_NEGATIVE(t->nr_mipmap_levels); + bool manual_stride = f.usage2 & MALI_TEX_MANUAL_STRIDE; - if (!f.is_not_cubemap) { - /* Miptree for each face */ + /* Miptree for each face */ + if (!f.is_not_cubemap) bitmap_count *= 6; - } - - if (f.usage2 & MALI_TEX_MANUAL_STRIDE) { - /* Stride for each... what exactly? TODO More traces */ - - if (bitmap_count > 1) { - pandecode_msg("Manual stride with mip/cubemaps, decode uncertain"); - } - /* This is a guess, we've only - * seen for 1-level non-mip 2D - * */ + /* Stride for each element */ + if (manual_stride) + bitmap_count *= 2; - bitmap_count += 1; - } - - int max_count = sizeof(t->swizzled_bitmaps) / sizeof(t->swizzled_bitmaps[0]); - - if (bitmap_count > max_count) { - pandecode_msg("XXX: bitmap count tripped"); - bitmap_count = max_count; - } + /* Sanity check the size */ + int max_count = sizeof(t->payload) / sizeof(t->payload[0]); + assert (bitmap_count <= max_count); /* Dump more to be safe, but not _that_ much more */ int safe_count = MIN2(bitmap_count * 2, max_count); @@ -1553,15 +1540,15 @@ pandecode_replay_vertex_tiler_postfix_pre(const struct mali_vertex_tiler_postfix /* How we dump depends if this is a stride or a pointer */ - if ((f.usage2 & MALI_TEX_MANUAL_STRIDE) && ((i + 1) == bitmap_count)) { + if ((f.usage2 & MALI_TEX_MANUAL_STRIDE) && (i & 1)) { /* signed 32-bit snuck in as a 64-bit pointer */ - uint64_t stride_set = t->swizzled_bitmaps[i]; + uint64_t stride_set = t->payload[i]; uint32_t clamped_stride = stride_set; int32_t stride = clamped_stride; assert(stride_set == clamped_stride); pandecode_log("%s(mali_ptr) %d /* stride */, \n", prefix, stride); } else { - char *a = pointer_as_memory_reference(t->swizzled_bitmaps[i]); + char *a = pointer_as_memory_reference(t->payload[i]); pandecode_log("%s%s, \n", prefix, a); free(a); }