X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fpanfrost%2Fpan_context.c;h=630f6753fd413f149005768309c1a996041b7823;hb=53d6bb9fc633a4d0ad99c25ac4a9ca09f12d87bf;hp=556909613cb11ca9c9bc3c981205cf7b5c05eb90;hpb=7d24cef20025d7d58a4e9ceac321e5a82c732ee2;p=mesa.git diff --git a/src/gallium/drivers/panfrost/pan_context.c b/src/gallium/drivers/panfrost/pan_context.c index 556909613cb..630f6753fd4 100644 --- a/src/gallium/drivers/panfrost/pan_context.c +++ b/src/gallium/drivers/panfrost/pan_context.c @@ -29,7 +29,8 @@ #include "pan_bo.h" #include "pan_context.h" -#include "pan_format.h" +#include "pan_minmax_cache.h" +#include "panfrost-quirks.h" #include "util/macros.h" #include "util/format/u_format.h" @@ -51,31 +52,31 @@ #include "pan_blending.h" #include "pan_blend_shaders.h" #include "pan_util.h" +#include "pandecode/decode.h" -/* Framebuffer descriptor */ - -static struct midgard_tiler_descriptor +struct midgard_tiler_descriptor panfrost_emit_midg_tiler(struct panfrost_batch *batch, unsigned vertex_count) { struct panfrost_screen *screen = pan_screen(batch->ctx->base.screen); - struct midgard_tiler_descriptor t = {}; + bool hierarchy = !(screen->quirks & MIDGARD_NO_HIER_TILING); + struct midgard_tiler_descriptor t = {0}; unsigned height = batch->key.height; unsigned width = batch->key.width; t.hierarchy_mask = - panfrost_choose_hierarchy_mask(width, height, vertex_count); + panfrost_choose_hierarchy_mask(width, height, vertex_count, hierarchy); /* Compute the polygon header size and use that to offset the body */ unsigned header_size = panfrost_tiler_header_size( - width, height, t.hierarchy_mask); + width, height, t.hierarchy_mask, hierarchy); t.polygon_list_size = panfrost_tiler_full_size( - width, height, t.hierarchy_mask); + width, height, t.hierarchy_mask, hierarchy); /* Sanity check */ - if (t.hierarchy_mask) { + if (vertex_count) { struct panfrost_bo *tiler_heap; tiler_heap = panfrost_batch_get_tiler_heap(batch); @@ -91,6 +92,7 @@ panfrost_emit_midg_tiler(struct panfrost_batch *batch, unsigned vertex_count) struct panfrost_bo *tiler_dummy; tiler_dummy = panfrost_batch_get_tiler_dummy(batch); + header_size = MALI_TILER_MINIMUM_HEADER_SIZE; /* The tiler is disabled, so don't allow the tiler heap */ t.heap_start = tiler_dummy->gpu; @@ -100,13 +102,13 @@ panfrost_emit_midg_tiler(struct panfrost_batch *batch, unsigned vertex_count) t.polygon_list = tiler_dummy->gpu; /* Disable the tiler */ - t.hierarchy_mask |= MALI_TILER_DISABLED; - - if (screen->require_sfbd) { - t.hierarchy_mask = 0xFFF; /* TODO: What's this? */ - t.polygon_list_size = 0x200; + if (hierarchy) + t.hierarchy_mask |= MALI_TILER_DISABLED; + else { + t.hierarchy_mask = MALI_TILER_USER; + t.polygon_list_size = MALI_TILER_MINIMUM_HEADER_SIZE + 4; - /* We don't have a SET_VALUE job, so write the polygon list manually */ + /* We don't have a WRITE_VALUE job, so write the polygon list manually */ uint32_t *polygon_list_body = (uint32_t *) (tiler_dummy->cpu + header_size); polygon_list_body[0] = 0xa0000000; /* TODO: Just that? 
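                          * Judging by the sizing above (MALI_TILER_MINIMUM_HEADER_SIZE
                          * + 4), the dummy body is exactly this one 32-bit word,
                          * presumably a terminator.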
*/ } @@ -118,54 +120,6 @@ panfrost_emit_midg_tiler(struct panfrost_batch *batch, unsigned vertex_count) return t; } -struct mali_single_framebuffer -panfrost_emit_sfbd(struct panfrost_batch *batch, unsigned vertex_count) -{ - unsigned width = batch->key.width; - unsigned height = batch->key.height; - - struct mali_single_framebuffer framebuffer = { - .width = MALI_POSITIVE(width), - .height = MALI_POSITIVE(height), - .unknown2 = 0x1f, - .format = { - .unk3 = 0x3, - }, - .clear_flags = 0x1000, - .unknown_address_0 = panfrost_batch_get_scratchpad(batch)->gpu, - .tiler = panfrost_emit_midg_tiler(batch, vertex_count), - }; - - return framebuffer; -} - -struct bifrost_framebuffer -panfrost_emit_mfbd(struct panfrost_batch *batch, unsigned vertex_count) -{ - unsigned width = batch->key.width; - unsigned height = batch->key.height; - - struct bifrost_framebuffer framebuffer = { - .unk0 = 0x1e5, /* 1e4 if no spill */ - .width1 = MALI_POSITIVE(width), - .height1 = MALI_POSITIVE(height), - .width2 = MALI_POSITIVE(width), - .height2 = MALI_POSITIVE(height), - - .unk1 = 0x1080, - - .rt_count_1 = MALI_POSITIVE(batch->key.nr_cbufs), - .rt_count_2 = 4, - - .unknown2 = 0x1f, - - .scratchpad = panfrost_batch_get_scratchpad(batch)->gpu, - .tiler = panfrost_emit_midg_tiler(batch, vertex_count) - }; - - return framebuffer; -} - static void panfrost_clear( struct pipe_context *pipe, @@ -179,7 +133,7 @@ panfrost_clear( * the existing batch targeting this FBO has draws. We could probably * avoid that by replacing plain clears by quad-draws with a specific * color/depth/stencil value, thus avoiding the generation of extra - * fragment/set_value jobs. + * fragment jobs. */ struct panfrost_batch *batch = panfrost_get_fresh_batch_for_fbo(ctx); @@ -187,42 +141,31 @@ panfrost_clear( panfrost_batch_clear(batch, buffers, color, depth, stencil); } -static mali_ptr -panfrost_attach_vt_mfbd(struct panfrost_batch *batch) -{ - struct bifrost_framebuffer mfbd = panfrost_emit_mfbd(batch, ~0); - - return panfrost_upload_transient(batch, &mfbd, sizeof(mfbd)) | MALI_MFBD; -} - -static mali_ptr -panfrost_attach_vt_sfbd(struct panfrost_batch *batch) -{ - struct mali_single_framebuffer sfbd = panfrost_emit_sfbd(batch, ~0); - - return panfrost_upload_transient(batch, &sfbd, sizeof(sfbd)) | MALI_SFBD; -} +/* TODO: Bifrost requires just a mali_shared_memory, without the rest of the + * framebuffer */ static void panfrost_attach_vt_framebuffer(struct panfrost_context *ctx) { - /* Skip the attach if we can */ - - if (ctx->payloads[PIPE_SHADER_VERTEX].postfix.framebuffer) { - assert(ctx->payloads[PIPE_SHADER_FRAGMENT].postfix.framebuffer); - return; - } - struct panfrost_screen *screen = pan_screen(ctx->base.screen); struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx); - if (!batch->framebuffer) - batch->framebuffer = screen->require_sfbd ? - panfrost_attach_vt_sfbd(batch) : - panfrost_attach_vt_mfbd(batch); + /* If we haven't, reserve space for the framebuffer */ + + if (!batch->framebuffer.gpu) { + unsigned size = (screen->quirks & MIDGARD_SFBD) ? 
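+                        /* SFBD keeps the single render target's state inline in
+                         * the framebuffer descriptor; MFBD instead gets per-RT
+                         * descriptors appended, hence, presumably, the size gap */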
+ sizeof(struct mali_single_framebuffer) : + sizeof(struct mali_framebuffer); + + batch->framebuffer = panfrost_allocate_transient(batch, size); + + /* Tag the pointer */ + if (!(screen->quirks & MIDGARD_SFBD)) + batch->framebuffer.gpu |= MALI_MFBD; + } for (unsigned i = 0; i < PIPE_SHADER_TYPES; ++i) - ctx->payloads[i].postfix.framebuffer = batch->framebuffer; + ctx->payloads[i].postfix.shared_memory = batch->framebuffer.gpu; } /* Reset per-frame context, called on context initialisation as well as after @@ -232,13 +175,7 @@ void panfrost_invalidate_frame(struct panfrost_context *ctx) { for (unsigned i = 0; i < PIPE_SHADER_TYPES; ++i) - ctx->payloads[i].postfix.framebuffer = 0; - - if (ctx->rasterizer) - ctx->dirty |= PAN_DIRTY_RASTERIZER; - - /* XXX */ - ctx->dirty |= PAN_DIRTY_SAMPLERS | PAN_DIRTY_TEXTURES; + ctx->payloads[i].postfix.shared_memory = 0; /* TODO: When does this need to be handled? */ ctx->active_queries = true; @@ -263,18 +200,6 @@ panfrost_emit_vertex_payload(struct panfrost_context *ctx) memcpy(&ctx->payloads[PIPE_SHADER_COMPUTE], &payload, sizeof(payload)); } -static void -panfrost_emit_tiler_payload(struct panfrost_context *ctx) -{ - struct midgard_payload_vertex_tiler payload = { - .prefix = { - .zero1 = 0xffff, /* Why is this only seen on test-quad-textured? */ - }, - }; - - memcpy(&ctx->payloads[PIPE_SHADER_FRAGMENT], &payload, sizeof(payload)); -} - static unsigned translate_tex_wrap(enum pipe_tex_wrap w) { @@ -282,8 +207,9 @@ translate_tex_wrap(enum pipe_tex_wrap w) case PIPE_TEX_WRAP_REPEAT: return MALI_WRAP_REPEAT; - /* TODO: lower GL_CLAMP? */ case PIPE_TEX_WRAP_CLAMP: + return MALI_WRAP_CLAMP; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: return MALI_WRAP_CLAMP_TO_EDGE; @@ -293,6 +219,15 @@ translate_tex_wrap(enum pipe_tex_wrap w) case PIPE_TEX_WRAP_MIRROR_REPEAT: return MALI_WRAP_MIRRORED_REPEAT; + case PIPE_TEX_WRAP_MIRROR_CLAMP: + return MALI_WRAP_MIRRORED_CLAMP; + + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: + return MALI_WRAP_MIRRORED_CLAMP_TO_EDGE; + + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: + return MALI_WRAP_MIRRORED_CLAMP_TO_BORDER; + default: unreachable("Invalid wrap"); } @@ -331,39 +266,6 @@ panfrost_translate_compare_func(enum pipe_compare_func in) } } -static unsigned -panfrost_translate_alt_compare_func(enum pipe_compare_func in) -{ - switch (in) { - case PIPE_FUNC_NEVER: - return MALI_ALT_FUNC_NEVER; - - case PIPE_FUNC_LESS: - return MALI_ALT_FUNC_LESS; - - case PIPE_FUNC_EQUAL: - return MALI_ALT_FUNC_EQUAL; - - case PIPE_FUNC_LEQUAL: - return MALI_ALT_FUNC_LEQUAL; - - case PIPE_FUNC_GREATER: - return MALI_ALT_FUNC_GREATER; - - case PIPE_FUNC_NOTEQUAL: - return MALI_ALT_FUNC_NOTEQUAL; - - case PIPE_FUNC_GEQUAL: - return MALI_ALT_FUNC_GEQUAL; - - case PIPE_FUNC_ALWAYS: - return MALI_ALT_FUNC_ALWAYS; - - default: - unreachable("Invalid alt func"); - } -} - static unsigned panfrost_translate_stencil_op(enum pipe_stencil_op in) { @@ -426,9 +328,8 @@ panfrost_default_shader_backend(struct panfrost_context *ctx) * these earlier chips (perhaps this is a chicken bit of some kind). * More investigation is needed. */ - if (screen->require_sfbd) { + if (screen->quirks & MIDGARD_SFBD) shader.unknown2_4 |= 0x10; - } struct pipe_stencil_state default_stencil = { .enabled = 0, @@ -452,38 +353,6 @@ panfrost_default_shader_backend(struct panfrost_context *ctx) memcpy(&ctx->fragment_shader_core, &shader, sizeof(shader)); } -/* Generates a vertex/tiler job. This is, in some sense, the heart of the - * graphics command stream. 
It should be called once per draw, accordding to - * presentations. Set is_tiler for "tiler" jobs (fragment shader jobs, but in - * Mali parlance, "fragment" refers to framebuffer writeout). Clear it for - * vertex jobs. */ - -struct panfrost_transfer -panfrost_vertex_tiler_job(struct panfrost_context *ctx, bool is_tiler) -{ - struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx); - struct mali_job_descriptor_header job = { - .job_type = is_tiler ? JOB_TYPE_TILER : JOB_TYPE_VERTEX, - .job_descriptor_size = 1, - }; - - struct midgard_payload_vertex_tiler *payload = is_tiler ? &ctx->payloads[PIPE_SHADER_FRAGMENT] : &ctx->payloads[PIPE_SHADER_VERTEX]; - - struct panfrost_transfer transfer = panfrost_allocate_transient(batch, sizeof(job) + sizeof(*payload)); - memcpy(transfer.cpu, &job, sizeof(job)); - memcpy(transfer.cpu + sizeof(job), payload, sizeof(*payload)); - return transfer; -} - -mali_ptr -panfrost_vertex_buffer_address(struct panfrost_context *ctx, unsigned i) -{ - struct pipe_vertex_buffer *buf = &ctx->vertex_buffers[i]; - struct panfrost_resource *rsrc = (struct panfrost_resource *) (buf->buffer.resource); - - return rsrc->bo->gpu + buf->buffer_offset; -} - static bool panfrost_writes_point_size(struct panfrost_context *ctx) { @@ -502,7 +371,7 @@ panfrost_stage_attributes(struct panfrost_context *ctx) struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx); struct panfrost_vertex_state *so = ctx->vertex; - size_t sz = sizeof(struct mali_attr_meta) * so->num_elements; + size_t sz = sizeof(struct mali_attr_meta) * PAN_MAX_ATTRIBUTE; struct panfrost_transfer transfer = panfrost_allocate_transient(batch, sz); struct mali_attr_meta *target = (struct mali_attr_meta *) transfer.cpu; @@ -535,21 +404,40 @@ panfrost_stage_attributes(struct panfrost_context *ctx) for (unsigned i = 0; i < so->num_elements; ++i) { unsigned vbi = so->pipe[i].vertex_buffer_index; struct pipe_vertex_buffer *buf = &ctx->vertex_buffers[vbi]; - mali_ptr addr = panfrost_vertex_buffer_address(ctx, vbi); + struct panfrost_resource *rsrc = (struct panfrost_resource *) (buf->buffer.resource); + mali_ptr addr = rsrc->bo->gpu + buf->buffer_offset; + + /* Adjust by the masked off bits of the offset. 
Make sure we + * read src_offset from so->hw (which is not GPU visible) + * rather than target (which is) due to caching effects */ - /* Adjust by the masked off bits of the offset */ - target[i].src_offset += (addr & 63); + unsigned src_offset = so->hw[i].src_offset; + src_offset += (addr & 63); /* Also, somewhat obscurely per-instance data needs to be * offset in response to a delayed start in an indexed draw */ - if (so->pipe[i].instance_divisor && ctx->instance_count > 1 && start) { - target[i].src_offset -= buf->stride * start; - } - + if (so->pipe[i].instance_divisor && ctx->instance_count > 1 && start) + src_offset -= buf->stride * start; + target[i].src_offset = src_offset; } + /* Let's also include vertex builtins */ + + struct mali_attr_meta builtin = { + .format = MALI_R32UI, + .swizzle = panfrost_get_default_swizzle(1) + }; + + /* See mali_attr_meta specification for the magic number */ + + builtin.index = so->vertexid_index; + memcpy(&target[PAN_VERTEX_ID], &builtin, 4); + + builtin.index = so->vertexid_index + 1; + memcpy(&target[PAN_INSTANCE_ID], &builtin, 4); + ctx->payloads[PIPE_SHADER_VERTEX].postfix.attribute_meta = transfer.gpu; } @@ -562,7 +450,7 @@ panfrost_upload_sampler_descriptors(struct panfrost_context *ctx) for (int t = 0; t <= PIPE_SHADER_FRAGMENT; ++t) { mali_ptr upload = 0; - if (ctx->sampler_count[t] && ctx->sampler_view_count[t]) { + if (ctx->sampler_count[t]) { size_t transfer_size = desc_size * ctx->sampler_count[t]; struct panfrost_transfer transfer = @@ -581,25 +469,6 @@ panfrost_upload_sampler_descriptors(struct panfrost_context *ctx) } } -static enum mali_texture_layout -panfrost_layout_for_texture(struct panfrost_resource *rsrc) -{ - /* TODO: other linear depth textures */ - bool is_depth = rsrc->base.format == PIPE_FORMAT_Z32_UNORM; - - switch (rsrc->layout) { - case PAN_AFBC: - return MALI_TEXTURE_AFBC; - case PAN_TILED: - assert(!is_depth); - return MALI_TEXTURE_TILED; - case PAN_LINEAR: - return is_depth ? MALI_TEXTURE_TILED : MALI_TEXTURE_LINEAR; - default: - unreachable("Invalid texture layout"); - } -} - static mali_ptr panfrost_upload_tex( struct panfrost_context *ctx, @@ -612,55 +481,18 @@ panfrost_upload_tex( struct pipe_sampler_view *pview = &view->base; struct panfrost_resource *rsrc = pan_resource(pview->texture); - /* Do we interleave an explicit stride with every element? */ - - bool has_manual_stride = view->manual_stride; - - /* For easy access */ - - bool is_buffer = pview->target == PIPE_BUFFER; - unsigned first_level = is_buffer ? 0 : pview->u.tex.first_level; - unsigned last_level = is_buffer ? 0 : pview->u.tex.last_level; - unsigned first_layer = is_buffer ? 0 : pview->u.tex.first_layer; - unsigned last_layer = is_buffer ? 0 : pview->u.tex.last_layer; - - /* Lower-bit is set when sampling from colour AFBC */ - bool is_afbc = rsrc->layout == PAN_AFBC; - bool is_zs = rsrc->base.bind & PIPE_BIND_DEPTH_STENCIL; - unsigned afbc_bit = (is_afbc && !is_zs) ? 1 : 0; - /* Add the BO to the job so it's retained until the job is done. 
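          * Two BOs are involved now: rsrc->bo holds the texel data, while
          * view->bo holds the texture descriptor baked at sampler-view
          * creation time in panfrost_create_sampler_view.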
*/
         struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
+
         panfrost_batch_add_bo(batch, rsrc->bo,
                               PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_READ |
                               panfrost_bo_access_for_stage(st));
 
-        /* Add the usage flags in, since they can change across the CSO
-         * lifetime due to layout switches */
-
-        view->hw.format.layout = panfrost_layout_for_texture(rsrc);
-        view->hw.format.manual_stride = has_manual_stride;
-
-        /* Inject the addresses in, interleaving mip levels, cube faces, and
-         * strides in that order */
-
-        unsigned idx = 0;
-
-        for (unsigned l = first_level; l <= last_level; ++l) {
-                for (unsigned f = first_layer; f <= last_layer; ++f) {
-
-                        view->hw.payload[idx++] =
-                                panfrost_get_texture_address(rsrc, l, f) + afbc_bit;
-
-                        if (has_manual_stride) {
-                                view->hw.payload[idx++] =
-                                        rsrc->slices[l].stride;
-                        }
-                }
-        }
+        panfrost_batch_add_bo(batch, view->bo,
+                              PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_READ |
+                              panfrost_bo_access_for_stage(st));
 
-        return panfrost_upload_transient(batch, &view->hw,
-                                         sizeof(struct mali_texture_descriptor));
+        return view->bo->gpu;
 }
 
 static void
@@ -761,6 +593,29 @@ static void panfrost_upload_ssbo_sysval(
         uniform->u[2] = sb.buffer_size;
 }
 
+static void
+panfrost_upload_sampler_sysval(
+        struct panfrost_context *ctx,
+        enum pipe_shader_type st,
+        unsigned sampler_index,
+        struct sysval_uniform *uniform)
+{
+        struct pipe_sampler_state *sampl =
+                &ctx->samplers[st][sampler_index]->base;
+
+        uniform->f[0] = sampl->min_lod;
+        uniform->f[1] = sampl->max_lod;
+        uniform->f[2] = sampl->lod_bias;
+
+        /* Even without any errata, Midgard represents "no mipmapping" as
+         * fixing the LOD with the clamps; keep behaviour consistent. cf.
+         * panfrost_create_sampler_state which also explains our choice of
+         * epsilon value (again to keep behaviour consistent) */
+
+        if (sampl->min_mip_filter == PIPE_TEX_MIPFILTER_NONE)
+                uniform->f[1] = uniform->f[0] + (1.0/256.0);
+}
+
 static void panfrost_upload_num_work_groups_sysval(struct panfrost_context *ctx,
                 struct sysval_uniform *uniform)
 {
@@ -796,7 +651,10 @@ static void panfrost_upload_sysvals(struct panfrost_context *ctx, void *buf,
                 case PAN_SYSVAL_NUM_WORK_GROUPS:
                         panfrost_upload_num_work_groups_sysval(ctx, &uniforms[i]);
                         break;
-
+                case PAN_SYSVAL_SAMPLER:
+                        panfrost_upload_sampler_sysval(ctx, st, PAN_SYSVAL_ID(sysval),
+                                                       &uniforms[i]);
+                        break;
                 default:
                         assert(0);
                 }
@@ -833,9 +691,11 @@ panfrost_map_constant_buffer_gpu(
                                       PAN_BO_ACCESS_SHARED |
                                       PAN_BO_ACCESS_READ |
                                       panfrost_bo_access_for_stage(st));
-                return rsrc->bo->gpu;
+
+                /* Alignment guaranteed by PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT */
+                return rsrc->bo->gpu + cb->buffer_offset;
         } else if (cb->user_buffer) {
-                return panfrost_upload_transient(batch, cb->user_buffer, cb->buffer_size);
+                return panfrost_upload_transient(batch, cb->user_buffer + cb->buffer_offset, cb->buffer_size);
         } else {
                 unreachable("No constant buffer");
         }
@@ -853,20 +713,25 @@ panfrost_ubo_count(struct panfrost_context *ctx, enum pipe_shader_type stage)
         return 32 - __builtin_clz(mask);
 }
 
-/* Fixes up a shader state with current state, returning a GPU address to the
- * patched shader */
+/* Fixes up a shader state with current state */
 
-static mali_ptr
-panfrost_patch_shader_state(
-        struct panfrost_context *ctx,
-        struct panfrost_shader_state *ss,
-        enum pipe_shader_type stage,
-        bool should_upload)
+static void
+panfrost_patch_shader_state(struct panfrost_context *ctx,
+                            enum pipe_shader_type stage)
 {
+        struct panfrost_shader_variants *all = ctx->shader[stage];
+
+        if (!all) {
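+                /* No shader is bound for this stage, so there is nothing to
+                 * patch; null the descriptor pointer rather than leave a
+                 * stale one behind */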
ctx->payloads[stage].postfix.shader = 0; + return; + } + + struct panfrost_shader_state *ss = &all->variants[all->active_variant]; + ss->tripipe->texture_count = ctx->sampler_view_count[stage]; ss->tripipe->sampler_count = ctx->sampler_count[stage]; - ss->tripipe->midgard1.flags = 0x220; + ss->tripipe->midgard1.flags_lo = 0x220; unsigned ubo_count = panfrost_ubo_count(ctx, stage); ss->tripipe->midgard1.uniform_buffer_count = ubo_count; @@ -879,36 +744,9 @@ panfrost_patch_shader_state( PAN_BO_ACCESS_READ | panfrost_bo_access_for_stage(stage)); - /* We can't reuse over frames; that's not safe. The descriptor must be - * transient uploaded */ - - if (should_upload) { - return panfrost_upload_transient(batch, ss->tripipe, - sizeof(struct mali_shader_meta)); - } - - /* If we don't need an upload, don't bother */ - return 0; - -} - -static void -panfrost_patch_shader_state_compute( - struct panfrost_context *ctx, - enum pipe_shader_type stage, - bool should_upload) -{ - struct panfrost_shader_variants *all = ctx->shader[stage]; - - if (!all) { - ctx->payloads[stage].postfix.shader = 0; - return; - } - - struct panfrost_shader_state *s = &all->variants[all->active_variant]; - - ctx->payloads[stage].postfix.shader = - panfrost_patch_shader_state(ctx, s, stage, should_upload); + ctx->payloads[stage].postfix.shader = panfrost_upload_transient(batch, + ss->tripipe, + sizeof(struct mali_shader_meta)); } /* Go through dirty flags and actualise them in the cmdstream. */ @@ -930,9 +768,9 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) panfrost_emit_varying_descriptor(ctx, total_count); } - bool msaa = ctx->rasterizer->base.multisample; - if (ctx->dirty & PAN_DIRTY_RASTERIZER) { + if (ctx->rasterizer) { + bool msaa = ctx->rasterizer->base.multisample; ctx->payloads[PIPE_SHADER_FRAGMENT].gl_enables = ctx->rasterizer->tiler_gl_enables; /* TODO: Sample size */ @@ -947,29 +785,28 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) ctx->payloads[PIPE_SHADER_FRAGMENT].postfix.occlusion_counter = ctx->occlusion_query->bo->gpu; } - panfrost_patch_shader_state_compute(ctx, PIPE_SHADER_VERTEX, true); - panfrost_patch_shader_state_compute(ctx, PIPE_SHADER_COMPUTE, true); + panfrost_patch_shader_state(ctx, PIPE_SHADER_VERTEX); + panfrost_patch_shader_state(ctx, PIPE_SHADER_COMPUTE); - if (ctx->dirty & (PAN_DIRTY_RASTERIZER | PAN_DIRTY_VS)) { + if (ctx->shader[PIPE_SHADER_VERTEX] && ctx->shader[PIPE_SHADER_FRAGMENT]) { /* Check if we need to link the gl_PointSize varying */ if (!panfrost_writes_point_size(ctx)) { /* If the size is constant, write it out. Otherwise, * don't touch primitive_size (since we would clobber * the pointer there) */ - ctx->payloads[PIPE_SHADER_FRAGMENT].primitive_size.constant = ctx->rasterizer->base.line_width; + bool points = ctx->payloads[PIPE_SHADER_FRAGMENT].prefix.draw_mode == MALI_POINTS; + + ctx->payloads[PIPE_SHADER_FRAGMENT].primitive_size.constant = points ? + ctx->rasterizer->base.point_size : + ctx->rasterizer->base.line_width; } } - /* TODO: Maybe dirty track FS, maybe not. For now, it's transient. 
*/ - if (ctx->shader[PIPE_SHADER_FRAGMENT]) - ctx->dirty |= PAN_DIRTY_FS; - - if (ctx->dirty & PAN_DIRTY_FS) { - assert(ctx->shader[PIPE_SHADER_FRAGMENT]); + if (ctx->shader[PIPE_SHADER_FRAGMENT]) { struct panfrost_shader_state *variant = &ctx->shader[PIPE_SHADER_FRAGMENT]->variants[ctx->shader[PIPE_SHADER_FRAGMENT]->active_variant]; - panfrost_patch_shader_state(ctx, variant, PIPE_SHADER_FRAGMENT, false); + panfrost_patch_shader_state(ctx, PIPE_SHADER_FRAGMENT); #define COPY(name) ctx->fragment_shader_core.name = variant->tripipe->name @@ -981,8 +818,8 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) COPY(midgard1.uniform_count); COPY(midgard1.uniform_buffer_count); COPY(midgard1.work_count); - COPY(midgard1.flags); - COPY(midgard1.unknown2); + COPY(midgard1.flags_lo); + COPY(midgard1.flags_hi); #undef COPY @@ -990,9 +827,12 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) unsigned rt_count = MAX2(ctx->pipe_framebuffer.nr_cbufs, 1); struct panfrost_blend_final blend[PIPE_MAX_COLOR_BUFS]; + unsigned shader_offset = 0; + struct panfrost_bo *shader_bo = NULL; - for (unsigned c = 0; c < rt_count; ++c) - blend[c] = panfrost_get_blend_for_context(ctx, c); + for (unsigned c = 0; c < rt_count; ++c) { + blend[c] = panfrost_get_blend_for_context(ctx, c, &shader_bo, &shader_offset); + } /* If there is a blend shader, work registers are shared. XXX: opt */ @@ -1001,22 +841,22 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) ctx->fragment_shader_core.midgard1.work_count = 16; } - /* Set late due to depending on render state */ - unsigned flags = ctx->fragment_shader_core.midgard1.flags; - /* Depending on whether it's legal to in the given shader, we * try to enable early-z testing (or forward-pixel kill?) */ - if (!variant->can_discard) - flags |= MALI_EARLY_Z; + SET_BIT(ctx->fragment_shader_core.midgard1.flags_lo, MALI_EARLY_Z, + !variant->can_discard && !variant->writes_depth); + + /* Add the writes Z/S flags if needed. */ + SET_BIT(ctx->fragment_shader_core.midgard1.flags_lo, + MALI_WRITES_Z, variant->writes_depth); + SET_BIT(ctx->fragment_shader_core.midgard1.flags_hi, + MALI_WRITES_S, variant->writes_stencil); /* Any time texturing is used, derivatives are implicitly * calculated, so we need to enable helper invocations */ - if (variant->helper_invocations) - flags |= MALI_HELPER_INVOCATIONS; - - ctx->fragment_shader_core.midgard1.flags = flags; + SET_BIT(ctx->fragment_shader_core.midgard1.flags_lo, MALI_HELPER_INVOCATIONS, variant->helper_invocations); /* Assign the stencil refs late */ @@ -1034,41 +874,41 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) * thing?" by Peter Harris */ - if (variant->can_discard) { - ctx->fragment_shader_core.unknown2_3 |= MALI_CAN_DISCARD; - ctx->fragment_shader_core.midgard1.flags |= 0x400; - } + SET_BIT(ctx->fragment_shader_core.unknown2_3, MALI_CAN_DISCARD, variant->can_discard); + SET_BIT(ctx->fragment_shader_core.midgard1.flags_lo, 0x400, variant->can_discard); /* Even on MFBD, the shader descriptor gets blend shaders. It's * *also* copied to the blend_meta appended (by convention), * but this is the field actually read by the hardware. (Or - * maybe both are read...?) */ + * maybe both are read...?). Specify the last RTi with a blend + * shader. 
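+         * The descending loop below implements that: it keeps the blend
+         * shader of the highest-indexed render target that has one.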
*/ - if (blend[0].is_shader) { - ctx->fragment_shader_core.blend.shader = - blend[0].shader.bo->gpu | blend[0].shader.first_tag; - } else { - ctx->fragment_shader_core.blend.shader = 0; + ctx->fragment_shader_core.blend.shader = 0; + + for (signed rt = (rt_count - 1); rt >= 0; --rt) { + if (blend[rt].is_shader) { + ctx->fragment_shader_core.blend.shader = + blend[rt].shader.gpu | blend[rt].shader.first_tag; + break; + } } - if (screen->require_sfbd) { + if (screen->quirks & MIDGARD_SFBD) { /* When only a single render target platform is used, the blend * information is inside the shader meta itself. We * additionally need to signal CAN_DISCARD for nontrivial blend * modes (so we're able to read back the destination buffer) */ - if (blend[0].is_shader) { - ctx->fragment_shader_core.unknown2_3 |= MALI_HAS_BLEND_SHADER; - } else { + SET_BIT(ctx->fragment_shader_core.unknown2_3, MALI_HAS_BLEND_SHADER, blend[0].is_shader); + + if (!blend[0].is_shader) { ctx->fragment_shader_core.blend.equation = *blend[0].equation.equation; ctx->fragment_shader_core.blend.constant = blend[0].equation.constant; } - if (!blend[0].no_blending) { - ctx->fragment_shader_core.unknown2_3 |= MALI_CAN_DISCARD; - } + SET_BIT(ctx->fragment_shader_core.unknown2_3, MALI_CAN_DISCARD, !blend[0].no_blending); } size_t size = sizeof(struct mali_shader_meta) + (sizeof(struct midgard_blend_rt) * rt_count); @@ -1077,7 +917,7 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) ctx->payloads[PIPE_SHADER_FRAGMENT].postfix.shader = transfer.gpu; - if (!screen->require_sfbd) { + if (!(screen->quirks & MIDGARD_SFBD)) { /* Additional blend descriptor tacked on for jobs using MFBD */ struct midgard_blend_rt rts[4]; @@ -1095,16 +935,8 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) SET_BIT(rts[i].flags, MALI_BLEND_SRGB, is_srgb); SET_BIT(rts[i].flags, MALI_BLEND_NO_DITHER, !ctx->blend->base.dither); - /* TODO: sRGB in blend shaders is currently - * unimplemented. Contact me (Alyssa) if you're - * interested in working on this. We have - * native Midgard ops for helping here, but - * they're not well-understood yet. 
*/ - - assert(!(is_srgb && blend[i].is_shader)); - if (blend[i].is_shader) { - rts[i].blend.shader = blend[i].shader.bo->gpu | blend[i].shader.first_tag; + rts[i].blend.shader = blend[i].shader.gpu | blend[i].shader.first_tag; } else { rts[i].blend.equation = *blend[i].equation.equation; rts[i].blend.constant = blend[i].equation.constant; @@ -1119,11 +951,8 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) if (ctx->vertex) panfrost_stage_attributes(ctx); - if (ctx->dirty & PAN_DIRTY_SAMPLERS) - panfrost_upload_sampler_descriptors(ctx); - - if (ctx->dirty & PAN_DIRTY_TEXTURES) - panfrost_upload_texture_descriptors(ctx); + panfrost_upload_sampler_descriptors(ctx); + panfrost_upload_texture_descriptors(ctx); const struct pipe_viewport_state *vp = &ctx->pipe_viewport; @@ -1150,7 +979,7 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) panfrost_upload_sysvals(ctx, transfer.cpu, ss, i); /* Upload uniforms */ - if (has_uniforms) { + if (has_uniforms && uniform_size) { const void *cpu = panfrost_map_constant_buffer_cpu(buf, 0); memcpy(transfer.cpu + sys_size, cpu, uniform_size); } @@ -1167,12 +996,11 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) unsigned ubo_count = panfrost_ubo_count(ctx, i); assert(ubo_count >= 1); - size_t sz = sizeof(struct mali_uniform_buffer_meta) * ubo_count; - struct mali_uniform_buffer_meta ubos[PAN_MAX_CONST_BUFFERS]; + size_t sz = sizeof(uint64_t) * ubo_count; + uint64_t ubos[PAN_MAX_CONST_BUFFERS]; /* Upload uniforms as a UBO */ - ubos[0].size = MALI_POSITIVE((2 + uniform_count)); - ubos[0].ptr = transfer.gpu >> 2; + ubos[0] = MALI_MAKE_UBO(2 + uniform_count, transfer.gpu); /* The rest are honest-to-goodness UBOs */ @@ -1184,9 +1012,7 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) if (!enabled || empty) { /* Stub out disabled UBOs to catch accesses */ - - ubos[ubo].size = 0; - ubos[ubo].ptr = 0xDEAD0000; + ubos[ubo] = MALI_MAKE_UBO(0, 0xDEAD0000); continue; } @@ -1194,10 +1020,7 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) unsigned bytes_per_field = 16; unsigned aligned = ALIGN_POT(usz, bytes_per_field); - unsigned fields = aligned / bytes_per_field; - - ubos[ubo].size = MALI_POSITIVE(fields); - ubos[ubo].ptr = gpu >> 2; + ubos[ubo] = MALI_MAKE_UBO(aligned / bytes_per_field, gpu); } mali_ptr ubufs = panfrost_upload_transient(batch, ubos, sz); @@ -1303,8 +1126,6 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) panfrost_upload_transient(batch, &view, sizeof(struct mali_viewport)); - - ctx->dirty = 0; } /* Corresponds to exactly one draw, but does not submit anything */ @@ -1320,20 +1141,33 @@ panfrost_queue_draw(struct panfrost_context *ctx) bool rasterizer_discard = ctx->rasterizer && ctx->rasterizer->base.rasterizer_discard; - struct panfrost_transfer vertex = panfrost_vertex_tiler_job(ctx, false); - struct panfrost_transfer tiler; - if (!rasterizer_discard) - tiler = panfrost_vertex_tiler_job(ctx, true); + struct midgard_payload_vertex_tiler *vertex_payload = &ctx->payloads[PIPE_SHADER_VERTEX]; + struct midgard_payload_vertex_tiler *tiler_payload = &ctx->payloads[PIPE_SHADER_FRAGMENT]; struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx); + bool wallpapering = ctx->wallpaper_batch && batch->tiler_dep; - if (rasterizer_discard) - panfrost_scoreboard_queue_vertex_job(batch, vertex, FALSE); - else if (ctx->wallpaper_batch && batch->first_tiler.gpu) - 
panfrost_scoreboard_queue_fused_job_prepend(batch, vertex, tiler); - else - panfrost_scoreboard_queue_fused_job(batch, vertex, tiler); + if (wallpapering) { + /* Inject in reverse order, with "predicted" job indices. THIS IS A HACK XXX */ + panfrost_new_job(batch, JOB_TYPE_TILER, false, batch->job_index + 2, tiler_payload, sizeof(*tiler_payload), true); + panfrost_new_job(batch, JOB_TYPE_VERTEX, false, 0, vertex_payload, sizeof(*vertex_payload), true); + } else { + unsigned vertex = panfrost_new_job(batch, JOB_TYPE_VERTEX, false, 0, vertex_payload, sizeof(*vertex_payload), false); + + if (!rasterizer_discard) + panfrost_new_job(batch, JOB_TYPE_TILER, false, vertex, tiler_payload, sizeof(*tiler_payload), false); + } + + for (unsigned i = 0; i < PIPE_SHADER_TYPES; ++i) { + struct panfrost_shader_variants *all = ctx->shader[i]; + + if (!all) + continue; + + struct panfrost_shader_state *ss = &all->variants[all->active_variant]; + batch->stack_size = MAX2(batch->stack_size, ss->stack_size); + } } /* The entire frame is in memory -- send it off to the kernel! */ @@ -1375,6 +1209,9 @@ panfrost_flush( util_dynarray_fini(&fences); } + + if (pan_debug & PAN_DBG_TRACE) + pandecode_next_frame(); } #define DEFINE_CASE(c) case PIPE_PRIM_##c: return MALI_##c; @@ -1420,15 +1257,27 @@ panfrost_translate_index_size(unsigned size) } /* Gets a GPU address for the associated index buffer. Only gauranteed to be - * good for the duration of the draw (transient), could last longer */ + * good for the duration of the draw (transient), could last longer. Also get + * the bounds on the index buffer for the range accessed by the draw. We do + * these operations together because there are natural optimizations which + * require them to be together. */ static mali_ptr -panfrost_get_index_buffer_mapped(struct panfrost_context *ctx, const struct pipe_draw_info *info) +panfrost_get_index_buffer_bounded(struct panfrost_context *ctx, const struct pipe_draw_info *info, unsigned *min_index, unsigned *max_index) { struct panfrost_resource *rsrc = (struct panfrost_resource *) (info->index.resource); off_t offset = info->start * info->index_size; struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx); + mali_ptr out = 0; + + bool needs_indices = true; + + if (info->max_index != ~0u) { + *min_index = info->min_index; + *max_index = info->max_index; + needs_indices = false; + } if (!info->has_user_indices) { /* Only resources can be directly mapped */ @@ -1436,12 +1285,29 @@ panfrost_get_index_buffer_mapped(struct panfrost_context *ctx, const struct pipe PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_READ | PAN_BO_ACCESS_VERTEX_TILER); - return rsrc->bo->gpu + offset; + out = rsrc->bo->gpu + offset; + + /* Check the cache */ + needs_indices = !panfrost_minmax_cache_get(rsrc->index_cache, info->start, info->count, + min_index, max_index); } else { /* Otherwise, we need to upload to transient memory */ const uint8_t *ibuf8 = (const uint8_t *) info->index.user; - return panfrost_upload_transient(batch, ibuf8 + offset, info->count * info->index_size); + out = panfrost_upload_transient(batch, ibuf8 + offset, info->count * info->index_size); } + + if (needs_indices) { + /* Fallback */ + u_vbuf_get_minmax_index(&ctx->base, info, min_index, max_index); + + if (!info->has_user_indices) { + panfrost_minmax_cache_add(rsrc->index_cache, info->start, info->count, + *min_index, *max_index); + } + } + + + return out; } static bool @@ -1549,38 +1415,19 @@ panfrost_draw_vbo( if (info->primitive_restart) draw_flags |= 
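                        /* The flag name suggests only the fixed all-ones restart
                         * index (0xFF / 0xFFFF / 0xFFFFFFFF by index size) is
                         * honoured; arbitrary restart indices would need lowering
                         * before reaching this point */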
MALI_DRAW_PRIMITIVE_RESTART_FIXED_INDEX;
 
-        /* For higher amounts of vertices (greater than what fits in a 16-bit
-         * short), the other value is needed, otherwise there will be bizarre
-         * rendering artefacts. It's not clear what these values mean yet. This
-         * change is also needed for instancing and sometimes points (perhaps
-         * related to dynamically setting gl_PointSize) */
+        /* These don't make much sense */
 
-        bool is_points = mode == PIPE_PRIM_POINTS;
-        bool many_verts = ctx->vertex_count > 0xFFFF;
-        bool instanced = ctx->instance_count > 1;
+        draw_flags |= 0x3000;
 
-        draw_flags |= (is_points || many_verts || instanced) ? 0x3000 : 0x18000;
-
-        /* This doesn't make much sense */
-        if (mode == PIPE_PRIM_LINE_STRIP) {
-                draw_flags |= 0x800;
-        }
+        if (ctx->rasterizer && ctx->rasterizer->base.flatshade_first)
+                draw_flags |= MALI_DRAW_FLATSHADE_FIRST;
 
         panfrost_statistics_record(ctx, info);
 
         if (info->index_size) {
-                /* Calculate the min/max index used so we can figure out how
-                 * many times to invoke the vertex shader */
-
-                /* Fetch / calculate index bounds */
                 unsigned min_index = 0, max_index = 0;
-
-                if (info->max_index == ~0u) {
-                        u_vbuf_get_minmax_index(pipe, info, &min_index, &max_index);
-                } else {
-                        min_index = info->min_index;
-                        max_index = info->max_index;
-                }
+                ctx->payloads[PIPE_SHADER_FRAGMENT].prefix.indices =
+                        panfrost_get_index_buffer_bounded(ctx, info, &min_index, &max_index);
 
                 /* Use the corresponding values */
                 vertex_count = max_index - min_index + 1;
@@ -1590,10 +1437,7 @@ panfrost_draw_vbo(
                 ctx->payloads[PIPE_SHADER_FRAGMENT].prefix.offset_bias_correction = -min_index;
                 ctx->payloads[PIPE_SHADER_FRAGMENT].prefix.index_count = MALI_POSITIVE(info->count);
 
-                //assert(!info->restart_index); /* TODO: Research */
-
                 draw_flags |= panfrost_translate_index_size(info->index_size);
-                ctx->payloads[PIPE_SHADER_FRAGMENT].prefix.indices = panfrost_get_index_buffer_mapped(ctx, info);
         } else {
                 /* Index count == vertex count, if no indexing is applied, as
                  * if it is internally indexed in the expected order */
@@ -1602,7 +1446,7 @@ panfrost_draw_vbo(
                 ctx->payloads[PIPE_SHADER_FRAGMENT].prefix.index_count = MALI_POSITIVE(ctx->vertex_count);
 
                 /* Reverse index state */
-                ctx->payloads[PIPE_SHADER_FRAGMENT].prefix.indices = (u64) NULL;
+                ctx->payloads[PIPE_SHADER_FRAGMENT].prefix.indices = (mali_ptr) 0;
         }
 
         /* Dispatch "compute jobs" for the vertex/tiler pair as (1,
@@ -1619,24 +1463,16 @@ panfrost_draw_vbo(
         /* Encode the padded vertex count */
 
         if (info->instance_count > 1) {
-                /* Triangles have non-even vertex counts so they change how
-                 * padding works internally */
-
-                bool is_triangle =
-                        mode == PIPE_PRIM_TRIANGLES ||
-                        mode == PIPE_PRIM_TRIANGLE_STRIP ||
-                        mode == PIPE_PRIM_TRIANGLE_FAN;
-
-                struct pan_shift_odd so =
-                        panfrost_padded_vertex_count(vertex_count, !is_triangle);
+                ctx->padded_count = panfrost_padded_vertex_count(vertex_count);
 
-                ctx->payloads[PIPE_SHADER_VERTEX].instance_shift = so.shift;
-                ctx->payloads[PIPE_SHADER_FRAGMENT].instance_shift = so.shift;
+                unsigned shift = __builtin_ctz(ctx->padded_count);
+                unsigned k = ctx->padded_count >> (shift + 1);
 
-                ctx->payloads[PIPE_SHADER_VERTEX].instance_odd = so.odd;
-                ctx->payloads[PIPE_SHADER_FRAGMENT].instance_odd = so.odd;
+                ctx->payloads[PIPE_SHADER_VERTEX].instance_shift = shift;
+                ctx->payloads[PIPE_SHADER_FRAGMENT].instance_shift = shift;
 
-                ctx->padded_count = pan_expand_shift_odd(so);
+                ctx->payloads[PIPE_SHADER_VERTEX].instance_odd = k;
+                ctx->payloads[PIPE_SHADER_FRAGMENT].instance_odd = k;
         } else {
                 ctx->padded_count = 
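                /* For the instanced path above, e.g. padded_count = 40 = 5 << 3:
                 * shift = ctz(40) = 3 and k = 40 >> (3 + 1) = 2, and the pair
                 * decodes back as (2k + 1) << shift = 5 << 3 = 40, so the
                 * encoding is lossless. */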
vertex_count; @@ -1699,13 +1535,11 @@ panfrost_bind_rasterizer_state( { struct panfrost_context *ctx = pan_context(pctx); - /* TODO: Why can't rasterizer be NULL ever? Other drivers are fine.. */ + ctx->rasterizer = hwcso; + if (!hwcso) return; - ctx->rasterizer = hwcso; - ctx->dirty |= PAN_DIRTY_RASTERIZER; - ctx->fragment_shader_core.depth_units = ctx->rasterizer->base.offset_units * 2.0f; ctx->fragment_shader_core.depth_factor = ctx->rasterizer->base.offset_scale; @@ -1760,15 +1594,14 @@ panfrost_bind_vertex_elements_state( void *hwcso) { struct panfrost_context *ctx = pan_context(pctx); - ctx->vertex = hwcso; - ctx->dirty |= PAN_DIRTY_VERTEX; } static void * panfrost_create_shader_state( struct pipe_context *pctx, - const struct pipe_shader_state *cso) + const struct pipe_shader_state *cso, + enum pipe_shader_type stage) { struct panfrost_shader_variants *so = CALLOC_STRUCT(panfrost_shader_variants); so->base = *cso; @@ -1778,6 +1611,21 @@ panfrost_create_shader_state( if (cso->type == PIPE_SHADER_IR_TGSI) so->base.tokens = tgsi_dup_tokens(so->base.tokens); + /* Precompile for shader-db if we need to */ + if (unlikely((pan_debug & PAN_DBG_PRECOMPILE) && cso->type == PIPE_SHADER_IR_NIR)) { + struct panfrost_context *ctx = pan_context(pctx); + + struct mali_shader_meta meta; + struct panfrost_shader_state state; + uint64_t outputs_written; + + panfrost_shader_compile(ctx, &meta, + PIPE_SHADER_IR_NIR, + so->base.ir.nir, + tgsi_processor_to_shader_stage(stage), &state, + &outputs_written); + } + return so; } @@ -1797,6 +1645,7 @@ panfrost_delete_shader_state( panfrost_bo_unreference(shader_state->bo); shader_state->bo = NULL; } + free(cso->variants); free(so); } @@ -1826,15 +1675,18 @@ panfrost_create_sampler_state( .wrap_s = translate_tex_wrap(cso->wrap_s), .wrap_t = translate_tex_wrap(cso->wrap_t), .wrap_r = translate_tex_wrap(cso->wrap_r), - .compare_func = panfrost_translate_alt_compare_func(cso->compare_func), + .compare_func = panfrost_flip_compare_func( + panfrost_translate_compare_func( + cso->compare_func)), .border_color = { cso->border_color.f[0], cso->border_color.f[1], cso->border_color.f[2], cso->border_color.f[3] }, - .min_lod = FIXED_16(cso->min_lod), - .max_lod = FIXED_16(cso->max_lod), + .min_lod = FIXED_16(cso->min_lod, false), /* clamp at 0 */ + .max_lod = FIXED_16(cso->max_lod, false), + .lod_bias = FIXED_16(cso->lod_bias, true), /* can be negative */ .seamless_cube_map = cso->seamless_cube_map, }; @@ -1843,16 +1695,17 @@ panfrost_create_sampler_state( * essentially -- remember these are fixed point numbers, so * epsilon=1/256) */ - if (cso->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) + if (cso->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) { sampler_descriptor.max_lod = sampler_descriptor.min_lod; - /* Enforce that there is something in the middle by adding epsilon*/ + /* Enforce that there is something in the middle by adding epsilon*/ - if (sampler_descriptor.min_lod == sampler_descriptor.max_lod) - sampler_descriptor.max_lod++; + if (sampler_descriptor.min_lod == sampler_descriptor.max_lod) + sampler_descriptor.max_lod++; - /* Sanity check */ - assert(sampler_descriptor.max_lod > sampler_descriptor.min_lod); + /* Sanity check */ + assert(sampler_descriptor.max_lod > sampler_descriptor.min_lod); + } so->hw = sampler_descriptor; @@ -1873,8 +1726,6 @@ panfrost_bind_sampler_states( /* XXX: Should upload, not just copy? 
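          * Only the CSO pointers are copied here; the descriptors themselves
          * are uploaded per-draw by panfrost_upload_sampler_descriptors.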
*/ ctx->sampler_count[shader] = num_sampler; memcpy(ctx->samplers[shader], sampler, num_sampler * sizeof (void *)); - - ctx->dirty |= PAN_DIRTY_SAMPLERS; } static bool @@ -1943,7 +1794,7 @@ update_so_info(struct pipe_stream_output_info *so_info, uint64_t outputs_written) { uint64_t so_outputs = 0; - uint8_t reverse_map[64] = {}; + uint8_t reverse_map[64] = {0}; unsigned slot = 0; while (outputs_written) @@ -1968,14 +1819,8 @@ panfrost_bind_shader_state( enum pipe_shader_type type) { struct panfrost_context *ctx = pan_context(pctx); - ctx->shader[type] = hwcso; - if (type == PIPE_SHADER_FRAGMENT) - ctx->dirty |= PAN_DIRTY_FS; - else - ctx->dirty |= PAN_DIRTY_VS; - if (!hwcso) return; /* Match the appropriate variant */ @@ -1993,7 +1838,25 @@ panfrost_bind_shader_state( if (variant == -1) { /* No variant matched, so create a new one */ variant = variants->variant_count++; - assert(variants->variant_count < MAX_SHADER_VARIANTS); + + if (variants->variant_count > variants->variant_space) { + unsigned old_space = variants->variant_space; + + variants->variant_space *= 2; + if (variants->variant_space == 0) + variants->variant_space = 1; + + /* Arbitrary limit to stop runaway programs from + * creating an unbounded number of shader variants. */ + assert(variants->variant_space < 1024); + + unsigned msize = sizeof(struct panfrost_shader_state); + variants->variants = realloc(variants->variants, + variants->variant_space * msize); + + memset(&variants->variants[old_space], 0, + (variants->variant_space - old_space) * msize); + } struct panfrost_shader_state *v = &variants->variants[variant]; @@ -2043,6 +1906,18 @@ panfrost_bind_shader_state( } } +static void * +panfrost_create_vs_state(struct pipe_context *pctx, const struct pipe_shader_state *hwcso) +{ + return panfrost_create_shader_state(pctx, hwcso, PIPE_SHADER_VERTEX); +} + +static void * +panfrost_create_fs_state(struct pipe_context *pctx, const struct pipe_shader_state *hwcso) +{ + return panfrost_create_shader_state(pctx, hwcso, PIPE_SHADER_FRAGMENT); +} + static void panfrost_bind_vs_state(struct pipe_context *pctx, void *hwcso) { @@ -2097,9 +1972,6 @@ panfrost_set_stencil_ref( { struct panfrost_context *ctx = pan_context(pctx); ctx->stencil_ref = *ref; - - /* Shader core dirty */ - ctx->dirty |= PAN_DIRTY_FS; } static enum mali_texture_type @@ -2134,8 +2006,8 @@ panfrost_create_sampler_view( struct pipe_resource *texture, const struct pipe_sampler_view *template) { + struct panfrost_screen *screen = pan_screen(pctx->screen); struct panfrost_sampler_view *so = rzalloc(pctx, struct panfrost_sampler_view); - int bytes_per_pixel = util_format_get_blocksize(texture->format); pipe_reference(NULL, &texture->reference); @@ -2147,12 +2019,6 @@ panfrost_create_sampler_view( so->base.reference.count = 1; so->base.context = pctx; - /* sampler_views correspond to texture descriptors, minus the texture - * (data) itself. So, we serialise the descriptor here and cache it for - * later. */ - - const struct util_format_description *desc = util_format_description(prsrc->base.format); - unsigned char user_swizzle[4] = { template->swizzle_r, template->swizzle_g, @@ -2160,29 +2026,6 @@ panfrost_create_sampler_view( template->swizzle_a }; - enum mali_format format = panfrost_find_format(desc); - - /* Check if we need to set a custom stride by computing the "expected" - * stride and comparing it to what the BO actually wants. 
Only applies - * to linear textures, since tiled/compressed textures have strict - * alignment requirements for their strides as it is */ - - unsigned first_level = template->u.tex.first_level; - unsigned last_level = template->u.tex.last_level; - - if (prsrc->layout == PAN_LINEAR) { - for (unsigned l = first_level; l <= last_level; ++l) { - unsigned actual_stride = prsrc->slices[l].stride; - unsigned width = u_minify(texture->width0, l); - unsigned comp_stride = width * bytes_per_pixel; - - if (comp_stride != actual_stride) { - so->manual_stride = true; - break; - } - } - } - /* In the hardware, array_size refers specifically to array textures, * whereas in Gallium, it also covers cubemaps */ @@ -2194,26 +2037,32 @@ panfrost_create_sampler_view( array_size /= 6; } - struct mali_texture_descriptor texture_descriptor = { - .width = MALI_POSITIVE(u_minify(texture->width0, first_level)), - .height = MALI_POSITIVE(u_minify(texture->height0, first_level)), - .depth = MALI_POSITIVE(u_minify(texture->depth0, first_level)), - .array_size = MALI_POSITIVE(array_size), - - .format = { - .swizzle = panfrost_translate_swizzle_4(desc->swizzle), - .format = format, - .srgb = desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB, - .type = panfrost_translate_texture_type(template->target), - .unknown2 = 0x1, - }, - - .swizzle = panfrost_translate_swizzle_4(user_swizzle) - }; - - texture_descriptor.levels = last_level - first_level; - - so->hw = texture_descriptor; + enum mali_texture_type type = + panfrost_translate_texture_type(template->target); + + unsigned size = panfrost_estimate_texture_size( + template->u.tex.first_level, + template->u.tex.last_level, + template->u.tex.first_layer, + template->u.tex.last_layer, + type, prsrc->layout); + + so->bo = panfrost_bo_create(screen, size, 0); + + panfrost_new_texture( + so->bo->cpu, + texture->width0, texture->height0, + texture->depth0, array_size, + texture->format, + type, prsrc->layout, + template->u.tex.first_level, + template->u.tex.last_level, + template->u.tex.first_layer, + template->u.tex.last_layer, + prsrc->cubemap_stride, + panfrost_translate_swizzle_4(user_swizzle), + prsrc->bo->gpu, + prsrc->slices); return (struct pipe_sampler_view *) so; } @@ -2226,27 +2075,34 @@ panfrost_set_sampler_views( struct pipe_sampler_view **views) { struct panfrost_context *ctx = pan_context(pctx); + unsigned new_nr = 0; + unsigned i; assert(start_slot == 0); - unsigned new_nr = 0; - for (unsigned i = 0; i < num_views; ++i) { + for (i = 0; i < num_views; ++i) { if (views[i]) new_nr = i + 1; + pipe_sampler_view_reference((struct pipe_sampler_view **)&ctx->sampler_views[shader][i], + views[i]); } + for (; i < ctx->sampler_view_count[shader]; i++) { + pipe_sampler_view_reference((struct pipe_sampler_view **)&ctx->sampler_views[shader][i], + NULL); + } ctx->sampler_view_count[shader] = new_nr; - memcpy(ctx->sampler_views[shader], views, num_views * sizeof (void *)); - - ctx->dirty |= PAN_DIRTY_TEXTURES; } static void panfrost_sampler_view_destroy( struct pipe_context *pctx, - struct pipe_sampler_view *view) + struct pipe_sampler_view *pview) { - pipe_resource_reference(&view->texture, NULL); + struct panfrost_sampler_view *view = (struct panfrost_sampler_view *) pview; + + pipe_resource_reference(&pview->texture, NULL); + panfrost_bo_unreference(view->bo); ralloc_free(view); } @@ -2279,14 +2135,14 @@ panfrost_hint_afbc( for (unsigned i = 0; i < fb->nr_cbufs; ++i) { struct pipe_surface *surf = fb->cbufs[i]; struct panfrost_resource *rsrc = pan_resource(surf->texture); - 
panfrost_resource_hint_layout(screen, rsrc, PAN_AFBC, 1); + panfrost_resource_hint_layout(screen, rsrc, MALI_TEXTURE_AFBC, 1); } /* Also hint it to the depth buffer */ if (fb->zsbuf) { struct panfrost_resource *rsrc = pan_resource(fb->zsbuf->texture); - panfrost_resource_hint_layout(screen, rsrc, PAN_AFBC, 1); + panfrost_resource_hint_layout(screen, rsrc, MALI_TEXTURE_AFBC, 1); } } @@ -2352,8 +2208,6 @@ panfrost_bind_depth_stencil_state(struct pipe_context *pipe, /* Bounds test not implemented */ assert(!depth_stencil->depth.bounds_test); - - ctx->dirty |= PAN_DIRTY_FS; } static void @@ -2494,7 +2348,7 @@ panfrost_begin_query(struct pipe_context *pipe, struct pipe_query *q) break; default: - fprintf(stderr, "Skipping query %u\n", query->type); + DBG("Skipping query %u\n", query->type); break; } @@ -2626,7 +2480,6 @@ struct pipe_context * panfrost_create_context(struct pipe_screen *screen, void *priv, unsigned flags) { struct panfrost_context *ctx = rzalloc(screen, struct panfrost_context); - struct panfrost_screen *pscreen = pan_screen(screen); struct pipe_context *gallium = (struct pipe_context *) ctx; gallium->screen = screen; @@ -2657,11 +2510,11 @@ panfrost_create_context(struct pipe_screen *screen, void *priv, unsigned flags) gallium->bind_vertex_elements_state = panfrost_bind_vertex_elements_state; gallium->delete_vertex_elements_state = panfrost_generic_cso_delete; - gallium->create_fs_state = panfrost_create_shader_state; + gallium->create_fs_state = panfrost_create_fs_state; gallium->delete_fs_state = panfrost_delete_shader_state; gallium->bind_fs_state = panfrost_bind_fs_state; - gallium->create_vs_state = panfrost_create_shader_state; + gallium->create_vs_state = panfrost_create_vs_state; gallium->delete_vs_state = panfrost_delete_shader_state; gallium->bind_vs_state = panfrost_bind_vs_state; @@ -2715,7 +2568,6 @@ panfrost_create_context(struct pipe_screen *screen, void *priv, unsigned flags) panfrost_batch_init(ctx); panfrost_emit_vertex_payload(ctx); - panfrost_emit_tiler_payload(ctx); panfrost_invalidate_frame(ctx); panfrost_default_shader_backend(ctx);
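A closing note on the padded vertex count encoding introduced in panfrost_draw_vbo above: instance_shift/instance_odd are just a factorisation of padded_count into an odd number times a power of two, so the decode is lossless for any nonzero count. Below is a minimal, self-contained sketch of that round trip; the helper names are ours, not the driver's, and it assumes a GCC/Clang-style __builtin_ctz, as the driver itself does.

#include <assert.h>

/* Mirror of the encode in panfrost_draw_vbo: padded_count = (2k + 1) << shift,
 * where shift counts the trailing zero bits. */
static void
check_padded_count(unsigned padded_count)
{
        assert(padded_count != 0); /* __builtin_ctz(0) is undefined */

        unsigned shift = __builtin_ctz(padded_count);
        unsigned k = padded_count >> (shift + 1);

        assert(((2 * k + 1) << shift) == padded_count);
}

int
main(void)
{
        check_padded_count(40); /* 40 = 5 << 3: shift = 3, k = 2 */
        check_padded_count(17); /* odd count: shift = 0, k = 8 */
        return 0;
}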