X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fpanfrost%2Fpan_context.c;h=ff00c2129bf1ee76acca620f704e0c5a40e00ccf;hb=b929565ea8d4846b1943f35b6e86d685a616ed54;hp=f62d891e1964493e0b82a1d0427f699ab91a02a6;hpb=ab81a23d36fb4a87f3ac3ef81d333295d54002a1;p=mesa.git diff --git a/src/gallium/drivers/panfrost/pan_context.c b/src/gallium/drivers/panfrost/pan_context.c index f62d891e196..ff00c2129bf 100644 --- a/src/gallium/drivers/panfrost/pan_context.c +++ b/src/gallium/drivers/panfrost/pan_context.c @@ -29,7 +29,6 @@ #include "pan_bo.h" #include "pan_context.h" -#include "pan_format.h" #include "panfrost-quirks.h" #include "util/macros.h" @@ -52,10 +51,9 @@ #include "pan_blending.h" #include "pan_blend_shaders.h" #include "pan_util.h" +#include "pandecode/decode.h" -/* Framebuffer descriptor */ - -static struct midgard_tiler_descriptor +struct midgard_tiler_descriptor panfrost_emit_midg_tiler(struct panfrost_batch *batch, unsigned vertex_count) { struct panfrost_screen *screen = pan_screen(batch->ctx->base.screen); @@ -109,7 +107,7 @@ panfrost_emit_midg_tiler(struct panfrost_batch *batch, unsigned vertex_count) t.hierarchy_mask = MALI_TILER_USER; t.polygon_list_size = MALI_TILER_MINIMUM_HEADER_SIZE + 4; - /* We don't have a SET_VALUE job, so write the polygon list manually */ + /* We don't have a WRITE_VALUE job, so write the polygon list manually */ uint32_t *polygon_list_body = (uint32_t *) (tiler_dummy->cpu + header_size); polygon_list_body[0] = 0xa0000000; /* TODO: Just that? */ } @@ -121,54 +119,6 @@ panfrost_emit_midg_tiler(struct panfrost_batch *batch, unsigned vertex_count) return t; } -struct mali_single_framebuffer -panfrost_emit_sfbd(struct panfrost_batch *batch, unsigned vertex_count) -{ - unsigned width = batch->key.width; - unsigned height = batch->key.height; - - struct mali_single_framebuffer framebuffer = { - .width = MALI_POSITIVE(width), - .height = MALI_POSITIVE(height), - .unknown2 = 0x1f, - .format = { - .unk3 = 0x3, - }, - .clear_flags = 0x1000, - .unknown_address_0 = panfrost_batch_get_scratchpad(batch)->gpu, - .tiler = panfrost_emit_midg_tiler(batch, vertex_count), - }; - - return framebuffer; -} - -struct bifrost_framebuffer -panfrost_emit_mfbd(struct panfrost_batch *batch, unsigned vertex_count) -{ - unsigned width = batch->key.width; - unsigned height = batch->key.height; - - struct bifrost_framebuffer framebuffer = { - .unk0 = 0x1e5, /* 1e4 if no spill */ - .width1 = MALI_POSITIVE(width), - .height1 = MALI_POSITIVE(height), - .width2 = MALI_POSITIVE(width), - .height2 = MALI_POSITIVE(height), - - .unk1 = 0x1080, - - .rt_count_1 = MALI_POSITIVE(batch->key.nr_cbufs), - .rt_count_2 = 4, - - .unknown2 = 0x1f, - - .scratchpad = panfrost_batch_get_scratchpad(batch)->gpu, - .tiler = panfrost_emit_midg_tiler(batch, vertex_count) - }; - - return framebuffer; -} - static void panfrost_clear( struct pipe_context *pipe, @@ -182,7 +132,7 @@ panfrost_clear( * the existing batch targeting this FBO has draws. We could probably * avoid that by replacing plain clears by quad-draws with a specific * color/depth/stencil value, thus avoiding the generation of extra - * fragment/set_value jobs. + * fragment jobs. */ struct panfrost_batch *batch = panfrost_get_fresh_batch_for_fbo(ctx); @@ -190,42 +140,31 @@ panfrost_clear( panfrost_batch_clear(batch, buffers, color, depth, stencil); } -static mali_ptr -panfrost_attach_vt_mfbd(struct panfrost_batch *batch) -{ - struct bifrost_framebuffer mfbd = panfrost_emit_mfbd(batch, ~0); - - return panfrost_upload_transient(batch, &mfbd, sizeof(mfbd)) | MALI_MFBD; -} - -static mali_ptr -panfrost_attach_vt_sfbd(struct panfrost_batch *batch) -{ - struct mali_single_framebuffer sfbd = panfrost_emit_sfbd(batch, ~0); - - return panfrost_upload_transient(batch, &sfbd, sizeof(sfbd)) | MALI_SFBD; -} +/* TODO: Bifrost requires just a mali_shared_memory, without the rest of the + * framebuffer */ static void panfrost_attach_vt_framebuffer(struct panfrost_context *ctx) { - /* Skip the attach if we can */ - - if (ctx->payloads[PIPE_SHADER_VERTEX].postfix.framebuffer) { - assert(ctx->payloads[PIPE_SHADER_FRAGMENT].postfix.framebuffer); - return; - } - struct panfrost_screen *screen = pan_screen(ctx->base.screen); struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx); - if (!batch->framebuffer) - batch->framebuffer = (screen->quirks & MIDGARD_SFBD) ? - panfrost_attach_vt_sfbd(batch) : - panfrost_attach_vt_mfbd(batch); + /* If we haven't, reserve space for the framebuffer */ + + if (!batch->framebuffer.gpu) { + unsigned size = (screen->quirks & MIDGARD_SFBD) ? + sizeof(struct mali_single_framebuffer) : + sizeof(struct mali_framebuffer); + + batch->framebuffer = panfrost_allocate_transient(batch, size); + + /* Tag the pointer */ + if (!(screen->quirks & MIDGARD_SFBD)) + batch->framebuffer.gpu |= MALI_MFBD; + } for (unsigned i = 0; i < PIPE_SHADER_TYPES; ++i) - ctx->payloads[i].postfix.framebuffer = batch->framebuffer; + ctx->payloads[i].postfix.shared_memory = batch->framebuffer.gpu; } /* Reset per-frame context, called on context initialisation as well as after @@ -235,13 +174,7 @@ void panfrost_invalidate_frame(struct panfrost_context *ctx) { for (unsigned i = 0; i < PIPE_SHADER_TYPES; ++i) - ctx->payloads[i].postfix.framebuffer = 0; - - if (ctx->rasterizer) - ctx->dirty |= PAN_DIRTY_RASTERIZER; - - /* XXX */ - ctx->dirty |= PAN_DIRTY_SAMPLERS | PAN_DIRTY_TEXTURES; + ctx->payloads[i].postfix.shared_memory = 0; /* TODO: When does this need to be handled? */ ctx->active_queries = true; @@ -266,18 +199,6 @@ panfrost_emit_vertex_payload(struct panfrost_context *ctx) memcpy(&ctx->payloads[PIPE_SHADER_COMPUTE], &payload, sizeof(payload)); } -static void -panfrost_emit_tiler_payload(struct panfrost_context *ctx) -{ - struct midgard_payload_vertex_tiler payload = { - .prefix = { - .zero1 = 0xffff, /* Why is this only seen on test-quad-textured? */ - }, - }; - - memcpy(&ctx->payloads[PIPE_SHADER_FRAGMENT], &payload, sizeof(payload)); -} - static unsigned translate_tex_wrap(enum pipe_tex_wrap w) { @@ -285,8 +206,9 @@ translate_tex_wrap(enum pipe_tex_wrap w) case PIPE_TEX_WRAP_REPEAT: return MALI_WRAP_REPEAT; - /* TODO: lower GL_CLAMP? */ case PIPE_TEX_WRAP_CLAMP: + return MALI_WRAP_CLAMP; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: return MALI_WRAP_CLAMP_TO_EDGE; @@ -296,6 +218,15 @@ translate_tex_wrap(enum pipe_tex_wrap w) case PIPE_TEX_WRAP_MIRROR_REPEAT: return MALI_WRAP_MIRRORED_REPEAT; + case PIPE_TEX_WRAP_MIRROR_CLAMP: + return MALI_WRAP_MIRRORED_CLAMP; + + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: + return MALI_WRAP_MIRRORED_CLAMP_TO_EDGE; + + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: + return MALI_WRAP_MIRRORED_CLAMP_TO_BORDER; + default: unreachable("Invalid wrap"); } @@ -334,39 +265,6 @@ panfrost_translate_compare_func(enum pipe_compare_func in) } } -static unsigned -panfrost_translate_alt_compare_func(enum pipe_compare_func in) -{ - switch (in) { - case PIPE_FUNC_NEVER: - return MALI_ALT_FUNC_NEVER; - - case PIPE_FUNC_LESS: - return MALI_ALT_FUNC_LESS; - - case PIPE_FUNC_EQUAL: - return MALI_ALT_FUNC_EQUAL; - - case PIPE_FUNC_LEQUAL: - return MALI_ALT_FUNC_LEQUAL; - - case PIPE_FUNC_GREATER: - return MALI_ALT_FUNC_GREATER; - - case PIPE_FUNC_NOTEQUAL: - return MALI_ALT_FUNC_NOTEQUAL; - - case PIPE_FUNC_GEQUAL: - return MALI_ALT_FUNC_GEQUAL; - - case PIPE_FUNC_ALWAYS: - return MALI_ALT_FUNC_ALWAYS; - - default: - unreachable("Invalid alt func"); - } -} - static unsigned panfrost_translate_stencil_op(enum pipe_stencil_op in) { @@ -454,38 +352,6 @@ panfrost_default_shader_backend(struct panfrost_context *ctx) memcpy(&ctx->fragment_shader_core, &shader, sizeof(shader)); } -/* Generates a vertex/tiler job. This is, in some sense, the heart of the - * graphics command stream. It should be called once per draw, accordding to - * presentations. Set is_tiler for "tiler" jobs (fragment shader jobs, but in - * Mali parlance, "fragment" refers to framebuffer writeout). Clear it for - * vertex jobs. */ - -struct panfrost_transfer -panfrost_vertex_tiler_job(struct panfrost_context *ctx, bool is_tiler) -{ - struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx); - struct mali_job_descriptor_header job = { - .job_type = is_tiler ? JOB_TYPE_TILER : JOB_TYPE_VERTEX, - .job_descriptor_size = 1, - }; - - struct midgard_payload_vertex_tiler *payload = is_tiler ? &ctx->payloads[PIPE_SHADER_FRAGMENT] : &ctx->payloads[PIPE_SHADER_VERTEX]; - - struct panfrost_transfer transfer = panfrost_allocate_transient(batch, sizeof(job) + sizeof(*payload)); - memcpy(transfer.cpu, &job, sizeof(job)); - memcpy(transfer.cpu + sizeof(job), payload, sizeof(*payload)); - return transfer; -} - -mali_ptr -panfrost_vertex_buffer_address(struct panfrost_context *ctx, unsigned i) -{ - struct pipe_vertex_buffer *buf = &ctx->vertex_buffers[i]; - struct panfrost_resource *rsrc = (struct panfrost_resource *) (buf->buffer.resource); - - return rsrc->bo->gpu + buf->buffer_offset; -} - static bool panfrost_writes_point_size(struct panfrost_context *ctx) { @@ -504,7 +370,7 @@ panfrost_stage_attributes(struct panfrost_context *ctx) struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx); struct panfrost_vertex_state *so = ctx->vertex; - size_t sz = sizeof(struct mali_attr_meta) * so->num_elements; + size_t sz = sizeof(struct mali_attr_meta) * PAN_MAX_ATTRIBUTE; struct panfrost_transfer transfer = panfrost_allocate_transient(batch, sz); struct mali_attr_meta *target = (struct mali_attr_meta *) transfer.cpu; @@ -537,21 +403,36 @@ panfrost_stage_attributes(struct panfrost_context *ctx) for (unsigned i = 0; i < so->num_elements; ++i) { unsigned vbi = so->pipe[i].vertex_buffer_index; struct pipe_vertex_buffer *buf = &ctx->vertex_buffers[vbi]; - mali_ptr addr = panfrost_vertex_buffer_address(ctx, vbi); + struct panfrost_resource *rsrc = (struct panfrost_resource *) (buf->buffer.resource); + mali_ptr addr = rsrc->bo->gpu + buf->buffer_offset; + + /* Adjust by the masked off bits of the offset. Make sure we + * read src_offset from so->hw (which is not GPU visible) + * rather than target (which is) due to caching effects */ - /* Adjust by the masked off bits of the offset */ - target[i].src_offset += (addr & 63); + unsigned src_offset = so->hw[i].src_offset; + src_offset += (addr & 63); /* Also, somewhat obscurely per-instance data needs to be * offset in response to a delayed start in an indexed draw */ - if (so->pipe[i].instance_divisor && ctx->instance_count > 1 && start) { - target[i].src_offset -= buf->stride * start; - } - + if (so->pipe[i].instance_divisor && ctx->instance_count > 1 && start) + src_offset -= buf->stride * start; + target[i].src_offset = src_offset; } + /* Let's also include vertex builtins */ + + struct mali_attr_meta builtin = { + .format = MALI_R32UI, + .swizzle = panfrost_get_default_swizzle(1) + }; + + /* See mali_attr_meta specification for the magic number */ + memcpy(&target[PAN_VERTEX_ID], &builtin, 4); + memcpy(&target[PAN_INSTANCE_ID], &builtin, 4); + ctx->payloads[PIPE_SHADER_VERTEX].postfix.attribute_meta = transfer.gpu; } @@ -564,7 +445,7 @@ panfrost_upload_sampler_descriptors(struct panfrost_context *ctx) for (int t = 0; t <= PIPE_SHADER_FRAGMENT; ++t) { mali_ptr upload = 0; - if (ctx->sampler_count[t] && ctx->sampler_view_count[t]) { + if (ctx->sampler_count[t]) { size_t transfer_size = desc_size * ctx->sampler_count[t]; struct panfrost_transfer transfer = @@ -583,25 +464,6 @@ panfrost_upload_sampler_descriptors(struct panfrost_context *ctx) } } -static enum mali_texture_layout -panfrost_layout_for_texture(struct panfrost_resource *rsrc) -{ - /* TODO: other linear depth textures */ - bool is_depth = rsrc->base.format == PIPE_FORMAT_Z32_UNORM; - - switch (rsrc->layout) { - case PAN_AFBC: - return MALI_TEXTURE_AFBC; - case PAN_TILED: - assert(!is_depth); - return MALI_TEXTURE_TILED; - case PAN_LINEAR: - return is_depth ? MALI_TEXTURE_TILED : MALI_TEXTURE_LINEAR; - default: - unreachable("Invalid texture layout"); - } -} - static mali_ptr panfrost_upload_tex( struct panfrost_context *ctx, @@ -614,55 +476,18 @@ panfrost_upload_tex( struct pipe_sampler_view *pview = &view->base; struct panfrost_resource *rsrc = pan_resource(pview->texture); - /* Do we interleave an explicit stride with every element? */ - - bool has_manual_stride = view->manual_stride; - - /* For easy access */ - - bool is_buffer = pview->target == PIPE_BUFFER; - unsigned first_level = is_buffer ? 0 : pview->u.tex.first_level; - unsigned last_level = is_buffer ? 0 : pview->u.tex.last_level; - unsigned first_layer = is_buffer ? 0 : pview->u.tex.first_layer; - unsigned last_layer = is_buffer ? 0 : pview->u.tex.last_layer; - - /* Lower-bit is set when sampling from colour AFBC */ - bool is_afbc = rsrc->layout == PAN_AFBC; - bool is_zs = rsrc->base.bind & PIPE_BIND_DEPTH_STENCIL; - unsigned afbc_bit = (is_afbc && !is_zs) ? 1 : 0; - /* Add the BO to the job so it's retained until the job is done. */ struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx); + panfrost_batch_add_bo(batch, rsrc->bo, PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_READ | panfrost_bo_access_for_stage(st)); - /* Add the usage flags in, since they can change across the CSO - * lifetime due to layout switches */ - - view->hw.format.layout = panfrost_layout_for_texture(rsrc); - view->hw.format.manual_stride = has_manual_stride; - - /* Inject the addresses in, interleaving mip levels, cube faces, and - * strides in that order */ - - unsigned idx = 0; - - for (unsigned l = first_level; l <= last_level; ++l) { - for (unsigned f = first_layer; f <= last_layer; ++f) { - - view->hw.payload[idx++] = - panfrost_get_texture_address(rsrc, l, f) + afbc_bit; - - if (has_manual_stride) { - view->hw.payload[idx++] = - rsrc->slices[l].stride; - } - } - } + panfrost_batch_add_bo(batch, view->bo, + PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_READ | + panfrost_bo_access_for_stage(st)); - return panfrost_upload_transient(batch, &view->hw, - sizeof(struct mali_texture_descriptor)); + return view->bo->gpu; } static void @@ -861,9 +686,11 @@ panfrost_map_constant_buffer_gpu( PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_READ | panfrost_bo_access_for_stage(st)); - return rsrc->bo->gpu; + + /* Alignment gauranteed by PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT */ + return rsrc->bo->gpu + cb->buffer_offset; } else if (cb->user_buffer) { - return panfrost_upload_transient(batch, cb->user_buffer, cb->buffer_size); + return panfrost_upload_transient(batch, cb->user_buffer + cb->buffer_offset, cb->buffer_size); } else { unreachable("No constant buffer"); } @@ -899,7 +726,7 @@ panfrost_patch_shader_state(struct panfrost_context *ctx, ss->tripipe->texture_count = ctx->sampler_view_count[stage]; ss->tripipe->sampler_count = ctx->sampler_count[stage]; - ss->tripipe->midgard1.flags = 0x220; + ss->tripipe->midgard1.flags_lo = 0x220; unsigned ubo_count = panfrost_ubo_count(ctx, stage); ss->tripipe->midgard1.uniform_buffer_count = ubo_count; @@ -936,9 +763,9 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) panfrost_emit_varying_descriptor(ctx, total_count); } - bool msaa = ctx->rasterizer->base.multisample; - if (ctx->dirty & PAN_DIRTY_RASTERIZER) { + if (ctx->rasterizer) { + bool msaa = ctx->rasterizer->base.multisample; ctx->payloads[PIPE_SHADER_FRAGMENT].gl_enables = ctx->rasterizer->tiler_gl_enables; /* TODO: Sample size */ @@ -956,23 +783,22 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) panfrost_patch_shader_state(ctx, PIPE_SHADER_VERTEX); panfrost_patch_shader_state(ctx, PIPE_SHADER_COMPUTE); - if (ctx->dirty & (PAN_DIRTY_RASTERIZER | PAN_DIRTY_VS)) { + if (ctx->shader[PIPE_SHADER_VERTEX] && ctx->shader[PIPE_SHADER_FRAGMENT]) { /* Check if we need to link the gl_PointSize varying */ if (!panfrost_writes_point_size(ctx)) { /* If the size is constant, write it out. Otherwise, * don't touch primitive_size (since we would clobber * the pointer there) */ - ctx->payloads[PIPE_SHADER_FRAGMENT].primitive_size.constant = ctx->rasterizer->base.line_width; + bool points = ctx->payloads[PIPE_SHADER_FRAGMENT].prefix.draw_mode == MALI_POINTS; + + ctx->payloads[PIPE_SHADER_FRAGMENT].primitive_size.constant = points ? + ctx->rasterizer->base.point_size : + ctx->rasterizer->base.line_width; } } - /* TODO: Maybe dirty track FS, maybe not. For now, it's transient. */ - if (ctx->shader[PIPE_SHADER_FRAGMENT]) - ctx->dirty |= PAN_DIRTY_FS; - - if (ctx->dirty & PAN_DIRTY_FS) { - assert(ctx->shader[PIPE_SHADER_FRAGMENT]); + if (ctx->shader[PIPE_SHADER_FRAGMENT]) { struct panfrost_shader_state *variant = &ctx->shader[PIPE_SHADER_FRAGMENT]->variants[ctx->shader[PIPE_SHADER_FRAGMENT]->active_variant]; panfrost_patch_shader_state(ctx, PIPE_SHADER_FRAGMENT); @@ -987,8 +813,8 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) COPY(midgard1.uniform_count); COPY(midgard1.uniform_buffer_count); COPY(midgard1.work_count); - COPY(midgard1.flags); - COPY(midgard1.unknown2); + COPY(midgard1.flags_lo); + COPY(midgard1.flags_hi); #undef COPY @@ -996,9 +822,12 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) unsigned rt_count = MAX2(ctx->pipe_framebuffer.nr_cbufs, 1); struct panfrost_blend_final blend[PIPE_MAX_COLOR_BUFS]; + unsigned shader_offset = 0; + struct panfrost_bo *shader_bo = NULL; - for (unsigned c = 0; c < rt_count; ++c) - blend[c] = panfrost_get_blend_for_context(ctx, c); + for (unsigned c = 0; c < rt_count; ++c) { + blend[c] = panfrost_get_blend_for_context(ctx, c, &shader_bo, &shader_offset); + } /* If there is a blend shader, work registers are shared. XXX: opt */ @@ -1010,12 +839,19 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) /* Depending on whether it's legal to in the given shader, we * try to enable early-z testing (or forward-pixel kill?) */ - SET_BIT(ctx->fragment_shader_core.midgard1.flags, MALI_EARLY_Z, !variant->can_discard); + SET_BIT(ctx->fragment_shader_core.midgard1.flags_lo, MALI_EARLY_Z, + !variant->can_discard && !variant->writes_depth); + + /* Add the writes Z/S flags if needed. */ + SET_BIT(ctx->fragment_shader_core.midgard1.flags_lo, + MALI_WRITES_Z, variant->writes_depth); + SET_BIT(ctx->fragment_shader_core.midgard1.flags_hi, + MALI_WRITES_S, variant->writes_stencil); /* Any time texturing is used, derivatives are implicitly * calculated, so we need to enable helper invocations */ - SET_BIT(ctx->fragment_shader_core.midgard1.flags, MALI_HELPER_INVOCATIONS, variant->helper_invocations); + SET_BIT(ctx->fragment_shader_core.midgard1.flags_lo, MALI_HELPER_INVOCATIONS, variant->helper_invocations); /* Assign the stencil refs late */ @@ -1034,18 +870,22 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) */ SET_BIT(ctx->fragment_shader_core.unknown2_3, MALI_CAN_DISCARD, variant->can_discard); - SET_BIT(ctx->fragment_shader_core.midgard1.flags, 0x400, variant->can_discard); + SET_BIT(ctx->fragment_shader_core.midgard1.flags_lo, 0x400, variant->can_discard); /* Even on MFBD, the shader descriptor gets blend shaders. It's * *also* copied to the blend_meta appended (by convention), * but this is the field actually read by the hardware. (Or - * maybe both are read...?) */ + * maybe both are read...?). Specify the last RTi with a blend + * shader. */ - if (blend[0].is_shader) { - ctx->fragment_shader_core.blend.shader = - blend[0].shader.bo->gpu | blend[0].shader.first_tag; - } else { - ctx->fragment_shader_core.blend.shader = 0; + ctx->fragment_shader_core.blend.shader = 0; + + for (signed rt = (rt_count - 1); rt >= 0; --rt) { + if (blend[rt].is_shader) { + ctx->fragment_shader_core.blend.shader = + blend[rt].shader.gpu | blend[rt].shader.first_tag; + break; + } } if (screen->quirks & MIDGARD_SFBD) { @@ -1090,16 +930,8 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) SET_BIT(rts[i].flags, MALI_BLEND_SRGB, is_srgb); SET_BIT(rts[i].flags, MALI_BLEND_NO_DITHER, !ctx->blend->base.dither); - /* TODO: sRGB in blend shaders is currently - * unimplemented. Contact me (Alyssa) if you're - * interested in working on this. We have - * native Midgard ops for helping here, but - * they're not well-understood yet. */ - - assert(!(is_srgb && blend[i].is_shader)); - if (blend[i].is_shader) { - rts[i].blend.shader = blend[i].shader.bo->gpu | blend[i].shader.first_tag; + rts[i].blend.shader = blend[i].shader.gpu | blend[i].shader.first_tag; } else { rts[i].blend.equation = *blend[i].equation.equation; rts[i].blend.constant = blend[i].equation.constant; @@ -1114,11 +946,8 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) if (ctx->vertex) panfrost_stage_attributes(ctx); - if (ctx->dirty & PAN_DIRTY_SAMPLERS) - panfrost_upload_sampler_descriptors(ctx); - - if (ctx->dirty & PAN_DIRTY_TEXTURES) - panfrost_upload_texture_descriptors(ctx); + panfrost_upload_sampler_descriptors(ctx); + panfrost_upload_texture_descriptors(ctx); const struct pipe_viewport_state *vp = &ctx->pipe_viewport; @@ -1145,7 +974,7 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) panfrost_upload_sysvals(ctx, transfer.cpu, ss, i); /* Upload uniforms */ - if (has_uniforms) { + if (has_uniforms && uniform_size) { const void *cpu = panfrost_map_constant_buffer_cpu(buf, 0); memcpy(transfer.cpu + sys_size, cpu, uniform_size); } @@ -1162,12 +991,11 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) unsigned ubo_count = panfrost_ubo_count(ctx, i); assert(ubo_count >= 1); - size_t sz = sizeof(struct mali_uniform_buffer_meta) * ubo_count; - struct mali_uniform_buffer_meta ubos[PAN_MAX_CONST_BUFFERS]; + size_t sz = sizeof(uint64_t) * ubo_count; + uint64_t ubos[PAN_MAX_CONST_BUFFERS]; /* Upload uniforms as a UBO */ - ubos[0].size = MALI_POSITIVE((2 + uniform_count)); - ubos[0].ptr = transfer.gpu >> 2; + ubos[0] = MALI_MAKE_UBO(2 + uniform_count, transfer.gpu); /* The rest are honest-to-goodness UBOs */ @@ -1179,9 +1007,7 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) if (!enabled || empty) { /* Stub out disabled UBOs to catch accesses */ - - ubos[ubo].size = 0; - ubos[ubo].ptr = 0xDEAD0000; + ubos[ubo] = MALI_MAKE_UBO(0, 0xDEAD0000); continue; } @@ -1189,10 +1015,7 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) unsigned bytes_per_field = 16; unsigned aligned = ALIGN_POT(usz, bytes_per_field); - unsigned fields = aligned / bytes_per_field; - - ubos[ubo].size = MALI_POSITIVE(fields); - ubos[ubo].ptr = gpu >> 2; + ubos[ubo] = MALI_MAKE_UBO(aligned / bytes_per_field, gpu); } mali_ptr ubufs = panfrost_upload_transient(batch, ubos, sz); @@ -1298,8 +1121,6 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) panfrost_upload_transient(batch, &view, sizeof(struct mali_viewport)); - - ctx->dirty = 0; } /* Corresponds to exactly one draw, but does not submit anything */ @@ -1315,20 +1136,33 @@ panfrost_queue_draw(struct panfrost_context *ctx) bool rasterizer_discard = ctx->rasterizer && ctx->rasterizer->base.rasterizer_discard; - struct panfrost_transfer vertex = panfrost_vertex_tiler_job(ctx, false); - struct panfrost_transfer tiler; - if (!rasterizer_discard) - tiler = panfrost_vertex_tiler_job(ctx, true); + struct midgard_payload_vertex_tiler *vertex_payload = &ctx->payloads[PIPE_SHADER_VERTEX]; + struct midgard_payload_vertex_tiler *tiler_payload = &ctx->payloads[PIPE_SHADER_FRAGMENT]; struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx); + bool wallpapering = ctx->wallpaper_batch && batch->tiler_dep; - if (rasterizer_discard) - panfrost_scoreboard_queue_vertex_job(batch, vertex, FALSE); - else if (ctx->wallpaper_batch && batch->first_tiler.gpu) - panfrost_scoreboard_queue_fused_job_prepend(batch, vertex, tiler); - else - panfrost_scoreboard_queue_fused_job(batch, vertex, tiler); + if (wallpapering) { + /* Inject in reverse order, with "predicted" job indices. THIS IS A HACK XXX */ + panfrost_new_job(batch, JOB_TYPE_TILER, false, batch->job_index + 2, tiler_payload, sizeof(*tiler_payload), true); + panfrost_new_job(batch, JOB_TYPE_VERTEX, false, 0, vertex_payload, sizeof(*vertex_payload), true); + } else { + unsigned vertex = panfrost_new_job(batch, JOB_TYPE_VERTEX, false, 0, vertex_payload, sizeof(*vertex_payload), false); + + if (!rasterizer_discard) + panfrost_new_job(batch, JOB_TYPE_TILER, false, vertex, tiler_payload, sizeof(*tiler_payload), false); + } + + for (unsigned i = 0; i < PIPE_SHADER_TYPES; ++i) { + struct panfrost_shader_variants *all = ctx->shader[i]; + + if (!all) + continue; + + struct panfrost_shader_state *ss = &all->variants[all->active_variant]; + batch->stack_size = MAX2(batch->stack_size, ss->stack_size); + } } /* The entire frame is in memory -- send it off to the kernel! */ @@ -1370,6 +1204,9 @@ panfrost_flush( util_dynarray_fini(&fences); } + + if (pan_debug & PAN_DBG_TRACE) + pandecode_next_frame(); } #define DEFINE_CASE(c) case PIPE_PRIM_##c: return MALI_##c; @@ -1548,9 +1385,8 @@ panfrost_draw_vbo( draw_flags |= 0x3000; - if (mode == PIPE_PRIM_LINE_STRIP) { - draw_flags |= 0x800; - } + if (ctx->rasterizer && ctx->rasterizer->base.flatshade_first) + draw_flags |= MALI_DRAW_FLATSHADE_FIRST; panfrost_statistics_record(ctx, info); @@ -1576,8 +1412,6 @@ panfrost_draw_vbo( ctx->payloads[PIPE_SHADER_FRAGMENT].prefix.offset_bias_correction = -min_index; ctx->payloads[PIPE_SHADER_FRAGMENT].prefix.index_count = MALI_POSITIVE(info->count); - //assert(!info->restart_index); /* TODO: Research */ - draw_flags |= panfrost_translate_index_size(info->index_size); ctx->payloads[PIPE_SHADER_FRAGMENT].prefix.indices = panfrost_get_index_buffer_mapped(ctx, info); } else { @@ -1588,7 +1422,7 @@ panfrost_draw_vbo( ctx->payloads[PIPE_SHADER_FRAGMENT].prefix.index_count = MALI_POSITIVE(ctx->vertex_count); /* Reverse index state */ - ctx->payloads[PIPE_SHADER_FRAGMENT].prefix.indices = (u64) NULL; + ctx->payloads[PIPE_SHADER_FRAGMENT].prefix.indices = (mali_ptr) 0; } /* Dispatch "compute jobs" for the vertex/tiler pair as (1, @@ -1605,24 +1439,16 @@ panfrost_draw_vbo( /* Encode the padded vertex count */ if (info->instance_count > 1) { - /* Triangles have non-even vertex counts so they change how - * padding works internally */ - - bool is_triangle = - mode == PIPE_PRIM_TRIANGLES || - mode == PIPE_PRIM_TRIANGLE_STRIP || - mode == PIPE_PRIM_TRIANGLE_FAN; - - struct pan_shift_odd so = - panfrost_padded_vertex_count(vertex_count, !is_triangle); + ctx->padded_count = panfrost_padded_vertex_count(vertex_count); - ctx->payloads[PIPE_SHADER_VERTEX].instance_shift = so.shift; - ctx->payloads[PIPE_SHADER_FRAGMENT].instance_shift = so.shift; + unsigned shift = __builtin_ctz(ctx->padded_count); + unsigned k = ctx->padded_count >> (shift + 1); - ctx->payloads[PIPE_SHADER_VERTEX].instance_odd = so.odd; - ctx->payloads[PIPE_SHADER_FRAGMENT].instance_odd = so.odd; + ctx->payloads[PIPE_SHADER_VERTEX].instance_shift = shift; + ctx->payloads[PIPE_SHADER_FRAGMENT].instance_shift = shift; - ctx->padded_count = pan_expand_shift_odd(so); + ctx->payloads[PIPE_SHADER_VERTEX].instance_odd = k; + ctx->payloads[PIPE_SHADER_FRAGMENT].instance_odd = k; } else { ctx->padded_count = vertex_count; @@ -1685,13 +1511,11 @@ panfrost_bind_rasterizer_state( { struct panfrost_context *ctx = pan_context(pctx); - /* TODO: Why can't rasterizer be NULL ever? Other drivers are fine.. */ + ctx->rasterizer = hwcso; + if (!hwcso) return; - ctx->rasterizer = hwcso; - ctx->dirty |= PAN_DIRTY_RASTERIZER; - ctx->fragment_shader_core.depth_units = ctx->rasterizer->base.offset_units * 2.0f; ctx->fragment_shader_core.depth_factor = ctx->rasterizer->base.offset_scale; @@ -1746,15 +1570,14 @@ panfrost_bind_vertex_elements_state( void *hwcso) { struct panfrost_context *ctx = pan_context(pctx); - ctx->vertex = hwcso; - ctx->dirty |= PAN_DIRTY_VERTEX; } static void * panfrost_create_shader_state( struct pipe_context *pctx, - const struct pipe_shader_state *cso) + const struct pipe_shader_state *cso, + enum pipe_shader_type stage) { struct panfrost_shader_variants *so = CALLOC_STRUCT(panfrost_shader_variants); so->base = *cso; @@ -1764,6 +1587,21 @@ panfrost_create_shader_state( if (cso->type == PIPE_SHADER_IR_TGSI) so->base.tokens = tgsi_dup_tokens(so->base.tokens); + /* Precompile for shader-db if we need to */ + if (unlikely((pan_debug & PAN_DBG_PRECOMPILE) && cso->type == PIPE_SHADER_IR_NIR)) { + struct panfrost_context *ctx = pan_context(pctx); + + struct mali_shader_meta meta; + struct panfrost_shader_state state; + uint64_t outputs_written; + + panfrost_shader_compile(ctx, &meta, + PIPE_SHADER_IR_NIR, + so->base.ir.nir, + tgsi_processor_to_shader_stage(stage), &state, + &outputs_written); + } + return so; } @@ -1783,6 +1621,7 @@ panfrost_delete_shader_state( panfrost_bo_unreference(shader_state->bo); shader_state->bo = NULL; } + free(cso->variants); free(so); } @@ -1812,16 +1651,18 @@ panfrost_create_sampler_state( .wrap_s = translate_tex_wrap(cso->wrap_s), .wrap_t = translate_tex_wrap(cso->wrap_t), .wrap_r = translate_tex_wrap(cso->wrap_r), - .compare_func = panfrost_translate_alt_compare_func(cso->compare_func), + .compare_func = panfrost_flip_compare_func( + panfrost_translate_compare_func( + cso->compare_func)), .border_color = { cso->border_color.f[0], cso->border_color.f[1], cso->border_color.f[2], cso->border_color.f[3] }, - .min_lod = FIXED_16(cso->min_lod), - .max_lod = FIXED_16(cso->max_lod), - .lod_bias = FIXED_16(cso->lod_bias), + .min_lod = FIXED_16(cso->min_lod, false), /* clamp at 0 */ + .max_lod = FIXED_16(cso->max_lod, false), + .lod_bias = FIXED_16(cso->lod_bias, true), /* can be negative */ .seamless_cube_map = cso->seamless_cube_map, }; @@ -1830,16 +1671,17 @@ panfrost_create_sampler_state( * essentially -- remember these are fixed point numbers, so * epsilon=1/256) */ - if (cso->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) + if (cso->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) { sampler_descriptor.max_lod = sampler_descriptor.min_lod; - /* Enforce that there is something in the middle by adding epsilon*/ + /* Enforce that there is something in the middle by adding epsilon*/ - if (sampler_descriptor.min_lod == sampler_descriptor.max_lod) - sampler_descriptor.max_lod++; + if (sampler_descriptor.min_lod == sampler_descriptor.max_lod) + sampler_descriptor.max_lod++; - /* Sanity check */ - assert(sampler_descriptor.max_lod > sampler_descriptor.min_lod); + /* Sanity check */ + assert(sampler_descriptor.max_lod > sampler_descriptor.min_lod); + } so->hw = sampler_descriptor; @@ -1860,8 +1702,6 @@ panfrost_bind_sampler_states( /* XXX: Should upload, not just copy? */ ctx->sampler_count[shader] = num_sampler; memcpy(ctx->samplers[shader], sampler, num_sampler * sizeof (void *)); - - ctx->dirty |= PAN_DIRTY_SAMPLERS; } static bool @@ -1955,14 +1795,8 @@ panfrost_bind_shader_state( enum pipe_shader_type type) { struct panfrost_context *ctx = pan_context(pctx); - ctx->shader[type] = hwcso; - if (type == PIPE_SHADER_FRAGMENT) - ctx->dirty |= PAN_DIRTY_FS; - else - ctx->dirty |= PAN_DIRTY_VS; - if (!hwcso) return; /* Match the appropriate variant */ @@ -1980,7 +1814,25 @@ panfrost_bind_shader_state( if (variant == -1) { /* No variant matched, so create a new one */ variant = variants->variant_count++; - assert(variants->variant_count < MAX_SHADER_VARIANTS); + + if (variants->variant_count > variants->variant_space) { + unsigned old_space = variants->variant_space; + + variants->variant_space *= 2; + if (variants->variant_space == 0) + variants->variant_space = 1; + + /* Arbitrary limit to stop runaway programs from + * creating an unbounded number of shader variants. */ + assert(variants->variant_space < 1024); + + unsigned msize = sizeof(struct panfrost_shader_state); + variants->variants = realloc(variants->variants, + variants->variant_space * msize); + + memset(&variants->variants[old_space], 0, + (variants->variant_space - old_space) * msize); + } struct panfrost_shader_state *v = &variants->variants[variant]; @@ -2030,6 +1882,18 @@ panfrost_bind_shader_state( } } +static void * +panfrost_create_vs_state(struct pipe_context *pctx, const struct pipe_shader_state *hwcso) +{ + return panfrost_create_shader_state(pctx, hwcso, PIPE_SHADER_VERTEX); +} + +static void * +panfrost_create_fs_state(struct pipe_context *pctx, const struct pipe_shader_state *hwcso) +{ + return panfrost_create_shader_state(pctx, hwcso, PIPE_SHADER_FRAGMENT); +} + static void panfrost_bind_vs_state(struct pipe_context *pctx, void *hwcso) { @@ -2084,9 +1948,6 @@ panfrost_set_stencil_ref( { struct panfrost_context *ctx = pan_context(pctx); ctx->stencil_ref = *ref; - - /* Shader core dirty */ - ctx->dirty |= PAN_DIRTY_FS; } static enum mali_texture_type @@ -2121,8 +1982,8 @@ panfrost_create_sampler_view( struct pipe_resource *texture, const struct pipe_sampler_view *template) { + struct panfrost_screen *screen = pan_screen(pctx->screen); struct panfrost_sampler_view *so = rzalloc(pctx, struct panfrost_sampler_view); - int bytes_per_pixel = util_format_get_blocksize(texture->format); pipe_reference(NULL, &texture->reference); @@ -2134,12 +1995,6 @@ panfrost_create_sampler_view( so->base.reference.count = 1; so->base.context = pctx; - /* sampler_views correspond to texture descriptors, minus the texture - * (data) itself. So, we serialise the descriptor here and cache it for - * later. */ - - const struct util_format_description *desc = util_format_description(prsrc->base.format); - unsigned char user_swizzle[4] = { template->swizzle_r, template->swizzle_g, @@ -2147,29 +2002,6 @@ panfrost_create_sampler_view( template->swizzle_a }; - enum mali_format format = panfrost_find_format(desc); - - /* Check if we need to set a custom stride by computing the "expected" - * stride and comparing it to what the BO actually wants. Only applies - * to linear textures, since tiled/compressed textures have strict - * alignment requirements for their strides as it is */ - - unsigned first_level = template->u.tex.first_level; - unsigned last_level = template->u.tex.last_level; - - if (prsrc->layout == PAN_LINEAR) { - for (unsigned l = first_level; l <= last_level; ++l) { - unsigned actual_stride = prsrc->slices[l].stride; - unsigned width = u_minify(texture->width0, l); - unsigned comp_stride = width * bytes_per_pixel; - - if (comp_stride != actual_stride) { - so->manual_stride = true; - break; - } - } - } - /* In the hardware, array_size refers specifically to array textures, * whereas in Gallium, it also covers cubemaps */ @@ -2181,26 +2013,32 @@ panfrost_create_sampler_view( array_size /= 6; } - struct mali_texture_descriptor texture_descriptor = { - .width = MALI_POSITIVE(u_minify(texture->width0, first_level)), - .height = MALI_POSITIVE(u_minify(texture->height0, first_level)), - .depth = MALI_POSITIVE(u_minify(texture->depth0, first_level)), - .array_size = MALI_POSITIVE(array_size), - - .format = { - .swizzle = panfrost_translate_swizzle_4(desc->swizzle), - .format = format, - .srgb = desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB, - .type = panfrost_translate_texture_type(template->target), - .unknown2 = 0x1, - }, - - .swizzle = panfrost_translate_swizzle_4(user_swizzle) - }; - - texture_descriptor.levels = last_level - first_level; - - so->hw = texture_descriptor; + enum mali_texture_type type = + panfrost_translate_texture_type(template->target); + + unsigned size = panfrost_estimate_texture_size( + template->u.tex.first_level, + template->u.tex.last_level, + template->u.tex.first_layer, + template->u.tex.last_layer, + type, prsrc->layout); + + so->bo = panfrost_bo_create(screen, size, 0); + + panfrost_new_texture( + so->bo->cpu, + texture->width0, texture->height0, + texture->depth0, array_size, + texture->format, + type, prsrc->layout, + template->u.tex.first_level, + template->u.tex.last_level, + template->u.tex.first_layer, + template->u.tex.last_layer, + prsrc->cubemap_stride, + panfrost_translate_swizzle_4(user_swizzle), + prsrc->bo->gpu, + prsrc->slices); return (struct pipe_sampler_view *) so; } @@ -2213,27 +2051,34 @@ panfrost_set_sampler_views( struct pipe_sampler_view **views) { struct panfrost_context *ctx = pan_context(pctx); + unsigned new_nr = 0; + unsigned i; assert(start_slot == 0); - unsigned new_nr = 0; - for (unsigned i = 0; i < num_views; ++i) { + for (i = 0; i < num_views; ++i) { if (views[i]) new_nr = i + 1; + pipe_sampler_view_reference((struct pipe_sampler_view **)&ctx->sampler_views[shader][i], + views[i]); } + for (; i < ctx->sampler_view_count[shader]; i++) { + pipe_sampler_view_reference((struct pipe_sampler_view **)&ctx->sampler_views[shader][i], + NULL); + } ctx->sampler_view_count[shader] = new_nr; - memcpy(ctx->sampler_views[shader], views, num_views * sizeof (void *)); - - ctx->dirty |= PAN_DIRTY_TEXTURES; } static void panfrost_sampler_view_destroy( struct pipe_context *pctx, - struct pipe_sampler_view *view) + struct pipe_sampler_view *pview) { - pipe_resource_reference(&view->texture, NULL); + struct panfrost_sampler_view *view = (struct panfrost_sampler_view *) pview; + + pipe_resource_reference(&pview->texture, NULL); + panfrost_bo_unreference(view->bo); ralloc_free(view); } @@ -2266,14 +2111,14 @@ panfrost_hint_afbc( for (unsigned i = 0; i < fb->nr_cbufs; ++i) { struct pipe_surface *surf = fb->cbufs[i]; struct panfrost_resource *rsrc = pan_resource(surf->texture); - panfrost_resource_hint_layout(screen, rsrc, PAN_AFBC, 1); + panfrost_resource_hint_layout(screen, rsrc, MALI_TEXTURE_AFBC, 1); } /* Also hint it to the depth buffer */ if (fb->zsbuf) { struct panfrost_resource *rsrc = pan_resource(fb->zsbuf->texture); - panfrost_resource_hint_layout(screen, rsrc, PAN_AFBC, 1); + panfrost_resource_hint_layout(screen, rsrc, MALI_TEXTURE_AFBC, 1); } } @@ -2339,8 +2184,6 @@ panfrost_bind_depth_stencil_state(struct pipe_context *pipe, /* Bounds test not implemented */ assert(!depth_stencil->depth.bounds_test); - - ctx->dirty |= PAN_DIRTY_FS; } static void @@ -2481,7 +2324,7 @@ panfrost_begin_query(struct pipe_context *pipe, struct pipe_query *q) break; default: - fprintf(stderr, "Skipping query %u\n", query->type); + DBG("Skipping query %u\n", query->type); break; } @@ -2613,7 +2456,6 @@ struct pipe_context * panfrost_create_context(struct pipe_screen *screen, void *priv, unsigned flags) { struct panfrost_context *ctx = rzalloc(screen, struct panfrost_context); - struct panfrost_screen *pscreen = pan_screen(screen); struct pipe_context *gallium = (struct pipe_context *) ctx; gallium->screen = screen; @@ -2644,11 +2486,11 @@ panfrost_create_context(struct pipe_screen *screen, void *priv, unsigned flags) gallium->bind_vertex_elements_state = panfrost_bind_vertex_elements_state; gallium->delete_vertex_elements_state = panfrost_generic_cso_delete; - gallium->create_fs_state = panfrost_create_shader_state; + gallium->create_fs_state = panfrost_create_fs_state; gallium->delete_fs_state = panfrost_delete_shader_state; gallium->bind_fs_state = panfrost_bind_fs_state; - gallium->create_vs_state = panfrost_create_shader_state; + gallium->create_vs_state = panfrost_create_vs_state; gallium->delete_vs_state = panfrost_delete_shader_state; gallium->bind_vs_state = panfrost_bind_vs_state; @@ -2702,7 +2544,6 @@ panfrost_create_context(struct pipe_screen *screen, void *priv, unsigned flags) panfrost_batch_init(ctx); panfrost_emit_vertex_payload(ctx); - panfrost_emit_tiler_payload(ctx); panfrost_invalidate_frame(ctx); panfrost_default_shader_backend(ctx);