X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fpanfrost%2Fpan_context.c;h=ff00c2129bf1ee76acca620f704e0c5a40e00ccf;hb=b929565ea8d4846b1943f35b6e86d685a616ed54;hp=984294b0b29f493f948b7353a97c78991a34ee4d;hpb=3909b160002ab9cd09e3e6da728d4da5d4afc824;p=mesa.git diff --git a/src/gallium/drivers/panfrost/pan_context.c b/src/gallium/drivers/panfrost/pan_context.c index 984294b0b29..ff00c2129bf 100644 --- a/src/gallium/drivers/panfrost/pan_context.c +++ b/src/gallium/drivers/panfrost/pan_context.c @@ -29,7 +29,6 @@ #include "pan_bo.h" #include "pan_context.h" -#include "pan_format.h" #include "panfrost-quirks.h" #include "util/macros.h" @@ -52,6 +51,7 @@ #include "pan_blending.h" #include "pan_blend_shaders.h" #include "pan_util.h" +#include "pandecode/decode.h" struct midgard_tiler_descriptor panfrost_emit_midg_tiler(struct panfrost_batch *batch, unsigned vertex_count) @@ -140,6 +140,9 @@ panfrost_clear( panfrost_batch_clear(batch, buffers, color, depth, stencil); } +/* TODO: Bifrost requires just a mali_shared_memory, without the rest of the + * framebuffer */ + static void panfrost_attach_vt_framebuffer(struct panfrost_context *ctx) { @@ -151,7 +154,7 @@ panfrost_attach_vt_framebuffer(struct panfrost_context *ctx) if (!batch->framebuffer.gpu) { unsigned size = (screen->quirks & MIDGARD_SFBD) ? sizeof(struct mali_single_framebuffer) : - sizeof(struct bifrost_framebuffer); + sizeof(struct mali_framebuffer); batch->framebuffer = panfrost_allocate_transient(batch, size); @@ -161,7 +164,7 @@ panfrost_attach_vt_framebuffer(struct panfrost_context *ctx) } for (unsigned i = 0; i < PIPE_SHADER_TYPES; ++i) - ctx->payloads[i].postfix.framebuffer = batch->framebuffer.gpu; + ctx->payloads[i].postfix.shared_memory = batch->framebuffer.gpu; } /* Reset per-frame context, called on context initialisation as well as after @@ -171,13 +174,7 @@ void panfrost_invalidate_frame(struct panfrost_context *ctx) { for (unsigned i = 0; i < PIPE_SHADER_TYPES; ++i) - ctx->payloads[i].postfix.framebuffer = 0; - - if (ctx->rasterizer) - ctx->dirty |= PAN_DIRTY_RASTERIZER; - - /* XXX */ - ctx->dirty |= PAN_DIRTY_SAMPLERS | PAN_DIRTY_TEXTURES; + ctx->payloads[i].postfix.shared_memory = 0; /* TODO: When does this need to be handled? */ ctx->active_queries = true; @@ -202,18 +199,6 @@ panfrost_emit_vertex_payload(struct panfrost_context *ctx) memcpy(&ctx->payloads[PIPE_SHADER_COMPUTE], &payload, sizeof(payload)); } -static void -panfrost_emit_tiler_payload(struct panfrost_context *ctx) -{ - struct midgard_payload_vertex_tiler payload = { - .prefix = { - .zero1 = 0xffff, /* Why is this only seen on test-quad-textured? */ - }, - }; - - memcpy(&ctx->payloads[PIPE_SHADER_FRAGMENT], &payload, sizeof(payload)); -} - static unsigned translate_tex_wrap(enum pipe_tex_wrap w) { @@ -367,38 +352,6 @@ panfrost_default_shader_backend(struct panfrost_context *ctx) memcpy(&ctx->fragment_shader_core, &shader, sizeof(shader)); } -/* Generates a vertex/tiler job. This is, in some sense, the heart of the - * graphics command stream. It should be called once per draw, accordding to - * presentations. Set is_tiler for "tiler" jobs (fragment shader jobs, but in - * Mali parlance, "fragment" refers to framebuffer writeout). Clear it for - * vertex jobs. */ - -struct panfrost_transfer -panfrost_vertex_tiler_job(struct panfrost_context *ctx, bool is_tiler) -{ - struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx); - struct mali_job_descriptor_header job = { - .job_type = is_tiler ? JOB_TYPE_TILER : JOB_TYPE_VERTEX, - .job_descriptor_size = 1, - }; - - struct midgard_payload_vertex_tiler *payload = is_tiler ? &ctx->payloads[PIPE_SHADER_FRAGMENT] : &ctx->payloads[PIPE_SHADER_VERTEX]; - - struct panfrost_transfer transfer = panfrost_allocate_transient(batch, sizeof(job) + sizeof(*payload)); - memcpy(transfer.cpu, &job, sizeof(job)); - memcpy(transfer.cpu + sizeof(job), payload, sizeof(*payload)); - return transfer; -} - -mali_ptr -panfrost_vertex_buffer_address(struct panfrost_context *ctx, unsigned i) -{ - struct pipe_vertex_buffer *buf = &ctx->vertex_buffers[i]; - struct panfrost_resource *rsrc = (struct panfrost_resource *) (buf->buffer.resource); - - return rsrc->bo->gpu + buf->buffer_offset; -} - static bool panfrost_writes_point_size(struct panfrost_context *ctx) { @@ -450,25 +403,35 @@ panfrost_stage_attributes(struct panfrost_context *ctx) for (unsigned i = 0; i < so->num_elements; ++i) { unsigned vbi = so->pipe[i].vertex_buffer_index; struct pipe_vertex_buffer *buf = &ctx->vertex_buffers[vbi]; - mali_ptr addr = panfrost_vertex_buffer_address(ctx, vbi); + struct panfrost_resource *rsrc = (struct panfrost_resource *) (buf->buffer.resource); + mali_ptr addr = rsrc->bo->gpu + buf->buffer_offset; + + /* Adjust by the masked off bits of the offset. Make sure we + * read src_offset from so->hw (which is not GPU visible) + * rather than target (which is) due to caching effects */ - /* Adjust by the masked off bits of the offset */ - target[i].src_offset += (addr & 63); + unsigned src_offset = so->hw[i].src_offset; + src_offset += (addr & 63); /* Also, somewhat obscurely per-instance data needs to be * offset in response to a delayed start in an indexed draw */ if (so->pipe[i].instance_divisor && ctx->instance_count > 1 && start) - target[i].src_offset -= buf->stride * start; + src_offset -= buf->stride * start; + + target[i].src_offset = src_offset; } /* Let's also include vertex builtins */ - target[PAN_VERTEX_ID].format = MALI_R32UI; - target[PAN_VERTEX_ID].swizzle = panfrost_get_default_swizzle(1); + struct mali_attr_meta builtin = { + .format = MALI_R32UI, + .swizzle = panfrost_get_default_swizzle(1) + }; - target[PAN_INSTANCE_ID].format = MALI_R32UI; - target[PAN_INSTANCE_ID].swizzle = panfrost_get_default_swizzle(1); + /* See mali_attr_meta specification for the magic number */ + memcpy(&target[PAN_VERTEX_ID], &builtin, 4); + memcpy(&target[PAN_INSTANCE_ID], &builtin, 4); ctx->payloads[PIPE_SHADER_VERTEX].postfix.attribute_meta = transfer.gpu; } @@ -501,25 +464,6 @@ panfrost_upload_sampler_descriptors(struct panfrost_context *ctx) } } -static enum mali_texture_layout -panfrost_layout_for_texture(struct panfrost_resource *rsrc) -{ - /* TODO: other linear depth textures */ - bool is_depth = rsrc->base.format == PIPE_FORMAT_Z32_UNORM; - - switch (rsrc->layout) { - case PAN_AFBC: - return MALI_TEXTURE_AFBC; - case PAN_TILED: - assert(!is_depth); - return MALI_TEXTURE_TILED; - case PAN_LINEAR: - return is_depth ? MALI_TEXTURE_TILED : MALI_TEXTURE_LINEAR; - default: - unreachable("Invalid texture layout"); - } -} - static mali_ptr panfrost_upload_tex( struct panfrost_context *ctx, @@ -532,55 +476,18 @@ panfrost_upload_tex( struct pipe_sampler_view *pview = &view->base; struct panfrost_resource *rsrc = pan_resource(pview->texture); - /* Do we interleave an explicit stride with every element? */ - - bool has_manual_stride = view->manual_stride; - - /* For easy access */ - - bool is_buffer = pview->target == PIPE_BUFFER; - unsigned first_level = is_buffer ? 0 : pview->u.tex.first_level; - unsigned last_level = is_buffer ? 0 : pview->u.tex.last_level; - unsigned first_layer = is_buffer ? 0 : pview->u.tex.first_layer; - unsigned last_layer = is_buffer ? 0 : pview->u.tex.last_layer; - - /* Lower-bit is set when sampling from colour AFBC */ - bool is_afbc = rsrc->layout == PAN_AFBC; - bool is_zs = rsrc->base.bind & PIPE_BIND_DEPTH_STENCIL; - unsigned afbc_bit = (is_afbc && !is_zs) ? 1 : 0; - /* Add the BO to the job so it's retained until the job is done. */ struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx); + panfrost_batch_add_bo(batch, rsrc->bo, PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_READ | panfrost_bo_access_for_stage(st)); - /* Add the usage flags in, since they can change across the CSO - * lifetime due to layout switches */ - - view->hw.format.layout = panfrost_layout_for_texture(rsrc); - view->hw.format.manual_stride = has_manual_stride; - - /* Inject the addresses in, interleaving mip levels, cube faces, and - * strides in that order */ - - unsigned idx = 0; - - for (unsigned l = first_level; l <= last_level; ++l) { - for (unsigned f = first_layer; f <= last_layer; ++f) { - - view->hw.payload[idx++] = - panfrost_get_texture_address(rsrc, l, f) + afbc_bit; - - if (has_manual_stride) { - view->hw.payload[idx++] = - rsrc->slices[l].stride; - } - } - } + panfrost_batch_add_bo(batch, view->bo, + PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_READ | + panfrost_bo_access_for_stage(st)); - return panfrost_upload_transient(batch, &view->hw, - sizeof(struct mali_texture_descriptor)); + return view->bo->gpu; } static void @@ -779,9 +686,11 @@ panfrost_map_constant_buffer_gpu( PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_READ | panfrost_bo_access_for_stage(st)); - return rsrc->bo->gpu; + + /* Alignment gauranteed by PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT */ + return rsrc->bo->gpu + cb->buffer_offset; } else if (cb->user_buffer) { - return panfrost_upload_transient(batch, cb->user_buffer, cb->buffer_size); + return panfrost_upload_transient(batch, cb->user_buffer + cb->buffer_offset, cb->buffer_size); } else { unreachable("No constant buffer"); } @@ -817,7 +726,7 @@ panfrost_patch_shader_state(struct panfrost_context *ctx, ss->tripipe->texture_count = ctx->sampler_view_count[stage]; ss->tripipe->sampler_count = ctx->sampler_count[stage]; - ss->tripipe->midgard1.flags = 0x220; + ss->tripipe->midgard1.flags_lo = 0x220; unsigned ubo_count = panfrost_ubo_count(ctx, stage); ss->tripipe->midgard1.uniform_buffer_count = ubo_count; @@ -854,9 +763,9 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) panfrost_emit_varying_descriptor(ctx, total_count); } - bool msaa = ctx->rasterizer->base.multisample; - if (ctx->dirty & PAN_DIRTY_RASTERIZER) { + if (ctx->rasterizer) { + bool msaa = ctx->rasterizer->base.multisample; ctx->payloads[PIPE_SHADER_FRAGMENT].gl_enables = ctx->rasterizer->tiler_gl_enables; /* TODO: Sample size */ @@ -874,7 +783,7 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) panfrost_patch_shader_state(ctx, PIPE_SHADER_VERTEX); panfrost_patch_shader_state(ctx, PIPE_SHADER_COMPUTE); - if (ctx->dirty & (PAN_DIRTY_RASTERIZER | PAN_DIRTY_VS)) { + if (ctx->shader[PIPE_SHADER_VERTEX] && ctx->shader[PIPE_SHADER_FRAGMENT]) { /* Check if we need to link the gl_PointSize varying */ if (!panfrost_writes_point_size(ctx)) { /* If the size is constant, write it out. Otherwise, @@ -889,12 +798,7 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) } } - /* TODO: Maybe dirty track FS, maybe not. For now, it's transient. */ - if (ctx->shader[PIPE_SHADER_FRAGMENT]) - ctx->dirty |= PAN_DIRTY_FS; - - if (ctx->dirty & PAN_DIRTY_FS) { - assert(ctx->shader[PIPE_SHADER_FRAGMENT]); + if (ctx->shader[PIPE_SHADER_FRAGMENT]) { struct panfrost_shader_state *variant = &ctx->shader[PIPE_SHADER_FRAGMENT]->variants[ctx->shader[PIPE_SHADER_FRAGMENT]->active_variant]; panfrost_patch_shader_state(ctx, PIPE_SHADER_FRAGMENT); @@ -909,8 +813,8 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) COPY(midgard1.uniform_count); COPY(midgard1.uniform_buffer_count); COPY(midgard1.work_count); - COPY(midgard1.flags); - COPY(midgard1.unknown2); + COPY(midgard1.flags_lo); + COPY(midgard1.flags_hi); #undef COPY @@ -918,9 +822,12 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) unsigned rt_count = MAX2(ctx->pipe_framebuffer.nr_cbufs, 1); struct panfrost_blend_final blend[PIPE_MAX_COLOR_BUFS]; + unsigned shader_offset = 0; + struct panfrost_bo *shader_bo = NULL; - for (unsigned c = 0; c < rt_count; ++c) - blend[c] = panfrost_get_blend_for_context(ctx, c); + for (unsigned c = 0; c < rt_count; ++c) { + blend[c] = panfrost_get_blend_for_context(ctx, c, &shader_bo, &shader_offset); + } /* If there is a blend shader, work registers are shared. XXX: opt */ @@ -932,12 +839,19 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) /* Depending on whether it's legal to in the given shader, we * try to enable early-z testing (or forward-pixel kill?) */ - SET_BIT(ctx->fragment_shader_core.midgard1.flags, MALI_EARLY_Z, !variant->can_discard); + SET_BIT(ctx->fragment_shader_core.midgard1.flags_lo, MALI_EARLY_Z, + !variant->can_discard && !variant->writes_depth); + + /* Add the writes Z/S flags if needed. */ + SET_BIT(ctx->fragment_shader_core.midgard1.flags_lo, + MALI_WRITES_Z, variant->writes_depth); + SET_BIT(ctx->fragment_shader_core.midgard1.flags_hi, + MALI_WRITES_S, variant->writes_stencil); /* Any time texturing is used, derivatives are implicitly * calculated, so we need to enable helper invocations */ - SET_BIT(ctx->fragment_shader_core.midgard1.flags, MALI_HELPER_INVOCATIONS, variant->helper_invocations); + SET_BIT(ctx->fragment_shader_core.midgard1.flags_lo, MALI_HELPER_INVOCATIONS, variant->helper_invocations); /* Assign the stencil refs late */ @@ -956,18 +870,22 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) */ SET_BIT(ctx->fragment_shader_core.unknown2_3, MALI_CAN_DISCARD, variant->can_discard); - SET_BIT(ctx->fragment_shader_core.midgard1.flags, 0x400, variant->can_discard); + SET_BIT(ctx->fragment_shader_core.midgard1.flags_lo, 0x400, variant->can_discard); /* Even on MFBD, the shader descriptor gets blend shaders. It's * *also* copied to the blend_meta appended (by convention), * but this is the field actually read by the hardware. (Or - * maybe both are read...?) */ + * maybe both are read...?). Specify the last RTi with a blend + * shader. */ - if (blend[0].is_shader) { - ctx->fragment_shader_core.blend.shader = - blend[0].shader.bo->gpu | blend[0].shader.first_tag; - } else { - ctx->fragment_shader_core.blend.shader = 0; + ctx->fragment_shader_core.blend.shader = 0; + + for (signed rt = (rt_count - 1); rt >= 0; --rt) { + if (blend[rt].is_shader) { + ctx->fragment_shader_core.blend.shader = + blend[rt].shader.gpu | blend[rt].shader.first_tag; + break; + } } if (screen->quirks & MIDGARD_SFBD) { @@ -1012,16 +930,8 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) SET_BIT(rts[i].flags, MALI_BLEND_SRGB, is_srgb); SET_BIT(rts[i].flags, MALI_BLEND_NO_DITHER, !ctx->blend->base.dither); - /* TODO: sRGB in blend shaders is currently - * unimplemented. Contact me (Alyssa) if you're - * interested in working on this. We have - * native Midgard ops for helping here, but - * they're not well-understood yet. */ - - assert(!(is_srgb && blend[i].is_shader)); - if (blend[i].is_shader) { - rts[i].blend.shader = blend[i].shader.bo->gpu | blend[i].shader.first_tag; + rts[i].blend.shader = blend[i].shader.gpu | blend[i].shader.first_tag; } else { rts[i].blend.equation = *blend[i].equation.equation; rts[i].blend.constant = blend[i].equation.constant; @@ -1036,11 +946,8 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) if (ctx->vertex) panfrost_stage_attributes(ctx); - if (ctx->dirty & PAN_DIRTY_SAMPLERS) - panfrost_upload_sampler_descriptors(ctx); - - if (ctx->dirty & PAN_DIRTY_TEXTURES) - panfrost_upload_texture_descriptors(ctx); + panfrost_upload_sampler_descriptors(ctx); + panfrost_upload_texture_descriptors(ctx); const struct pipe_viewport_state *vp = &ctx->pipe_viewport; @@ -1067,7 +974,7 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) panfrost_upload_sysvals(ctx, transfer.cpu, ss, i); /* Upload uniforms */ - if (has_uniforms) { + if (has_uniforms && uniform_size) { const void *cpu = panfrost_map_constant_buffer_cpu(buf, 0); memcpy(transfer.cpu + sys_size, cpu, uniform_size); } @@ -1084,12 +991,11 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) unsigned ubo_count = panfrost_ubo_count(ctx, i); assert(ubo_count >= 1); - size_t sz = sizeof(struct mali_uniform_buffer_meta) * ubo_count; - struct mali_uniform_buffer_meta ubos[PAN_MAX_CONST_BUFFERS]; + size_t sz = sizeof(uint64_t) * ubo_count; + uint64_t ubos[PAN_MAX_CONST_BUFFERS]; /* Upload uniforms as a UBO */ - ubos[0].size = MALI_POSITIVE((2 + uniform_count)); - ubos[0].ptr = transfer.gpu >> 2; + ubos[0] = MALI_MAKE_UBO(2 + uniform_count, transfer.gpu); /* The rest are honest-to-goodness UBOs */ @@ -1101,9 +1007,7 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) if (!enabled || empty) { /* Stub out disabled UBOs to catch accesses */ - - ubos[ubo].size = 0; - ubos[ubo].ptr = 0xDEAD0000; + ubos[ubo] = MALI_MAKE_UBO(0, 0xDEAD0000); continue; } @@ -1111,10 +1015,7 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) unsigned bytes_per_field = 16; unsigned aligned = ALIGN_POT(usz, bytes_per_field); - unsigned fields = aligned / bytes_per_field; - - ubos[ubo].size = MALI_POSITIVE(fields); - ubos[ubo].ptr = gpu >> 2; + ubos[ubo] = MALI_MAKE_UBO(aligned / bytes_per_field, gpu); } mali_ptr ubufs = panfrost_upload_transient(batch, ubos, sz); @@ -1220,8 +1121,6 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) panfrost_upload_transient(batch, &view, sizeof(struct mali_viewport)); - - ctx->dirty = 0; } /* Corresponds to exactly one draw, but does not submit anything */ @@ -1237,20 +1136,23 @@ panfrost_queue_draw(struct panfrost_context *ctx) bool rasterizer_discard = ctx->rasterizer && ctx->rasterizer->base.rasterizer_discard; - struct panfrost_transfer vertex = panfrost_vertex_tiler_job(ctx, false); - struct panfrost_transfer tiler; - if (!rasterizer_discard) - tiler = panfrost_vertex_tiler_job(ctx, true); + struct midgard_payload_vertex_tiler *vertex_payload = &ctx->payloads[PIPE_SHADER_VERTEX]; + struct midgard_payload_vertex_tiler *tiler_payload = &ctx->payloads[PIPE_SHADER_FRAGMENT]; struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx); + bool wallpapering = ctx->wallpaper_batch && batch->tiler_dep; - if (rasterizer_discard) - panfrost_scoreboard_queue_vertex_job(batch, vertex, FALSE); - else if (ctx->wallpaper_batch && batch->first_tiler.gpu) - panfrost_scoreboard_queue_fused_job_prepend(batch, vertex, tiler); - else - panfrost_scoreboard_queue_fused_job(batch, vertex, tiler); + if (wallpapering) { + /* Inject in reverse order, with "predicted" job indices. THIS IS A HACK XXX */ + panfrost_new_job(batch, JOB_TYPE_TILER, false, batch->job_index + 2, tiler_payload, sizeof(*tiler_payload), true); + panfrost_new_job(batch, JOB_TYPE_VERTEX, false, 0, vertex_payload, sizeof(*vertex_payload), true); + } else { + unsigned vertex = panfrost_new_job(batch, JOB_TYPE_VERTEX, false, 0, vertex_payload, sizeof(*vertex_payload), false); + + if (!rasterizer_discard) + panfrost_new_job(batch, JOB_TYPE_TILER, false, vertex, tiler_payload, sizeof(*tiler_payload), false); + } for (unsigned i = 0; i < PIPE_SHADER_TYPES; ++i) { struct panfrost_shader_variants *all = ctx->shader[i]; @@ -1302,6 +1204,9 @@ panfrost_flush( util_dynarray_fini(&fences); } + + if (pan_debug & PAN_DBG_TRACE) + pandecode_next_frame(); } #define DEFINE_CASE(c) case PIPE_PRIM_##c: return MALI_##c; @@ -1507,8 +1412,6 @@ panfrost_draw_vbo( ctx->payloads[PIPE_SHADER_FRAGMENT].prefix.offset_bias_correction = -min_index; ctx->payloads[PIPE_SHADER_FRAGMENT].prefix.index_count = MALI_POSITIVE(info->count); - //assert(!info->restart_index); /* TODO: Research */ - draw_flags |= panfrost_translate_index_size(info->index_size); ctx->payloads[PIPE_SHADER_FRAGMENT].prefix.indices = panfrost_get_index_buffer_mapped(ctx, info); } else { @@ -1519,7 +1422,7 @@ panfrost_draw_vbo( ctx->payloads[PIPE_SHADER_FRAGMENT].prefix.index_count = MALI_POSITIVE(ctx->vertex_count); /* Reverse index state */ - ctx->payloads[PIPE_SHADER_FRAGMENT].prefix.indices = (u64) NULL; + ctx->payloads[PIPE_SHADER_FRAGMENT].prefix.indices = (mali_ptr) 0; } /* Dispatch "compute jobs" for the vertex/tiler pair as (1, @@ -1608,13 +1511,11 @@ panfrost_bind_rasterizer_state( { struct panfrost_context *ctx = pan_context(pctx); - /* TODO: Why can't rasterizer be NULL ever? Other drivers are fine.. */ + ctx->rasterizer = hwcso; + if (!hwcso) return; - ctx->rasterizer = hwcso; - ctx->dirty |= PAN_DIRTY_RASTERIZER; - ctx->fragment_shader_core.depth_units = ctx->rasterizer->base.offset_units * 2.0f; ctx->fragment_shader_core.depth_factor = ctx->rasterizer->base.offset_scale; @@ -1669,9 +1570,7 @@ panfrost_bind_vertex_elements_state( void *hwcso) { struct panfrost_context *ctx = pan_context(pctx); - ctx->vertex = hwcso; - ctx->dirty |= PAN_DIRTY_VERTEX; } static void * @@ -1722,6 +1621,7 @@ panfrost_delete_shader_state( panfrost_bo_unreference(shader_state->bo); shader_state->bo = NULL; } + free(cso->variants); free(so); } @@ -1760,9 +1660,9 @@ panfrost_create_sampler_state( cso->border_color.f[2], cso->border_color.f[3] }, - .min_lod = FIXED_16(cso->min_lod), - .max_lod = FIXED_16(cso->max_lod), - .lod_bias = FIXED_16(cso->lod_bias), + .min_lod = FIXED_16(cso->min_lod, false), /* clamp at 0 */ + .max_lod = FIXED_16(cso->max_lod, false), + .lod_bias = FIXED_16(cso->lod_bias, true), /* can be negative */ .seamless_cube_map = cso->seamless_cube_map, }; @@ -1771,16 +1671,17 @@ panfrost_create_sampler_state( * essentially -- remember these are fixed point numbers, so * epsilon=1/256) */ - if (cso->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) + if (cso->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) { sampler_descriptor.max_lod = sampler_descriptor.min_lod; - /* Enforce that there is something in the middle by adding epsilon*/ + /* Enforce that there is something in the middle by adding epsilon*/ - if (sampler_descriptor.min_lod == sampler_descriptor.max_lod) - sampler_descriptor.max_lod++; + if (sampler_descriptor.min_lod == sampler_descriptor.max_lod) + sampler_descriptor.max_lod++; - /* Sanity check */ - assert(sampler_descriptor.max_lod > sampler_descriptor.min_lod); + /* Sanity check */ + assert(sampler_descriptor.max_lod > sampler_descriptor.min_lod); + } so->hw = sampler_descriptor; @@ -1801,8 +1702,6 @@ panfrost_bind_sampler_states( /* XXX: Should upload, not just copy? */ ctx->sampler_count[shader] = num_sampler; memcpy(ctx->samplers[shader], sampler, num_sampler * sizeof (void *)); - - ctx->dirty |= PAN_DIRTY_SAMPLERS; } static bool @@ -1896,14 +1795,8 @@ panfrost_bind_shader_state( enum pipe_shader_type type) { struct panfrost_context *ctx = pan_context(pctx); - ctx->shader[type] = hwcso; - if (type == PIPE_SHADER_FRAGMENT) - ctx->dirty |= PAN_DIRTY_FS; - else - ctx->dirty |= PAN_DIRTY_VS; - if (!hwcso) return; /* Match the appropriate variant */ @@ -1921,7 +1814,25 @@ panfrost_bind_shader_state( if (variant == -1) { /* No variant matched, so create a new one */ variant = variants->variant_count++; - assert(variants->variant_count < MAX_SHADER_VARIANTS); + + if (variants->variant_count > variants->variant_space) { + unsigned old_space = variants->variant_space; + + variants->variant_space *= 2; + if (variants->variant_space == 0) + variants->variant_space = 1; + + /* Arbitrary limit to stop runaway programs from + * creating an unbounded number of shader variants. */ + assert(variants->variant_space < 1024); + + unsigned msize = sizeof(struct panfrost_shader_state); + variants->variants = realloc(variants->variants, + variants->variant_space * msize); + + memset(&variants->variants[old_space], 0, + (variants->variant_space - old_space) * msize); + } struct panfrost_shader_state *v = &variants->variants[variant]; @@ -2037,9 +1948,6 @@ panfrost_set_stencil_ref( { struct panfrost_context *ctx = pan_context(pctx); ctx->stencil_ref = *ref; - - /* Shader core dirty */ - ctx->dirty |= PAN_DIRTY_FS; } static enum mali_texture_type @@ -2074,8 +1982,8 @@ panfrost_create_sampler_view( struct pipe_resource *texture, const struct pipe_sampler_view *template) { + struct panfrost_screen *screen = pan_screen(pctx->screen); struct panfrost_sampler_view *so = rzalloc(pctx, struct panfrost_sampler_view); - int bytes_per_pixel = util_format_get_blocksize(texture->format); pipe_reference(NULL, &texture->reference); @@ -2087,12 +1995,6 @@ panfrost_create_sampler_view( so->base.reference.count = 1; so->base.context = pctx; - /* sampler_views correspond to texture descriptors, minus the texture - * (data) itself. So, we serialise the descriptor here and cache it for - * later. */ - - const struct util_format_description *desc = util_format_description(prsrc->base.format); - unsigned char user_swizzle[4] = { template->swizzle_r, template->swizzle_g, @@ -2100,29 +2002,6 @@ panfrost_create_sampler_view( template->swizzle_a }; - enum mali_format format = panfrost_find_format(desc); - - /* Check if we need to set a custom stride by computing the "expected" - * stride and comparing it to what the BO actually wants. Only applies - * to linear textures, since tiled/compressed textures have strict - * alignment requirements for their strides as it is */ - - unsigned first_level = template->u.tex.first_level; - unsigned last_level = template->u.tex.last_level; - - if (prsrc->layout == PAN_LINEAR) { - for (unsigned l = first_level; l <= last_level; ++l) { - unsigned actual_stride = prsrc->slices[l].stride; - unsigned width = u_minify(texture->width0, l); - unsigned comp_stride = width * bytes_per_pixel; - - if (comp_stride != actual_stride) { - so->manual_stride = true; - break; - } - } - } - /* In the hardware, array_size refers specifically to array textures, * whereas in Gallium, it also covers cubemaps */ @@ -2134,26 +2013,32 @@ panfrost_create_sampler_view( array_size /= 6; } - struct mali_texture_descriptor texture_descriptor = { - .width = MALI_POSITIVE(u_minify(texture->width0, first_level)), - .height = MALI_POSITIVE(u_minify(texture->height0, first_level)), - .depth = MALI_POSITIVE(u_minify(texture->depth0, first_level)), - .array_size = MALI_POSITIVE(array_size), - - .format = { - .swizzle = panfrost_translate_swizzle_4(desc->swizzle), - .format = format, - .srgb = desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB, - .type = panfrost_translate_texture_type(template->target), - .unknown2 = 0x1, - }, - - .swizzle = panfrost_translate_swizzle_4(user_swizzle) - }; - - texture_descriptor.levels = last_level - first_level; - - so->hw = texture_descriptor; + enum mali_texture_type type = + panfrost_translate_texture_type(template->target); + + unsigned size = panfrost_estimate_texture_size( + template->u.tex.first_level, + template->u.tex.last_level, + template->u.tex.first_layer, + template->u.tex.last_layer, + type, prsrc->layout); + + so->bo = panfrost_bo_create(screen, size, 0); + + panfrost_new_texture( + so->bo->cpu, + texture->width0, texture->height0, + texture->depth0, array_size, + texture->format, + type, prsrc->layout, + template->u.tex.first_level, + template->u.tex.last_level, + template->u.tex.first_layer, + template->u.tex.last_layer, + prsrc->cubemap_stride, + panfrost_translate_swizzle_4(user_swizzle), + prsrc->bo->gpu, + prsrc->slices); return (struct pipe_sampler_view *) so; } @@ -2183,16 +2068,17 @@ panfrost_set_sampler_views( NULL); } ctx->sampler_view_count[shader] = new_nr; - - ctx->dirty |= PAN_DIRTY_TEXTURES; } static void panfrost_sampler_view_destroy( struct pipe_context *pctx, - struct pipe_sampler_view *view) + struct pipe_sampler_view *pview) { - pipe_resource_reference(&view->texture, NULL); + struct panfrost_sampler_view *view = (struct panfrost_sampler_view *) pview; + + pipe_resource_reference(&pview->texture, NULL); + panfrost_bo_unreference(view->bo); ralloc_free(view); } @@ -2225,14 +2111,14 @@ panfrost_hint_afbc( for (unsigned i = 0; i < fb->nr_cbufs; ++i) { struct pipe_surface *surf = fb->cbufs[i]; struct panfrost_resource *rsrc = pan_resource(surf->texture); - panfrost_resource_hint_layout(screen, rsrc, PAN_AFBC, 1); + panfrost_resource_hint_layout(screen, rsrc, MALI_TEXTURE_AFBC, 1); } /* Also hint it to the depth buffer */ if (fb->zsbuf) { struct panfrost_resource *rsrc = pan_resource(fb->zsbuf->texture); - panfrost_resource_hint_layout(screen, rsrc, PAN_AFBC, 1); + panfrost_resource_hint_layout(screen, rsrc, MALI_TEXTURE_AFBC, 1); } } @@ -2298,8 +2184,6 @@ panfrost_bind_depth_stencil_state(struct pipe_context *pipe, /* Bounds test not implemented */ assert(!depth_stencil->depth.bounds_test); - - ctx->dirty |= PAN_DIRTY_FS; } static void @@ -2440,7 +2324,7 @@ panfrost_begin_query(struct pipe_context *pipe, struct pipe_query *q) break; default: - fprintf(stderr, "Skipping query %u\n", query->type); + DBG("Skipping query %u\n", query->type); break; } @@ -2660,7 +2544,6 @@ panfrost_create_context(struct pipe_screen *screen, void *priv, unsigned flags) panfrost_batch_init(ctx); panfrost_emit_vertex_payload(ctx); - panfrost_emit_tiler_payload(ctx); panfrost_invalidate_frame(ctx); panfrost_default_shader_backend(ctx);