X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fpanfrost%2Fpan_context.c;h=fd1fa7f328bd5ad52666708d26cd5503b5e20e3e;hb=b8739c24ee2fdccc60e4e18357eb9e63ae2b8183;hp=48e471eace260702cd96145dd555552dc678b73b;hpb=9dd84db7a5d7ae74f7fca835ae51fa6a88313d09;p=mesa.git diff --git a/src/gallium/drivers/panfrost/pan_context.c b/src/gallium/drivers/panfrost/pan_context.c index 48e471eace2..fd1fa7f328b 100644 --- a/src/gallium/drivers/panfrost/pan_context.c +++ b/src/gallium/drivers/panfrost/pan_context.c @@ -50,19 +50,6 @@ extern const char *pan_counters_base; /* Do not actually send anything to the GPU; merely generate the cmdstream as fast as possible. Disables framebuffer writes */ //#define DRY_RUN -/* TODO: Sample size, etc */ - -static void -panfrost_set_framebuffer_msaa(struct panfrost_context *ctx, bool enabled) -{ - struct panfrost_job *job = panfrost_get_job_for_fbo(ctx); - - job->msaa |= enabled; - - SET_BIT(ctx->fragment_shader_core.unknown2_3, MALI_HAS_MSAA, enabled); - SET_BIT(ctx->fragment_shader_core.unknown2_4, MALI_NO_MSAA, !enabled); -} - /* AFBC is enabled on a per-resource basis (AFBC enabling is theoretically * indepdent between color buffers and depth/stencil). To enable, we allocate * the AFBC metadata buffer and mark that it is enabled. We do -not- actually @@ -83,7 +70,7 @@ panfrost_enable_afbc(struct panfrost_context *ctx, struct panfrost_resource *rsr int tile_w = (rsrc->base.width0 + (MALI_TILE_LENGTH - 1)) >> MALI_TILE_SHIFT; int tile_h = (rsrc->base.height0 + (MALI_TILE_LENGTH - 1)) >> MALI_TILE_SHIFT; int bytes_per_pixel = util_format_get_blocksize(rsrc->base.format); - int stride = bytes_per_pixel * rsrc->base.width0; /* TODO: Alignment? */ + int stride = bytes_per_pixel * ALIGN(rsrc->base.width0, 16); stride *= 2; /* TODO: Should this be carried over? */ int main_size = stride * rsrc->base.height0; @@ -324,39 +311,6 @@ panfrost_attach_vt_framebuffer(struct panfrost_context *ctx) ctx->payload_tiler.postfix.framebuffer = framebuffer; } -static void -panfrost_viewport(struct panfrost_context *ctx, - float depth_clip_near, - float depth_clip_far, - int viewport_x0, int viewport_y0, - int viewport_x1, int viewport_y1) -{ - /* Clip bounds are encoded as floats. The viewport itself is encoded as - * (somewhat) asymmetric ints. */ - - struct mali_viewport ret = { - /* By default, do no viewport clipping, i.e. clip to (-inf, - * inf) in each direction. Clipping to the viewport in theory - * should work, but in practice causes issues when we're not - * explicitly trying to scissor */ - - .clip_minx = -inff, - .clip_miny = -inff, - .clip_maxx = inff, - .clip_maxy = inff, - - /* We always perform depth clipping (TODO: Can this be disabled?) */ - - .clip_minz = depth_clip_near, - .clip_maxz = depth_clip_far, - - .viewport0 = { viewport_x0, viewport_y0 }, - .viewport1 = { MALI_POSITIVE(viewport_x1), MALI_POSITIVE(viewport_y1) }, - }; - - memcpy(ctx->viewport, &ret, sizeof(ret)); -} - /* Reset per-frame context, called on context initialisation as well as after * flushing a frame */ @@ -426,11 +380,6 @@ panfrost_emit_tiler_payload(struct panfrost_context *ctx) }, }; - /* Reserve the viewport */ - struct panfrost_transfer t = panfrost_allocate_chunk(ctx, sizeof(struct mali_viewport), HEAP_DESCRIPTOR); - ctx->viewport = (struct mali_viewport *) t.cpu; - payload.postfix.viewport = t.gpu; - memcpy(&ctx->payload_tiler, &payload, sizeof(payload)); } @@ -698,6 +647,90 @@ panfrost_set_value_job(struct panfrost_context *ctx) ctx->set_value_job = transfer.gpu; } +static mali_ptr +panfrost_emit_varyings( + struct panfrost_context *ctx, + union mali_attr *slot, + unsigned stride, + unsigned count) +{ + mali_ptr varying_address = ctx->varying_mem.gpu + ctx->varying_height; + + /* Fill out the descriptor */ + slot->elements = varying_address | MALI_ATTR_LINEAR; + slot->stride = stride; + slot->size = stride * count; + + ctx->varying_height += ALIGN(slot->size, 64); + assert(ctx->varying_height < ctx->varying_mem.size); + + return varying_address; +} + +static void +panfrost_emit_point_coord(union mali_attr *slot) +{ + slot->elements = MALI_VARYING_POINT_COORD | MALI_ATTR_LINEAR; + slot->stride = slot->size = 0; +} + +static void +panfrost_emit_varying_descriptor( + struct panfrost_context *ctx, + unsigned invocation_count) +{ + /* Load the shaders */ + + struct panfrost_shader_state *vs = &ctx->vs->variants[ctx->vs->active_variant]; + struct panfrost_shader_state *fs = &ctx->fs->variants[ctx->fs->active_variant]; + + /* Allocate the varying descriptor */ + + size_t vs_size = sizeof(struct mali_attr_meta) * vs->tripipe->varying_count; + size_t fs_size = sizeof(struct mali_attr_meta) * fs->tripipe->varying_count; + + struct panfrost_transfer trans = panfrost_allocate_transient(ctx, + vs_size + fs_size); + + memcpy(trans.cpu, vs->varyings, vs_size); + memcpy(trans.cpu + vs_size, fs->varyings, fs_size); + + ctx->payload_vertex.postfix.varying_meta = trans.gpu; + ctx->payload_tiler.postfix.varying_meta = trans.gpu + vs_size; + + /* Buffer indices must be in this order per our convention */ + union mali_attr varyings[PIPE_MAX_ATTRIBS]; + unsigned idx = 0; + + /* General varyings -- use the VS's, since those are more likely to be + * accurate on desktop */ + + panfrost_emit_varyings(ctx, &varyings[idx++], + vs->general_varying_stride, invocation_count); + + /* fp32 vec4 gl_Position */ + ctx->payload_tiler.postfix.position_varying = + panfrost_emit_varyings(ctx, &varyings[idx++], + sizeof(float) * 4, invocation_count); + + + if (vs->writes_point_size || fs->reads_point_coord) { + /* fp16 vec1 gl_PointSize */ + ctx->payload_tiler.primitive_size.pointer = + panfrost_emit_varyings(ctx, &varyings[idx++], + 2, invocation_count); + } + + if (fs->reads_point_coord) { + /* Special descriptor */ + panfrost_emit_point_coord(&varyings[idx++]); + } + + mali_ptr varyings_p = panfrost_upload_transient(ctx, &varyings, idx * sizeof(union mali_attr)); + ctx->payload_vertex.postfix.varyings = varyings_p; + ctx->payload_tiler.postfix.varyings = varyings_p; +} + /* Emits attributes and varying descriptors, which should be called every draw, * excepting some obscure circumstances */ @@ -706,7 +739,6 @@ panfrost_emit_vertex_data(struct panfrost_context *ctx) { /* TODO: Only update the dirtied buffers */ union mali_attr attrs[PIPE_MAX_ATTRIBS]; - union mali_attr varyings[PIPE_MAX_ATTRIBS]; unsigned invocation_count = MALI_NEGATIVE(ctx->payload_tiler.prefix.invocation_count); @@ -749,39 +781,9 @@ panfrost_emit_vertex_data(struct panfrost_context *ctx) } } - struct panfrost_varyings *vars = &ctx->vs->variants[ctx->vs->active_variant].varyings; - - for (int i = 0; i < vars->varying_buffer_count; ++i) { - mali_ptr varying_address = ctx->varying_mem.gpu + ctx->varying_height; - - varyings[i].elements = varying_address | 1; - varyings[i].stride = vars->varyings_stride[i]; - varyings[i].size = vars->varyings_stride[i] * invocation_count; - - /* If this varying has to be linked somewhere, do it now. See - * pan_assemble.c for the indices. TODO: Use a more generic - * linking interface */ - - if (i == 1) { - /* gl_Position */ - ctx->payload_tiler.postfix.position_varying = varying_address; - } else if (i == 2) { - /* gl_PointSize */ - ctx->payload_tiler.primitive_size.pointer = varying_address; - } - - /* Varyings appear to need 64-byte alignment */ - ctx->varying_height += ALIGN(varyings[i].size, 64); - - /* Ensure that we fit */ - assert(ctx->varying_height < ctx->varying_mem.size); - } - ctx->payload_vertex.postfix.attributes = panfrost_upload_transient(ctx, attrs, ctx->vertex_buffer_count * sizeof(union mali_attr)); - mali_ptr varyings_p = panfrost_upload_transient(ctx, &varyings, vars->varying_buffer_count * sizeof(union mali_attr)); - ctx->payload_vertex.postfix.varyings = varyings_p; - ctx->payload_tiler.postfix.varyings = varyings_p; + panfrost_emit_varying_descriptor(ctx, invocation_count); } /* Go through dirty flags and actualise them in the cmdstream. */ @@ -789,15 +791,30 @@ panfrost_emit_vertex_data(struct panfrost_context *ctx) void panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) { + struct panfrost_job *job = panfrost_get_job_for_fbo(ctx); + if (with_vertex_data) { panfrost_emit_vertex_data(ctx); } + bool msaa = ctx->rasterizer->base.multisample; + if (ctx->dirty & PAN_DIRTY_RASTERIZER) { ctx->payload_tiler.gl_enables = ctx->rasterizer->tiler_gl_enables; - panfrost_set_framebuffer_msaa(ctx, ctx->rasterizer->base.multisample); + + /* TODO: Sample size */ + SET_BIT(ctx->fragment_shader_core.unknown2_3, MALI_HAS_MSAA, msaa); + SET_BIT(ctx->fragment_shader_core.unknown2_4, MALI_NO_MSAA, !msaa); } + /* Enable job requirements at draw-time */ + + if (msaa) + job->requirements |= PAN_REQ_MSAA; + + if (ctx->depth_stencil->depth.writemask) + job->requirements |= PAN_REQ_DEPTH_WRITE; + if (ctx->occlusion_query) { ctx->payload_tiler.gl_enables |= MALI_OCCLUSION_QUERY | MALI_OCCLUSION_PRECISE; ctx->payload_tiler.postfix.occlusion_counter = ctx->occlusion_query->transfer.gpu; @@ -809,6 +826,7 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) struct panfrost_shader_state *vs = &ctx->vs->variants[ctx->vs->active_variant]; /* Late shader descriptor assignments */ + vs->tripipe->texture_count = ctx->sampler_view_count[PIPE_SHADER_VERTEX]; vs->tripipe->sampler_count = ctx->sampler_count[PIPE_SHADER_VERTEX]; @@ -816,15 +834,6 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) vs->tripipe->midgard1.unknown1 = 0x2201; ctx->payload_vertex.postfix._shader_upper = vs->tripipe_gpu >> 4; - - /* Varying descriptor is tied to the vertex shader. Also the - * fragment shader, I suppose, but it's generated with the - * vertex shader so */ - - struct panfrost_varyings *varyings = &ctx->vs->variants[ctx->vs->active_variant].varyings; - - ctx->payload_vertex.postfix.varying_meta = varyings->varyings_descriptor; - ctx->payload_tiler.postfix.varying_meta = varyings->varyings_descriptor_fragment; } if (ctx->dirty & (PAN_DIRTY_RASTERIZER | PAN_DIRTY_VS)) { @@ -1050,12 +1059,21 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) /* Generate the viewport vector of the form: */ const struct pipe_viewport_state *vp = &ctx->pipe_viewport; + /* For flipped-Y buffers (signaled by negative scale), the translate is + * flipped as well */ + + bool invert_y = vp->scale[1] < 0.0; + float translate_y = vp->translate[1]; + + if (invert_y) + translate_y = ctx->pipe_framebuffer.height - translate_y; + float viewport_vec4[] = { vp->scale[0], fabsf(vp->scale[1]), vp->translate[0], - /* -1.0 * vp->translate[1] */ fabs(1.0 * vp->scale[1]) /* XXX */ + translate_y }; for (int i = 0; i < PIPE_SHADER_TYPES; ++i) { @@ -1117,6 +1135,54 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) } } + /* TODO: Upload the viewport somewhere more appropriate */ + + /* Clip bounds are encoded as floats. The viewport itself is encoded as + * (somewhat) asymmetric ints. */ + const struct pipe_scissor_state *ss = &ctx->scissor; + + struct mali_viewport view = { + /* By default, do no viewport clipping, i.e. clip to (-inf, + * inf) in each direction. Clipping to the viewport in theory + * should work, but in practice causes issues when we're not + * explicitly trying to scissor */ + + .clip_minx = -inff, + .clip_miny = -inff, + .clip_maxx = inff, + .clip_maxy = inff, + + .clip_minz = 0.0, + .clip_maxz = 1.0, + }; + + /* Always scissor to the viewport by default. */ + view.viewport0[0] = (int) (vp->translate[0] - vp->scale[0]); + view.viewport1[0] = MALI_POSITIVE((int) (vp->translate[0] + vp->scale[0])); + + view.viewport0[1] = (int) (translate_y - fabs(vp->scale[1])); + view.viewport1[1] = MALI_POSITIVE((int) (translate_y + fabs(vp->scale[1]))); + + if (ss && ctx->rasterizer && ctx->rasterizer->base.scissor) { + /* Invert scissor if needed */ + unsigned miny = invert_y ? + ctx->pipe_framebuffer.height - ss->maxy : ss->miny; + + unsigned maxy = invert_y ? + ctx->pipe_framebuffer.height - ss->miny : ss->maxy; + + /* Set the actual scissor */ + view.viewport0[0] = ss->minx; + view.viewport0[1] = miny; + view.viewport1[0] = MALI_POSITIVE(ss->maxx); + view.viewport1[1] = MALI_POSITIVE(maxy); + } + + ctx->payload_tiler.postfix.viewport = + panfrost_upload_transient(ctx, + &view, + sizeof(struct mali_viewport)); + ctx->dirty = 0; } @@ -1360,7 +1426,7 @@ panfrost_draw_vbo( /* Fallback for unsupported modes */ - if (!(ctx->draw_modes & mode)) { + if (!(ctx->draw_modes & (1 << mode))) { if (mode == PIPE_PRIM_QUADS && info->count == 4 && ctx->rasterizer && !ctx->rasterizer->base.flatshade) { mode = PIPE_PRIM_TRIANGLE_FAN; } else { @@ -1461,24 +1527,6 @@ panfrost_generic_cso_delete(struct pipe_context *pctx, void *hwcso) free(hwcso); } -static void -panfrost_set_scissor(struct panfrost_context *ctx) -{ - const struct pipe_scissor_state *ss = &ctx->scissor; - - if (ss && ctx->rasterizer && ctx->rasterizer->base.scissor && 0) { - ctx->viewport->viewport0[0] = ss->minx; - ctx->viewport->viewport0[1] = ss->miny; - ctx->viewport->viewport1[0] = MALI_POSITIVE(ss->maxx); - ctx->viewport->viewport1[1] = MALI_POSITIVE(ss->maxy); - } else { - ctx->viewport->viewport0[0] = 0; - ctx->viewport->viewport0[1] = 0; - ctx->viewport->viewport1[0] = MALI_POSITIVE(ctx->pipe_framebuffer.width); - ctx->viewport->viewport1[1] = MALI_POSITIVE(ctx->pipe_framebuffer.height); - } -} - static void * panfrost_create_rasterizer_state( struct pipe_context *pctx, @@ -1510,21 +1558,12 @@ panfrost_bind_rasterizer_state( void *hwcso) { struct panfrost_context *ctx = pan_context(pctx); - struct pipe_rasterizer_state *cso = hwcso; /* TODO: Why can't rasterizer be NULL ever? Other drivers are fine.. */ if (!hwcso) return; - /* If scissor test has changed, we'll need to update that now */ - bool update_scissor = !ctx->rasterizer || ctx->rasterizer->base.scissor != cso->scissor; - ctx->rasterizer = hwcso; - - /* Actualise late changes */ - if (update_scissor) - panfrost_set_scissor(ctx); - ctx->dirty |= PAN_DIRTY_RASTERIZER; } @@ -2020,7 +2059,6 @@ panfrost_set_framebuffer_state(struct pipe_context *pctx, ctx->vt_framebuffer_mfbd = panfrost_emit_mfbd(ctx); panfrost_attach_vt_framebuffer(ctx); - panfrost_set_scissor(ctx); struct panfrost_resource *tex = ((struct panfrost_resource *) ctx->pipe_framebuffer.cbufs[i]->texture); bool is_scanout = panfrost_is_scanout(ctx); @@ -2054,12 +2092,8 @@ panfrost_set_framebuffer_state(struct pipe_context *pctx, ctx->vt_framebuffer_mfbd = panfrost_emit_mfbd(ctx); panfrost_attach_vt_framebuffer(ctx); - panfrost_set_scissor(ctx); - - struct panfrost_resource *tex = ((struct panfrost_resource *) ctx->pipe_framebuffer.zsbuf->texture); - if (tex->bo->layout != PAN_AFBC && !panfrost_is_scanout(ctx)) - panfrost_enable_afbc(ctx, tex, true); + /* Keep the depth FBO linear */ } } } @@ -2245,8 +2279,6 @@ panfrost_set_scissor_states(struct pipe_context *pipe, assert(num_scissors == 1); ctx->scissor = *scissors; - - panfrost_set_scissor(ctx); } static void @@ -2371,6 +2403,46 @@ panfrost_get_query_result(struct pipe_context *pipe, return true; } +static struct pipe_stream_output_target * +panfrost_create_stream_output_target(struct pipe_context *pctx, + struct pipe_resource *prsc, + unsigned buffer_offset, + unsigned buffer_size) +{ + struct pipe_stream_output_target *target; + + target = CALLOC_STRUCT(pipe_stream_output_target); + + if (!target) + return NULL; + + pipe_reference_init(&target->reference, 1); + pipe_resource_reference(&target->buffer, prsc); + + target->context = pctx; + target->buffer_offset = buffer_offset; + target->buffer_size = buffer_size; + + return target; +} + +static void +panfrost_stream_output_target_destroy(struct pipe_context *pctx, + struct pipe_stream_output_target *target) +{ + pipe_resource_reference(&target->buffer, NULL); + free(target); +} + +static void +panfrost_set_stream_output_targets(struct pipe_context *pctx, + unsigned num_targets, + struct pipe_stream_output_target **targets, + const unsigned *offsets) +{ + /* STUB */ +} + static void panfrost_setup_hardware(struct panfrost_context *ctx) { @@ -2408,8 +2480,9 @@ panfrost_create_context(struct pipe_screen *screen, void *priv, unsigned flags) unsigned gpu_id; gpu_id = pscreen->driver->query_gpu_version(pscreen); - ctx->is_t6xx = gpu_id <= 0x0750; /* For now, this flag means t76x or less */ - ctx->require_sfbd = gpu_id < 0x0750; /* t76x is the first to support MFD */ + + ctx->is_t6xx = gpu_id <= 0x0750; /* For now, this flag means T760 or less */ + ctx->require_sfbd = gpu_id < 0x0750; /* T760 is the first to support MFBD */ gallium->screen = screen; @@ -2474,6 +2547,10 @@ panfrost_create_context(struct pipe_screen *screen, void *priv, unsigned flags) gallium->end_query = panfrost_end_query; gallium->get_query_result = panfrost_get_query_result; + gallium->create_stream_output_target = panfrost_create_stream_output_target; + gallium->stream_output_target_destroy = panfrost_stream_output_target_destroy; + gallium->set_stream_output_targets = panfrost_set_stream_output_targets; + panfrost_resource_context_init(gallium); pscreen->driver->init_context(ctx); @@ -2499,7 +2576,6 @@ panfrost_create_context(struct pipe_screen *screen, void *priv, unsigned flags) panfrost_emit_vertex_payload(ctx); panfrost_emit_tiler_payload(ctx); panfrost_invalidate_frame(ctx); - panfrost_viewport(ctx, 0.0, 1.0, 0, 0, ctx->pipe_framebuffer.width, ctx->pipe_framebuffer.height); panfrost_default_shader_backend(ctx); panfrost_generate_space_filler_indices();