X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fpanfrost%2Fpan_context.c;h=fd1fa7f328bd5ad52666708d26cd5503b5e20e3e;hb=b8739c24ee2fdccc60e4e18357eb9e63ae2b8183;hp=368b3b8759a26dcd53a387e6c46b44b81cae0f1b;hpb=2d22b5380cc0b66efb38f2d9cd09af8eab602011;p=mesa.git diff --git a/src/gallium/drivers/panfrost/pan_context.c b/src/gallium/drivers/panfrost/pan_context.c index 368b3b8759a..fd1fa7f328b 100644 --- a/src/gallium/drivers/panfrost/pan_context.c +++ b/src/gallium/drivers/panfrost/pan_context.c @@ -41,54 +41,15 @@ #include "pan_screen.h" #include "pan_blending.h" #include "pan_blend_shaders.h" +#include "pan_util.h" #include "pan_wallpaper.h" -#ifdef DUMP_PERFORMANCE_COUNTERS static int performance_counter_number = 0; -#endif +extern const char *pan_counters_base; /* Do not actually send anything to the GPU; merely generate the cmdstream as fast as possible. Disables framebuffer writes */ //#define DRY_RUN -#define SET_BIT(lval, bit, cond) \ - if (cond) \ - lval |= (bit); \ - else \ - lval &= ~(bit); - -/* TODO: Sample size, etc */ - -/* True for t6XX, false for t8xx. TODO: Run-time settable for automatic - * hardware configuration. */ - -static bool is_t6xx = false; - -/* If set, we'll require the use of single render-target framebuffer - * descriptors (SFBD), for older hardware -- specifically, fragment_shader_core.unknown2_3, MALI_HAS_MSAA, enabled); - SET_BIT(ctx->fragment_shader_core.unknown2_4, MALI_NO_MSAA, !enabled); - - if (require_sfbd) { - SET_BIT(ctx->fragment_sfbd.format, MALI_FRAMEBUFFER_MSAA_A | MALI_FRAMEBUFFER_MSAA_B, enabled); - } else { - SET_BIT(ctx->fragment_rts[0].format, MALI_MFBD_FORMAT_MSAA, enabled); - - SET_BIT(ctx->fragment_mfbd.unk1, (1 << 4) | (1 << 1), enabled); - - /* XXX */ - ctx->fragment_mfbd.rt_count_2 = enabled ? 4 : 1; - } -} - /* AFBC is enabled on a per-resource basis (AFBC enabling is theoretically * indepdent between color buffers and depth/stencil). To enable, we allocate * the AFBC metadata buffer and mark that it is enabled. We do -not- actually @@ -98,8 +59,8 @@ panfrost_set_framebuffer_msaa(struct panfrost_context *ctx, bool enabled) static void panfrost_enable_afbc(struct panfrost_context *ctx, struct panfrost_resource *rsrc, bool ds) { - if (require_sfbd) { - printf("AFBC not supported yet on SFBD\n"); + if (ctx->require_sfbd) { + DBG("AFBC not supported yet on SFBD\n"); assert(0); } @@ -109,7 +70,7 @@ panfrost_enable_afbc(struct panfrost_context *ctx, struct panfrost_resource *rsr int tile_w = (rsrc->base.width0 + (MALI_TILE_LENGTH - 1)) >> MALI_TILE_SHIFT; int tile_h = (rsrc->base.height0 + (MALI_TILE_LENGTH - 1)) >> MALI_TILE_SHIFT; int bytes_per_pixel = util_format_get_blocksize(rsrc->base.format); - int stride = bytes_per_pixel * rsrc->base.width0; /* TODO: Alignment? */ + int stride = bytes_per_pixel * ALIGN(rsrc->base.width0, 16); stride *= 2; /* TODO: Should this be carried over? */ int main_size = stride * rsrc->base.height0; @@ -120,7 +81,7 @@ panfrost_enable_afbc(struct panfrost_context *ctx, struct panfrost_resource *rsr (rsrc->bo->afbc_metadata_size + main_size + 4095) / 4096, true, 0, 0, 0); - rsrc->bo->has_afbc = true; + rsrc->bo->layout = PAN_AFBC; /* Compressed textured reads use a tagged pointer to the metadata */ @@ -144,78 +105,6 @@ panfrost_enable_checksum(struct panfrost_context *ctx, struct panfrost_resource rsrc->bo->has_checksum = true; } -/* ..by contrast, this routine runs for every FRAGMENT job, but does no - * allocation. AFBC is enabled on a per-surface basis */ - -static void -panfrost_set_fragment_afbc(struct panfrost_context *ctx) -{ - for (int cb = 0; cb < ctx->pipe_framebuffer.nr_cbufs; ++cb) { - struct panfrost_resource *rsrc = (struct panfrost_resource *) ctx->pipe_framebuffer.cbufs[cb]->texture; - - /* Non-AFBC is the default */ - if (!rsrc->bo->has_afbc) - continue; - - if (require_sfbd) { - fprintf(stderr, "Color AFBC not supported on SFBD\n"); - assert(0); - } - - /* Enable AFBC for the render target */ - ctx->fragment_rts[0].afbc.metadata = rsrc->bo->afbc_slab.gpu; - ctx->fragment_rts[0].afbc.stride = 0; - ctx->fragment_rts[0].afbc.unk = 0x30009; - - ctx->fragment_rts[0].format |= MALI_MFBD_FORMAT_AFBC; - - /* Point rendering to our special framebuffer */ - ctx->fragment_rts[0].framebuffer = rsrc->bo->afbc_slab.gpu + rsrc->bo->afbc_metadata_size; - - /* WAT? Stride is diff from the scanout case */ - ctx->fragment_rts[0].framebuffer_stride = ctx->pipe_framebuffer.width * 2 * 4; - } - - /* Enable depth/stencil AFBC for the framebuffer (not the render target) */ - if (ctx->pipe_framebuffer.zsbuf) { - struct panfrost_resource *rsrc = (struct panfrost_resource *) ctx->pipe_framebuffer.zsbuf->texture; - - if (rsrc->bo->has_afbc) { - if (require_sfbd) { - fprintf(stderr, "Depth AFBC not supported on SFBD\n"); - assert(0); - } - - ctx->fragment_mfbd.unk3 |= MALI_MFBD_EXTRA; - - ctx->fragment_extra.ds_afbc.depth_stencil_afbc_metadata = rsrc->bo->afbc_slab.gpu; - ctx->fragment_extra.ds_afbc.depth_stencil_afbc_stride = 0; - - ctx->fragment_extra.ds_afbc.depth_stencil = rsrc->bo->afbc_slab.gpu + rsrc->bo->afbc_metadata_size; - - ctx->fragment_extra.ds_afbc.zero1 = 0x10009; - ctx->fragment_extra.ds_afbc.padding = 0x1000; - - ctx->fragment_extra.unk = 0x435; /* General 0x400 in all unks. 0x5 for depth/stencil. 0x10 for AFBC encoded depth stencil. Unclear where the 0x20 is from */ - - ctx->fragment_mfbd.unk3 |= 0x400; - } - } - - /* For the special case of a depth-only FBO, we need to attach a dummy render target */ - - if (ctx->pipe_framebuffer.nr_cbufs == 0) { - if (require_sfbd) { - fprintf(stderr, "Depth-only FBO not supported on SFBD\n"); - assert(0); - } - - ctx->fragment_rts[0].format = 0x80008000; - ctx->fragment_rts[0].framebuffer = 0; - ctx->fragment_rts[0].framebuffer_stride = 0; - } -} - /* Framebuffer descriptor */ static void @@ -232,7 +121,7 @@ panfrost_set_framebuffer_resolution(struct mali_single_framebuffer *fb, int w, i fb->resolution_check = ((w + h) / 3) << 4; } -static struct mali_single_framebuffer +struct mali_single_framebuffer panfrost_emit_sfbd(struct panfrost_context *ctx) { struct mali_single_framebuffer framebuffer = { @@ -252,11 +141,32 @@ panfrost_emit_sfbd(struct panfrost_context *ctx) return framebuffer; } -static struct bifrost_framebuffer +struct bifrost_framebuffer panfrost_emit_mfbd(struct panfrost_context *ctx) { struct bifrost_framebuffer framebuffer = { - .tiler_meta = 0xf00000c600, + /* It is not yet clear what tiler_meta means or how it's + * calculated, but we can tell the lower 32-bits are a + * (monotonically increasing?) function of tile count and + * geometry complexity; I suspect it defines a memory size of + * some kind? for the tiler. It's really unclear at the + * moment... but to add to the confusion, the hardware is happy + * enough to accept a zero in this field, so we don't even have + * to worry about it right now. + * + * The byte (just after the 32-bit mark) is much more + * interesting. The higher nibble I've only ever seen as 0xF, + * but the lower one I've seen as 0x0 or 0xF, and it's not + * obvious what the difference is. But what -is- obvious is + * that when the lower nibble is zero, performance is severely + * degraded compared to when the lower nibble is set. + * Evidently, that nibble enables some sort of fast path, + * perhaps relating to caching or tile flush? Regardless, at + * this point there's no clear reason not to set it, aside from + * substantially increased memory requirements (of the misc_0 + * buffer) */ + + .tiler_meta = ((uint64_t) 0xff << 32) | 0x0, .width1 = MALI_POSITIVE(ctx->pipe_framebuffer.width), .height1 = MALI_POSITIVE(ctx->pipe_framebuffer.height), @@ -271,10 +181,23 @@ panfrost_emit_mfbd(struct panfrost_context *ctx) .unknown2 = 0x1f, - /* Presumably corresponds to unknown_address_X of SFBD */ + /* Corresponds to unknown_address_X of SFBD */ .scratchpad = ctx->scratchpad.gpu, .tiler_scratch_start = ctx->misc_0.gpu, - .tiler_scratch_middle = ctx->misc_0.gpu + /*ctx->misc_0.size*/40960, /* Size depends on the size of the framebuffer and the number of vertices */ + + /* The constant added here is, like the lower word of + * tiler_meta, (loosely) another product of framebuffer size + * and geometry complexity. It must be sufficiently large for + * the tiler_meta fast path to work; if it's too small, there + * will be DATA_INVALID_FAULTs. Conversely, it must be less + * than the total size of misc_0, or else there's no room. It's + * possible this constant configures a partition between two + * parts of misc_0? We haven't investigated the functionality, + * as these buffers are internally used by the hardware + * (presumably by the tiler) but not seemingly touched by the driver + */ + + .tiler_scratch_middle = ctx->misc_0.gpu + 0xf0000, .tiler_heap_start = ctx->tiler_heap.gpu, .tiler_heap_end = ctx->tiler_heap.gpu + ctx->tiler_heap.size, @@ -285,7 +208,7 @@ panfrost_emit_mfbd(struct panfrost_context *ctx) /* Are we currently rendering to the screen (rather than an FBO)? */ -static bool +bool panfrost_is_scanout(struct panfrost_context *ctx) { /* If there is no color buffer, it's an FBO */ @@ -301,60 +224,6 @@ panfrost_is_scanout(struct panfrost_context *ctx) ctx->pipe_framebuffer.cbufs[0]->texture->bind & PIPE_BIND_SHARED; } -/* The above function is for generalised fbd emission, used in both fragment as - * well as vertex/tiler payloads. This payload is specific to fragment - * payloads. */ - -static void -panfrost_new_frag_framebuffer(struct panfrost_context *ctx) -{ - mali_ptr framebuffer; - int stride; - - if (ctx->pipe_framebuffer.nr_cbufs > 0) { - framebuffer = ((struct panfrost_resource *) ctx->pipe_framebuffer.cbufs[0]->texture)->bo->gpu[0]; - stride = util_format_get_stride(ctx->pipe_framebuffer.cbufs[0]->format, ctx->pipe_framebuffer.width); - } else { - /* Depth-only framebuffer -> dummy RT */ - framebuffer = 0; - stride = 0; - } - - /* The default is upside down from OpenGL's perspective. */ - if (panfrost_is_scanout(ctx)) { - framebuffer += stride * (ctx->pipe_framebuffer.height - 1); - stride = -stride; - } - - if (require_sfbd) { - struct mali_single_framebuffer fb = panfrost_emit_sfbd(ctx); - - fb.framebuffer = framebuffer; - fb.stride = stride; - - fb.format = 0xb84e0281; /* RGB32, no MSAA */ - memcpy(&ctx->fragment_sfbd, &fb, sizeof(fb)); - } else { - struct bifrost_framebuffer fb = panfrost_emit_mfbd(ctx); - - /* XXX: MRT case */ - fb.rt_count_2 = 1; - fb.unk3 = 0x100; - - struct bifrost_render_target rt = { - .unk1 = 0x4000000, - .format = 0x860a8899, /* RGBA32, no MSAA */ - .framebuffer = framebuffer, - .framebuffer_stride = (stride / 16) & 0xfffffff, - }; - - memcpy(&ctx->fragment_rts[0], &rt, sizeof(rt)); - - memset(&ctx->fragment_extra, 0, sizeof(ctx->fragment_extra)); - memcpy(&ctx->fragment_mfbd, &fb, sizeof(fb)); - } -} - /* Maps float 0.0-1.0 to int 0x00-0xFF */ static uint8_t normalised_float_to_u8(float f) @@ -362,102 +231,6 @@ normalised_float_to_u8(float f) return (uint8_t) (int) (f * 255.0f); } -static void -panfrost_clear_sfbd(struct panfrost_context *ctx, - bool clear_color, - bool clear_depth, - bool clear_stencil, - uint32_t packed_color, - double depth, unsigned stencil - ) -{ - struct mali_single_framebuffer *sfbd = &ctx->fragment_sfbd; - - if (clear_color) { - sfbd->clear_color_1 = packed_color; - sfbd->clear_color_2 = packed_color; - sfbd->clear_color_3 = packed_color; - sfbd->clear_color_4 = packed_color; - } - - if (clear_depth) { - sfbd->clear_depth_1 = depth; - sfbd->clear_depth_2 = depth; - sfbd->clear_depth_3 = depth; - sfbd->clear_depth_4 = depth; - } - - if (clear_stencil) { - sfbd->clear_stencil = stencil; - } - - /* Setup buffers */ - - if (clear_depth) { - sfbd->depth_buffer = ctx->depth_stencil_buffer.gpu; - sfbd->depth_buffer_enable = MALI_DEPTH_STENCIL_ENABLE; - } - - if (clear_stencil) { - sfbd->stencil_buffer = ctx->depth_stencil_buffer.gpu; - sfbd->stencil_buffer_enable = MALI_DEPTH_STENCIL_ENABLE; - } - - /* Set flags based on what has been cleared, for the SFBD case */ - /* XXX: What do these flags mean? */ - int clear_flags = 0x101100; - - if (clear_color && clear_depth && clear_stencil) { - /* On a tiler like this, it's fastest to clear all three buffers at once */ - - clear_flags |= MALI_CLEAR_FAST; - } else { - clear_flags |= MALI_CLEAR_SLOW; - - if (clear_stencil) - clear_flags |= MALI_CLEAR_SLOW_STENCIL; - } - - sfbd->clear_flags = clear_flags; -} - -static void -panfrost_clear_mfbd(struct panfrost_context *ctx, - bool clear_color, - bool clear_depth, - bool clear_stencil, - uint32_t packed_color, - double depth, unsigned stencil - ) -{ - struct bifrost_render_target *buffer_color = &ctx->fragment_rts[0]; - struct bifrost_framebuffer *buffer_ds = &ctx->fragment_mfbd; - - if (clear_color) { - buffer_color->clear_color_1 = packed_color; - buffer_color->clear_color_2 = packed_color; - buffer_color->clear_color_3 = packed_color; - buffer_color->clear_color_4 = packed_color; - } - - if (clear_depth) { - buffer_ds->clear_depth = depth; - } - - if (clear_stencil) { - buffer_ds->clear_stencil = stencil; - } - - if (clear_depth || clear_stencil) { - /* Setup combined 24/8 depth/stencil */ - ctx->fragment_mfbd.unk3 |= MALI_MFBD_EXTRA; - //ctx->fragment_extra.unk = /*0x405*/0x404; - ctx->fragment_extra.unk = 0x405; - ctx->fragment_extra.ds_linear.depth = ctx->depth_stencil_buffer.gpu; - ctx->fragment_extra.ds_linear.depth_stride = ctx->pipe_framebuffer.width * 4; - } -} - static void panfrost_clear( struct pipe_context *pipe, @@ -466,40 +239,32 @@ panfrost_clear( double depth, unsigned stencil) { struct panfrost_context *ctx = pan_context(pipe); + struct panfrost_job *job = panfrost_get_job_for_fbo(ctx); - if (!color) { - printf("Warning: clear color null?\n"); - return; - } - - /* Save settings for FBO switch */ - ctx->last_clear.buffers = buffers; - ctx->last_clear.color = color; - ctx->last_clear.depth = depth; - ctx->last_clear.depth = depth; + if (buffers & PIPE_CLEAR_COLOR) { + /* Alpha clear only meaningful without alpha channel, TODO less ad hoc */ + bool has_alpha = util_format_has_alpha(ctx->pipe_framebuffer.cbufs[0]->format); + float clear_alpha = has_alpha ? color->f[3] : 1.0f; - bool clear_color = buffers & PIPE_CLEAR_COLOR; - bool clear_depth = buffers & PIPE_CLEAR_DEPTH; - bool clear_stencil = buffers & PIPE_CLEAR_STENCIL; + uint32_t packed_color = + (normalised_float_to_u8(clear_alpha) << 24) | + (normalised_float_to_u8(color->f[2]) << 16) | + (normalised_float_to_u8(color->f[1]) << 8) | + (normalised_float_to_u8(color->f[0]) << 0); - /* Remember that we've done something */ - ctx->frame_cleared = true; + job->clear_color = packed_color; - /* Alpha clear only meaningful without alpha channel */ - bool has_alpha = ctx->pipe_framebuffer.nr_cbufs && util_format_has_alpha(ctx->pipe_framebuffer.cbufs[0]->format); - float clear_alpha = has_alpha ? color->f[3] : 1.0f; + } - uint32_t packed_color = - (normalised_float_to_u8(clear_alpha) << 24) | - (normalised_float_to_u8(color->f[2]) << 16) | - (normalised_float_to_u8(color->f[1]) << 8) | - (normalised_float_to_u8(color->f[0]) << 0); + if (buffers & PIPE_CLEAR_DEPTH) { + job->clear_depth = depth; + } - if (require_sfbd) { - panfrost_clear_sfbd(ctx, clear_color, clear_depth, clear_stencil, packed_color, depth, stencil); - } else { - panfrost_clear_mfbd(ctx, clear_color, clear_depth, clear_stencil, packed_color, depth, stencil); + if (buffers & PIPE_CLEAR_STENCIL) { + job->clear_stencil = stencil; } + + job->clear |= buffers; } static mali_ptr @@ -538,7 +303,7 @@ panfrost_attach_vt_sfbd(struct panfrost_context *ctx) static void panfrost_attach_vt_framebuffer(struct panfrost_context *ctx) { - mali_ptr framebuffer = require_sfbd ? + mali_ptr framebuffer = ctx->require_sfbd ? panfrost_attach_vt_sfbd(ctx) : panfrost_attach_vt_mfbd(ctx); @@ -546,35 +311,6 @@ panfrost_attach_vt_framebuffer(struct panfrost_context *ctx) ctx->payload_tiler.postfix.framebuffer = framebuffer; } -static void -panfrost_viewport(struct panfrost_context *ctx, - float depth_range_n, - float depth_range_f, - int viewport_x0, int viewport_y0, - int viewport_x1, int viewport_y1) -{ - /* Viewport encoding is asymmetric. Purpose of the floats is unknown? */ - - struct mali_viewport ret = { - .floats = { -#if 0 - -inff, -inff, - inff, inff, -#endif - 0.0, 0.0, - 2048.0, 1600.0, - }, - - .depth_range_n = depth_range_n, - .depth_range_f = depth_range_f, - - .viewport0 = { viewport_x0, viewport_y0 }, - .viewport1 = { MALI_POSITIVE(viewport_x1), MALI_POSITIVE(viewport_y1) }, - }; - - memcpy(ctx->viewport, &ret, sizeof(ret)); -} - /* Reset per-frame context, called on context initialisation as well as after * flushing a frame */ @@ -582,19 +318,17 @@ static void panfrost_invalidate_frame(struct panfrost_context *ctx) { unsigned transient_count = ctx->transient_pools[ctx->cmdstream_i].entry_index*ctx->transient_pools[0].entry_size + ctx->transient_pools[ctx->cmdstream_i].entry_offset; - printf("Uploaded transient %d bytes\n", transient_count); + DBG("Uploaded transient %d bytes\n", transient_count); /* Rotate cmdstream */ if ((++ctx->cmdstream_i) == (sizeof(ctx->transient_pools) / sizeof(ctx->transient_pools[0]))) ctx->cmdstream_i = 0; - if (require_sfbd) + if (ctx->require_sfbd) ctx->vt_framebuffer_sfbd = panfrost_emit_sfbd(ctx); else ctx->vt_framebuffer_mfbd = panfrost_emit_mfbd(ctx); - panfrost_new_frag_framebuffer(ctx); - /* Reset varyings allocated */ ctx->varying_height = 0; @@ -627,7 +361,7 @@ panfrost_emit_vertex_payload(struct panfrost_context *ctx) .workgroups_x_shift_2 = 0x2, .workgroups_x_shift_3 = 0x5, }, - .gl_enables = 0x4 | (is_t6xx ? 0 : 0x2), + .gl_enables = 0x4 | (ctx->is_t6xx ? 0 : 0x2), }; memcpy(&ctx->payload_vertex, &payload, sizeof(payload)); @@ -646,11 +380,6 @@ panfrost_emit_tiler_payload(struct panfrost_context *ctx) }, }; - /* Reserve the viewport */ - struct panfrost_transfer t = panfrost_allocate_chunk(ctx, sizeof(struct mali_viewport), HEAP_DESCRIPTOR); - ctx->viewport = (struct mali_viewport *) t.cpu; - payload.postfix.viewport = t.gpu; - memcpy(&ctx->payload_tiler, &payload, sizeof(payload)); } @@ -819,7 +548,7 @@ panfrost_default_shader_backend(struct panfrost_context *ctx) .unknown2_4 = MALI_NO_MSAA | 0x4e0, }; - if (is_t6xx) { + if (ctx->is_t6xx) { shader.unknown2_4 |= 0x10; } @@ -918,90 +647,88 @@ panfrost_set_value_job(struct panfrost_context *ctx) ctx->set_value_job = transfer.gpu; } -/* Generate a fragment job. This should be called once per frame. (According to - * presentations, this is supposed to correspond to eglSwapBuffers) */ +static mali_ptr +panfrost_emit_varyings( + struct panfrost_context *ctx, + union mali_attr *slot, + unsigned stride, + unsigned count) +{ + mali_ptr varying_address = ctx->varying_mem.gpu + ctx->varying_height; + + /* Fill out the descriptor */ + slot->elements = varying_address | MALI_ATTR_LINEAR; + slot->stride = stride; + slot->size = stride * count; + + ctx->varying_height += ALIGN(slot->size, 64); + assert(ctx->varying_height < ctx->varying_mem.size); -mali_ptr -panfrost_fragment_job(struct panfrost_context *ctx) + return varying_address; +} + +static void +panfrost_emit_point_coord(union mali_attr *slot) { - /* Update fragment FBD */ - panfrost_set_fragment_afbc(ctx); + slot->elements = MALI_VARYING_POINT_COORD | MALI_ATTR_LINEAR; + slot->stride = slot->size = 0; +} - if (ctx->pipe_framebuffer.nr_cbufs == 1) { - struct panfrost_resource *rsrc = (struct panfrost_resource *) ctx->pipe_framebuffer.cbufs[0]->texture; +static void +panfrost_emit_varying_descriptor( + struct panfrost_context *ctx, + unsigned invocation_count) +{ + /* Load the shaders */ - if (rsrc->bo->has_checksum) { - if (require_sfbd) { - fprintf(stderr, "Checksumming not supported on SFBD\n"); - assert(0); - } + struct panfrost_shader_state *vs = &ctx->vs->variants[ctx->vs->active_variant]; + struct panfrost_shader_state *fs = &ctx->fs->variants[ctx->fs->active_variant]; - int stride = util_format_get_stride(rsrc->base.format, rsrc->base.width0); + /* Allocate the varying descriptor */ - ctx->fragment_mfbd.unk3 |= MALI_MFBD_EXTRA; - ctx->fragment_extra.unk |= 0x420; - ctx->fragment_extra.checksum_stride = rsrc->bo->checksum_stride; - ctx->fragment_extra.checksum = rsrc->bo->gpu[0] + stride * rsrc->base.height0; - } - } + size_t vs_size = sizeof(struct mali_attr_meta) * vs->tripipe->varying_count; + size_t fs_size = sizeof(struct mali_attr_meta) * fs->tripipe->varying_count; - /* The frame is complete and therefore the framebuffer descriptor is - * ready for linkage and upload */ + struct panfrost_transfer trans = panfrost_allocate_transient(ctx, + vs_size + fs_size); - size_t sz = require_sfbd ? sizeof(struct mali_single_framebuffer) : (sizeof(struct bifrost_framebuffer) + sizeof(struct bifrost_fb_extra) + sizeof(struct bifrost_render_target) * 1); - struct panfrost_transfer fbd_t = panfrost_allocate_transient(ctx, sz); - off_t offset = 0; + memcpy(trans.cpu, vs->varyings, vs_size); + memcpy(trans.cpu + vs_size, fs->varyings, fs_size); - if (require_sfbd) { - /* Upload just the SFBD all at once */ - memcpy(fbd_t.cpu, &ctx->fragment_sfbd, sizeof(ctx->fragment_sfbd)); - offset += sizeof(ctx->fragment_sfbd); - } else { - /* Upload the MFBD header */ - memcpy(fbd_t.cpu, &ctx->fragment_mfbd, sizeof(ctx->fragment_mfbd)); - offset += sizeof(ctx->fragment_mfbd); - - /* Upload extra framebuffer info if necessary */ - if (ctx->fragment_mfbd.unk3 & MALI_MFBD_EXTRA) { - memcpy(fbd_t.cpu + offset, &ctx->fragment_extra, sizeof(struct bifrost_fb_extra)); - offset += sizeof(struct bifrost_fb_extra); - } + ctx->payload_vertex.postfix.varying_meta = trans.gpu; + ctx->payload_tiler.postfix.varying_meta = trans.gpu + vs_size; - /* Upload (single) render target */ - memcpy(fbd_t.cpu + offset, &ctx->fragment_rts[0], sizeof(struct bifrost_render_target) * 1); - } + /* Buffer indices must be in this order per our convention */ + union mali_attr varyings[PIPE_MAX_ATTRIBS]; + unsigned idx = 0; - /* Generate the fragment (frame) job */ + /* General varyings -- use the VS's, since those are more likely to be + * accurate on desktop */ - struct mali_job_descriptor_header header = { - .job_type = JOB_TYPE_FRAGMENT, - .job_index = 1, -#ifdef __LP64__ - .job_descriptor_size = 1 -#endif - }; + panfrost_emit_varyings(ctx, &varyings[idx++], + vs->general_varying_stride, invocation_count); - struct mali_payload_fragment payload = { - .min_tile_coord = MALI_COORDINATE_TO_TILE_MIN(0, 0), - .max_tile_coord = MALI_COORDINATE_TO_TILE_MAX(ctx->pipe_framebuffer.width, ctx->pipe_framebuffer.height), - .framebuffer = fbd_t.gpu | (require_sfbd ? MALI_SFBD : MALI_MFBD), - }; + /* fp32 vec4 gl_Position */ + ctx->payload_tiler.postfix.position_varying = + panfrost_emit_varyings(ctx, &varyings[idx++], + sizeof(float) * 4, invocation_count); - if (!require_sfbd && ctx->fragment_mfbd.unk3 & MALI_MFBD_EXTRA) { - /* Signal that there is an extra portion of the framebuffer - * descriptor */ - payload.framebuffer |= 2; - } + if (vs->writes_point_size || fs->reads_point_coord) { + /* fp16 vec1 gl_PointSize */ + ctx->payload_tiler.primitive_size.pointer = + panfrost_emit_varyings(ctx, &varyings[idx++], + 2, invocation_count); + } - /* Normally, there should be no padding. However, fragment jobs are - * shared with 64-bit Bifrost systems, and accordingly there is 4-bytes - * of zero padding in between. */ + if (fs->reads_point_coord) { + /* Special descriptor */ + panfrost_emit_point_coord(&varyings[idx++]); + } - struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, sizeof(header) + sizeof(payload)); - memcpy(transfer.cpu, &header, sizeof(header)); - memcpy(transfer.cpu + sizeof(header), &payload, sizeof(payload)); - return transfer.gpu; + mali_ptr varyings_p = panfrost_upload_transient(ctx, &varyings, idx * sizeof(union mali_attr)); + ctx->payload_vertex.postfix.varyings = varyings_p; + ctx->payload_tiler.postfix.varyings = varyings_p; } /* Emits attributes and varying descriptors, which should be called every draw, @@ -1012,7 +739,6 @@ panfrost_emit_vertex_data(struct panfrost_context *ctx) { /* TODO: Only update the dirtied buffers */ union mali_attr attrs[PIPE_MAX_ATTRIBS]; - union mali_attr varyings[PIPE_MAX_ATTRIBS]; unsigned invocation_count = MALI_NEGATIVE(ctx->payload_tiler.prefix.invocation_count); @@ -1055,39 +781,9 @@ panfrost_emit_vertex_data(struct panfrost_context *ctx) } } - struct panfrost_varyings *vars = &ctx->vs->variants[ctx->vs->active_variant].varyings; - - for (int i = 0; i < vars->varying_buffer_count; ++i) { - mali_ptr varying_address = ctx->varying_mem.gpu + ctx->varying_height; - - varyings[i].elements = varying_address | 1; - varyings[i].stride = vars->varyings_stride[i]; - varyings[i].size = vars->varyings_stride[i] * invocation_count; - - /* If this varying has to be linked somewhere, do it now. See - * pan_assemble.c for the indices. TODO: Use a more generic - * linking interface */ - - if (i == 1) { - /* gl_Position */ - ctx->payload_tiler.postfix.position_varying = varying_address; - } else if (i == 2) { - /* gl_PointSize */ - ctx->payload_tiler.primitive_size.pointer = varying_address; - } - - /* Varyings appear to need 64-byte alignment */ - ctx->varying_height += ALIGN(varyings[i].size, 64); - - /* Ensure that we fit */ - assert(ctx->varying_height < ctx->varying_mem.size); - } - ctx->payload_vertex.postfix.attributes = panfrost_upload_transient(ctx, attrs, ctx->vertex_buffer_count * sizeof(union mali_attr)); - mali_ptr varyings_p = panfrost_upload_transient(ctx, &varyings, vars->varying_buffer_count * sizeof(union mali_attr)); - ctx->payload_vertex.postfix.varyings = varyings_p; - ctx->payload_tiler.postfix.varyings = varyings_p; + panfrost_emit_varying_descriptor(ctx, invocation_count); } /* Go through dirty flags and actualise them in the cmdstream. */ @@ -1095,15 +791,30 @@ panfrost_emit_vertex_data(struct panfrost_context *ctx) void panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) { + struct panfrost_job *job = panfrost_get_job_for_fbo(ctx); + if (with_vertex_data) { panfrost_emit_vertex_data(ctx); } + bool msaa = ctx->rasterizer->base.multisample; + if (ctx->dirty & PAN_DIRTY_RASTERIZER) { ctx->payload_tiler.gl_enables = ctx->rasterizer->tiler_gl_enables; - panfrost_set_framebuffer_msaa(ctx, ctx->rasterizer->base.multisample); + + /* TODO: Sample size */ + SET_BIT(ctx->fragment_shader_core.unknown2_3, MALI_HAS_MSAA, msaa); + SET_BIT(ctx->fragment_shader_core.unknown2_4, MALI_NO_MSAA, !msaa); } + /* Enable job requirements at draw-time */ + + if (msaa) + job->requirements |= PAN_REQ_MSAA; + + if (ctx->depth_stencil->depth.writemask) + job->requirements |= PAN_REQ_DEPTH_WRITE; + if (ctx->occlusion_query) { ctx->payload_tiler.gl_enables |= MALI_OCCLUSION_QUERY | MALI_OCCLUSION_PRECISE; ctx->payload_tiler.postfix.occlusion_counter = ctx->occlusion_query->transfer.gpu; @@ -1115,6 +826,7 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) struct panfrost_shader_state *vs = &ctx->vs->variants[ctx->vs->active_variant]; /* Late shader descriptor assignments */ + vs->tripipe->texture_count = ctx->sampler_view_count[PIPE_SHADER_VERTEX]; vs->tripipe->sampler_count = ctx->sampler_count[PIPE_SHADER_VERTEX]; @@ -1122,15 +834,6 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) vs->tripipe->midgard1.unknown1 = 0x2201; ctx->payload_vertex.postfix._shader_upper = vs->tripipe_gpu >> 4; - - /* Varying descriptor is tied to the vertex shader. Also the - * fragment shader, I suppose, but it's generated with the - * vertex shader so */ - - struct panfrost_varyings *varyings = &ctx->vs->variants[ctx->vs->active_variant].varyings; - - ctx->payload_vertex.postfix.varying_meta = varyings->varyings_descriptor; - ctx->payload_tiler.postfix.varying_meta = varyings->varyings_descriptor_fragment; } if (ctx->dirty & (PAN_DIRTY_RASTERIZER | PAN_DIRTY_VS)) { @@ -1209,7 +912,7 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) (ctx->blend->equation.alpha_mode == 0x122) && (ctx->blend->equation.color_mask == 0xf); - if (require_sfbd) { + if (ctx->require_sfbd) { /* When only a single render target platform is used, the blend * information is inside the shader meta itself. We * additionally need to signal CAN_DISCARD for nontrivial blend @@ -1232,7 +935,7 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) ctx->payload_tiler.postfix._shader_upper = (transfer.gpu) >> 4; - if (!require_sfbd) { + if (!ctx->require_sfbd) { /* Additional blend descriptor tacked on for jobs using MFBD */ unsigned blend_count = 0; @@ -1322,7 +1025,7 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) int s = ctx->sampler_views[t][i]->hw.nr_mipmap_levels; if (!rsrc->bo->is_mipmap) { - if (is_t6xx) { + if (ctx->is_t6xx) { /* HW ERRATA, not needed after t6XX */ ctx->sampler_views[t][i]->hw.swizzled_bitmaps[1] = rsrc->bo->gpu[0]; @@ -1337,7 +1040,7 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) /* Restore */ ctx->sampler_views[t][i]->hw.nr_mipmap_levels = s; - if (is_t6xx) { + if (ctx->is_t6xx) { ctx->sampler_views[t][i]->hw.unknown3A = 0; } } @@ -1356,12 +1059,21 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) /* Generate the viewport vector of the form: */ const struct pipe_viewport_state *vp = &ctx->pipe_viewport; + /* For flipped-Y buffers (signaled by negative scale), the translate is + * flipped as well */ + + bool invert_y = vp->scale[1] < 0.0; + float translate_y = vp->translate[1]; + + if (invert_y) + translate_y = ctx->pipe_framebuffer.height - translate_y; + float viewport_vec4[] = { vp->scale[0], fabsf(vp->scale[1]), vp->translate[0], - /* -1.0 * vp->translate[1] */ fabs(1.0 * vp->scale[1]) /* XXX */ + translate_y }; for (int i = 0; i < PIPE_SHADER_TYPES; ++i) { @@ -1402,7 +1114,7 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) break; default: - printf("Unknown shader stage %d in uniform upload\n", i); + DBG("Unknown shader stage %d in uniform upload\n", i); assert(0); } @@ -1423,6 +1135,54 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) } } + /* TODO: Upload the viewport somewhere more appropriate */ + + /* Clip bounds are encoded as floats. The viewport itself is encoded as + * (somewhat) asymmetric ints. */ + const struct pipe_scissor_state *ss = &ctx->scissor; + + struct mali_viewport view = { + /* By default, do no viewport clipping, i.e. clip to (-inf, + * inf) in each direction. Clipping to the viewport in theory + * should work, but in practice causes issues when we're not + * explicitly trying to scissor */ + + .clip_minx = -inff, + .clip_miny = -inff, + .clip_maxx = inff, + .clip_maxy = inff, + + .clip_minz = 0.0, + .clip_maxz = 1.0, + }; + + /* Always scissor to the viewport by default. */ + view.viewport0[0] = (int) (vp->translate[0] - vp->scale[0]); + view.viewport1[0] = MALI_POSITIVE((int) (vp->translate[0] + vp->scale[0])); + + view.viewport0[1] = (int) (translate_y - fabs(vp->scale[1])); + view.viewport1[1] = MALI_POSITIVE((int) (translate_y + fabs(vp->scale[1]))); + + if (ss && ctx->rasterizer && ctx->rasterizer->base.scissor) { + /* Invert scissor if needed */ + unsigned miny = invert_y ? + ctx->pipe_framebuffer.height - ss->maxy : ss->miny; + + unsigned maxy = invert_y ? + ctx->pipe_framebuffer.height - ss->miny : ss->maxy; + + /* Set the actual scissor */ + view.viewport0[0] = ss->minx; + view.viewport0[1] = miny; + view.viewport1[0] = MALI_POSITIVE(ss->maxx); + view.viewport1[1] = MALI_POSITIVE(maxy); + } + + ctx->payload_tiler.postfix.viewport = + panfrost_upload_transient(ctx, + &view, + sizeof(struct mali_viewport)); + ctx->dirty = 0; } @@ -1433,7 +1193,7 @@ panfrost_queue_draw(struct panfrost_context *ctx) { /* TODO: Expand the array? */ if (ctx->draw_count >= MAX_DRAW_CALLS) { - printf("Job buffer overflow, ignoring draw\n"); + DBG("Job buffer overflow, ignoring draw\n"); assert(0); } @@ -1496,7 +1256,8 @@ panfrost_link_jobs(struct panfrost_context *ctx) /* The entire frame is in memory -- send it off to the kernel! */ static void -panfrost_submit_frame(struct panfrost_context *ctx, bool flush_immediate) +panfrost_submit_frame(struct panfrost_context *ctx, bool flush_immediate, + struct pipe_fence_handle **fence) { struct pipe_context *gallium = (struct pipe_context *) ctx; struct panfrost_screen *screen = pan_screen(gallium->screen); @@ -1522,29 +1283,29 @@ panfrost_submit_frame(struct panfrost_context *ctx, bool flush_immediate) /* If visual, we can stall a frame */ - if (panfrost_is_scanout(ctx) && !flush_immediate) - screen->driver->force_flush_fragment(ctx); + if (!flush_immediate) + screen->driver->force_flush_fragment(ctx, fence); screen->last_fragment_id = fragment_id; screen->last_fragment_flushed = false; /* If readback, flush now (hurts the pipelined performance) */ - if (panfrost_is_scanout(ctx) && flush_immediate) - screen->driver->force_flush_fragment(ctx); - -#ifdef DUMP_PERFORMANCE_COUNTERS - char filename[128]; - snprintf(filename, sizeof(filename), "/dev/shm/frame%d.mdgprf", ++performance_counter_number); - FILE *fp = fopen(filename, "wb"); - fwrite(screen->perf_counters.cpu, 4096, sizeof(uint32_t), fp); - fclose(fp); -#endif + if (flush_immediate) + screen->driver->force_flush_fragment(ctx, fence); + + if (screen->driver->dump_counters && pan_counters_base) { + screen->driver->dump_counters(screen); + + char filename[128]; + snprintf(filename, sizeof(filename), "%s/frame%d.mdgprf", pan_counters_base, ++performance_counter_number); + FILE *fp = fopen(filename, "wb"); + fwrite(screen->perf_counters.cpu, 4096, sizeof(uint32_t), fp); + fclose(fp); + } #endif } -bool dont_scanout = false; - void panfrost_flush( struct pipe_context *pipe, @@ -1552,29 +1313,16 @@ panfrost_flush( unsigned flags) { struct panfrost_context *ctx = pan_context(pipe); + struct panfrost_job *job = panfrost_get_job_for_fbo(ctx); - /* If there is nothing drawn, skip the frame */ - if (!ctx->draw_count && !ctx->frame_cleared) return; - - if (!ctx->frame_cleared) { - /* While there are draws, there was no clear. This is a partial - * update, which needs to be handled via the "wallpaper" - * method. We also need to fake a clear, just to get the - * FRAGMENT job correct. */ - - panfrost_clear(&ctx->base, ctx->last_clear.buffers, ctx->last_clear.color, ctx->last_clear.depth, ctx->last_clear.stencil); - - panfrost_draw_wallpaper(pipe); - } - - /* Frame clear handled, reset */ - ctx->frame_cleared = false; + /* Nothing to do! */ + if (!ctx->draw_count && !job->clear) return; /* Whether to stall the pipeline for immediately correct results */ bool flush_immediate = flags & PIPE_FLUSH_END_OF_FRAME; /* Submit the frame itself */ - panfrost_submit_frame(ctx, flush_immediate); + panfrost_submit_frame(ctx, flush_immediate, fence); /* Prepare for the next frame */ panfrost_invalidate_frame(ctx); @@ -1598,7 +1346,7 @@ g2m_draw_mode(enum pipe_prim_type mode) DEFINE_CASE(POLYGON); default: - printf("Illegal draw mode %d\n", mode); + DBG("Illegal draw mode %d\n", mode); assert(0); return MALI_LINE_LOOP; } @@ -1620,7 +1368,7 @@ panfrost_translate_index_size(unsigned size) return MALI_DRAW_INDEXED_UINT32; default: - printf("Unknown index size %d\n", size); + DBG("Unknown index size %d\n", size); assert(0); return 0; } @@ -1657,11 +1405,6 @@ panfrost_get_index_buffer_mapped(struct panfrost_context *ctx, const struct pipe } } -static void -panfrost_draw_vbo( - struct pipe_context *pipe, - const struct pipe_draw_info *info); - #define CALCULATE_MIN_MAX_INDEX(T, buffer, start, count) \ for (unsigned _idx = (start); _idx < (start + count); ++_idx) { \ T idx = buffer[_idx]; \ @@ -1683,7 +1426,7 @@ panfrost_draw_vbo( /* Fallback for unsupported modes */ - if (!(ctx->draw_modes & mode)) { + if (!(ctx->draw_modes & (1 << mode))) { if (mode == PIPE_PRIM_QUADS && info->count == 4 && ctx->rasterizer && !ctx->rasterizer->base.flatshade) { mode = PIPE_PRIM_TRIANGLE_FAN; } else { @@ -1698,6 +1441,11 @@ panfrost_draw_vbo( } } + /* Now that we have a guaranteed terminating path, find the job. + * Assignment commented out to prevent unused warning */ + + /* struct panfrost_job *job = */ panfrost_get_job_for_fbo(ctx); + ctx->payload_tiler.prefix.draw_mode = g2m_draw_mode(mode); ctx->vertex_count = info->count; @@ -1779,35 +1527,18 @@ panfrost_generic_cso_delete(struct pipe_context *pctx, void *hwcso) free(hwcso); } -static void -panfrost_set_scissor(struct panfrost_context *ctx) -{ - const struct pipe_scissor_state *ss = &ctx->scissor; - - if (ss && ctx->rasterizer && ctx->rasterizer->base.scissor && 0) { - ctx->viewport->viewport0[0] = ss->minx; - ctx->viewport->viewport0[1] = ss->miny; - ctx->viewport->viewport1[0] = MALI_POSITIVE(ss->maxx); - ctx->viewport->viewport1[1] = MALI_POSITIVE(ss->maxy); - } else { - ctx->viewport->viewport0[0] = 0; - ctx->viewport->viewport0[1] = 0; - ctx->viewport->viewport1[0] = MALI_POSITIVE(ctx->pipe_framebuffer.width); - ctx->viewport->viewport1[1] = MALI_POSITIVE(ctx->pipe_framebuffer.height); - } -} - static void * panfrost_create_rasterizer_state( struct pipe_context *pctx, const struct pipe_rasterizer_state *cso) { + struct panfrost_context *ctx = pan_context(pctx); struct panfrost_rasterizer *so = CALLOC_STRUCT(panfrost_rasterizer); so->base = *cso; /* Bitmask, unknown meaning of the start value */ - so->tiler_gl_enables = is_t6xx ? 0x105 : 0x7; + so->tiler_gl_enables = ctx->is_t6xx ? 0x105 : 0x7; so->tiler_gl_enables |= MALI_FRONT_FACE( cso->front_ccw ? MALI_CCW : MALI_CW); @@ -1827,21 +1558,12 @@ panfrost_bind_rasterizer_state( void *hwcso) { struct panfrost_context *ctx = pan_context(pctx); - struct pipe_rasterizer_state *cso = hwcso; /* TODO: Why can't rasterizer be NULL ever? Other drivers are fine.. */ if (!hwcso) return; - /* If scissor test has changed, we'll need to update that now */ - bool update_scissor = !ctx->rasterizer || ctx->rasterizer->base.scissor != cso->scissor; - ctx->rasterizer = hwcso; - - /* Actualise late changes */ - if (update_scissor) - panfrost_set_scissor(ctx); - ctx->dirty |= PAN_DIRTY_RASTERIZER; } @@ -1894,7 +1616,9 @@ panfrost_bind_vertex_elements_state( static void panfrost_delete_vertex_elements_state(struct pipe_context *pctx, void *hwcso) { - printf("Vertex elements delete leaks descriptor\n"); + struct panfrost_vertex_state *so = (struct panfrost_vertex_state *) hwcso; + unsigned bytes = sizeof(struct mali_attr_meta) * so->num_elements; + DBG("Vertex elements delete leaks descriptor (%d bytes)\n", bytes); free(hwcso); } @@ -1919,7 +1643,15 @@ panfrost_delete_shader_state( struct pipe_context *pctx, void *so) { - printf("Deleting shader state maybe leaks tokens, per-variant compiled shaders, per-variant descriptors\n"); + struct panfrost_shader_variants *cso = (struct panfrost_shader_variants *) so; + + if (cso->base.type == PIPE_SHADER_IR_TGSI) { + DBG("Deleting TGSI shader leaks duplicated tokens\n"); + } + + unsigned leak = cso->variant_count * sizeof(struct mali_shader_meta); + DBG("Deleting shader state leaks descriptors (%d bytes), and shader bytecode\n", leak); + free(so); } @@ -2147,7 +1879,7 @@ panfrost_set_constant_buffer( } else if (buf->user_buffer) { cpu = buf->user_buffer; } else { - printf("No constant buffer?\n"); + DBG("No constant buffer?\n"); return; } @@ -2209,6 +1941,25 @@ panfrost_create_sampler_view( enum mali_format format = panfrost_find_format(desc); + bool is_depth = desc->format == PIPE_FORMAT_Z32_UNORM; + + unsigned usage2_layout = 0x10; + + switch (prsrc->bo->layout) { + case PAN_AFBC: + usage2_layout |= 0x8 | 0x4; + break; + case PAN_TILED: + usage2_layout |= 0x1; + break; + case PAN_LINEAR: + usage2_layout |= is_depth ? 0x1 : 0x2; + break; + default: + assert(0); + break; + } + struct mali_texture_descriptor texture_descriptor = { .width = MALI_POSITIVE(texture->width0), .height = MALI_POSITIVE(texture->height0), @@ -2222,11 +1973,7 @@ panfrost_create_sampler_view( .usage1 = 0x0, .is_not_cubemap = 1, - /* 0x11 - regular texture 2d, uncompressed tiled */ - /* 0x12 - regular texture 2d, uncompressed linear */ - /* 0x1c - AFBC compressed (internally tiled, probably) texture 2D */ - - .usage2 = prsrc->bo->has_afbc ? 0x1c : (prsrc->bo->tiled ? 0x11 : 0x12), + .usage2 = usage2_layout }, .swizzle = panfrost_translate_swizzle_4(user_swizzle) @@ -2286,8 +2033,8 @@ panfrost_set_framebuffer_state(struct pipe_context *pctx, ctx->pipe_framebuffer.nr_cbufs = fb->nr_cbufs; ctx->pipe_framebuffer.samples = fb->samples; ctx->pipe_framebuffer.layers = fb->layers; - ctx->pipe_framebuffer.width = ALIGN(fb->width, 16); - ctx->pipe_framebuffer.height = ALIGN(fb->height, 16); + ctx->pipe_framebuffer.width = fb->width; + ctx->pipe_framebuffer.height = fb->height; for (int i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { struct pipe_surface *cb = i < fb->nr_cbufs ? fb->cbufs[i] : NULL; @@ -2296,7 +2043,7 @@ panfrost_set_framebuffer_state(struct pipe_context *pctx, if (ctx->pipe_framebuffer.cbufs[i] == cb) continue; if (cb && (i != 0)) { - printf("XXX: Multiple render targets not supported before t7xx!\n"); + DBG("XXX: Multiple render targets not supported before t7xx!\n"); assert(0); } @@ -2306,19 +2053,17 @@ panfrost_set_framebuffer_state(struct pipe_context *pctx, if (!cb) continue; - if (require_sfbd) + if (ctx->require_sfbd) ctx->vt_framebuffer_sfbd = panfrost_emit_sfbd(ctx); else ctx->vt_framebuffer_mfbd = panfrost_emit_mfbd(ctx); panfrost_attach_vt_framebuffer(ctx); - panfrost_new_frag_framebuffer(ctx); - panfrost_set_scissor(ctx); struct panfrost_resource *tex = ((struct panfrost_resource *) ctx->pipe_framebuffer.cbufs[i]->texture); bool is_scanout = panfrost_is_scanout(ctx); - if (!is_scanout && !tex->bo->has_afbc) { + if (!is_scanout && tex->bo->layout != PAN_AFBC) { /* The blob is aggressive about enabling AFBC. As such, * it's pretty much necessary to use it here, since we * have no traces of non-compressed FBO. */ @@ -2341,26 +2086,17 @@ panfrost_set_framebuffer_state(struct pipe_context *pctx, if (zb) { /* FBO has depth */ - if (require_sfbd) + if (ctx->require_sfbd) ctx->vt_framebuffer_sfbd = panfrost_emit_sfbd(ctx); else ctx->vt_framebuffer_mfbd = panfrost_emit_mfbd(ctx); panfrost_attach_vt_framebuffer(ctx); - panfrost_new_frag_framebuffer(ctx); - panfrost_set_scissor(ctx); - - struct panfrost_resource *tex = ((struct panfrost_resource *) ctx->pipe_framebuffer.zsbuf->texture); - if (!tex->bo->has_afbc && !panfrost_is_scanout(ctx)) - panfrost_enable_afbc(ctx, tex, true); + /* Keep the depth FBO linear */ } } } - - /* Force a clear XXX wrong? */ - if (ctx->last_clear.color) - panfrost_clear(&ctx->base, ctx->last_clear.buffers, ctx->last_clear.color, ctx->last_clear.depth, ctx->last_clear.stencil); } static void * @@ -2412,7 +2148,12 @@ static void panfrost_delete_blend_state(struct pipe_context *pipe, void *blend) { - printf("Deleting blend state may leak blend shader\n"); + struct panfrost_blend_state *so = (struct panfrost_blend_state *) blend; + + if (so->has_blend_shader) { + DBG("Deleting blend state leak blend shaders bytecode\n"); + } + free(blend); } @@ -2538,8 +2279,6 @@ panfrost_set_scissor_states(struct pipe_context *pipe, assert(num_scissors == 1); ctx->scissor = *scissors; - - panfrost_set_scissor(ctx); } static void @@ -2560,9 +2299,16 @@ static void panfrost_destroy(struct pipe_context *pipe) { struct panfrost_context *panfrost = pan_context(pipe); + struct panfrost_screen *screen = pan_screen(pipe->screen); if (panfrost->blitter) util_blitter_destroy(panfrost->blitter); + + screen->driver->free_slab(screen, &panfrost->scratchpad); + screen->driver->free_slab(screen, &panfrost->varying_mem); + screen->driver->free_slab(screen, &panfrost->shaders); + screen->driver->free_slab(screen, &panfrost->tiler_heap); + screen->driver->free_slab(screen, &panfrost->misc_0); } static struct pipe_query * @@ -2591,6 +2337,7 @@ panfrost_begin_query(struct pipe_context *pipe, struct pipe_query *q) struct panfrost_query *query = (struct panfrost_query *) q; switch (query->type) { + case PIPE_QUERY_OCCLUSION_COUNTER: case PIPE_QUERY_OCCLUSION_PREDICATE: case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: { @@ -2603,7 +2350,7 @@ panfrost_begin_query(struct pipe_context *pipe, struct pipe_query *q) } default: - fprintf(stderr, "Skipping query %d\n", query->type); + DBG("Skipping query %d\n", query->type); break; } @@ -2633,23 +2380,69 @@ panfrost_get_query_result(struct pipe_context *pipe, panfrost_flush(pipe, NULL, PIPE_FLUSH_END_OF_FRAME); switch (query->type) { + case PIPE_QUERY_OCCLUSION_COUNTER: case PIPE_QUERY_OCCLUSION_PREDICATE: case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: { /* Read back the query results */ unsigned *result = (unsigned *) query->transfer.cpu; unsigned passed = *result; - vresult->b = !!passed; + if (query->type == PIPE_QUERY_OCCLUSION_COUNTER) { + vresult->u64 = passed; + } else { + vresult->b = !!passed; + } + break; } default: - fprintf(stderr, "Skipped query get %d\n", query->type); + DBG("Skipped query get %d\n", query->type); break; } return true; } +static struct pipe_stream_output_target * +panfrost_create_stream_output_target(struct pipe_context *pctx, + struct pipe_resource *prsc, + unsigned buffer_offset, + unsigned buffer_size) +{ + struct pipe_stream_output_target *target; + + target = CALLOC_STRUCT(pipe_stream_output_target); + + if (!target) + return NULL; + + pipe_reference_init(&target->reference, 1); + pipe_resource_reference(&target->buffer, prsc); + + target->context = pctx; + target->buffer_offset = buffer_offset; + target->buffer_size = buffer_size; + + return target; +} + +static void +panfrost_stream_output_target_destroy(struct pipe_context *pctx, + struct pipe_stream_output_target *target) +{ + pipe_resource_reference(&target->buffer, NULL); + free(target); +} + +static void +panfrost_set_stream_output_targets(struct pipe_context *pctx, + unsigned num_targets, + struct pipe_stream_output_target **targets, + const unsigned *offsets) +{ + /* STUB */ +} + static void panfrost_setup_hardware(struct panfrost_context *ctx) { @@ -2667,10 +2460,10 @@ panfrost_setup_hardware(struct panfrost_context *ctx) } screen->driver->allocate_slab(screen, &ctx->scratchpad, 64, false, 0, 0, 0); - screen->driver->allocate_slab(screen, &ctx->varying_mem, 16384, false, 0, 0, 0); + screen->driver->allocate_slab(screen, &ctx->varying_mem, 16384, false, PAN_ALLOCATE_INVISIBLE | PAN_ALLOCATE_COHERENT_LOCAL, 0, 0); screen->driver->allocate_slab(screen, &ctx->shaders, 4096, true, PAN_ALLOCATE_EXECUTE, 0, 0); - screen->driver->allocate_slab(screen, &ctx->tiler_heap, 32768, false, PAN_ALLOCATE_GROWABLE, 1, 128); - screen->driver->allocate_slab(screen, &ctx->misc_0, 128, false, PAN_ALLOCATE_GROWABLE, 1, 128); + screen->driver->allocate_slab(screen, &ctx->tiler_heap, 32768, false, PAN_ALLOCATE_INVISIBLE | PAN_ALLOCATE_GROWABLE, 1, 128); + screen->driver->allocate_slab(screen, &ctx->misc_0, 128*128, false, PAN_ALLOCATE_INVISIBLE | PAN_ALLOCATE_GROWABLE, 1, 128); } @@ -2681,8 +2474,15 @@ struct pipe_context * panfrost_create_context(struct pipe_screen *screen, void *priv, unsigned flags) { struct panfrost_context *ctx = CALLOC_STRUCT(panfrost_context); + struct panfrost_screen *pscreen = pan_screen(screen); memset(ctx, 0, sizeof(*ctx)); struct pipe_context *gallium = (struct pipe_context *) ctx; + unsigned gpu_id; + + gpu_id = pscreen->driver->query_gpu_version(pscreen); + + ctx->is_t6xx = gpu_id <= 0x0750; /* For now, this flag means T760 or less */ + ctx->require_sfbd = gpu_id < 0x0750; /* T760 is the first to support MFBD */ gallium->screen = screen; @@ -2747,8 +2547,14 @@ panfrost_create_context(struct pipe_screen *screen, void *priv, unsigned flags) gallium->end_query = panfrost_end_query; gallium->get_query_result = panfrost_get_query_result; + gallium->create_stream_output_target = panfrost_create_stream_output_target; + gallium->stream_output_target_destroy = panfrost_stream_output_target_destroy; + gallium->set_stream_output_targets = panfrost_set_stream_output_targets; + panfrost_resource_context_init(gallium); + pscreen->driver->init_context(ctx); + panfrost_setup_hardware(ctx); /* XXX: leaks */ @@ -2766,10 +2572,10 @@ panfrost_create_context(struct pipe_screen *screen, void *priv, unsigned flags) /* Prepare for render! */ + panfrost_job_init(ctx); panfrost_emit_vertex_payload(ctx); panfrost_emit_tiler_payload(ctx); panfrost_invalidate_frame(ctx); - panfrost_viewport(ctx, 0.0, 1.0, 0, 0, ctx->pipe_framebuffer.width, ctx->pipe_framebuffer.height); panfrost_default_shader_backend(ctx); panfrost_generate_space_filler_indices();