panfrost: Merge AFBC slab with BO backing
[mesa.git] / src / gallium / drivers / panfrost / pan_resource.c
index 0b97d9c3461a99931cfa361e45855251d6c3c482..d8f1a9b521f4328771ef911d67cda7a89af3867d 100644 (file)
@@ -1,31 +1,33 @@
-/**************************************************************************
- *
- * Copyright 2008 VMware, Inc.
- * Copyright 2014 Broadcom
- * Copyright 2018 Alyssa Rosenzweig
- * All Rights Reserved.
+/*
+ * Copyright (C) 2008 VMware, Inc.
+ * Copyright (C) 2014 Broadcom
+ * Copyright (C) 2018-2019 Alyssa Rosenzweig
+ * Copyright (C) 2019 Collabora
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
  *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
  *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * Authors (Collabora):
+ *   Tomeu Vizoso <tomeu.vizoso@collabora.com>
+ *   Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
  *
- **************************************************************************/
+ */
 
 #include <xf86drm.h>
 #include <fcntl.h>
 #include "util/u_surface.h"
 #include "util/u_transfer.h"
 #include "util/u_transfer_helper.h"
+#include "util/u_gen_mipmap.h"
 
 #include "pan_context.h"
 #include "pan_screen.h"
 #include "pan_resource.h"
-#include "pan_swizzle.h"
 #include "pan_util.h"
+#include "pan_tiling.h"
 
 static struct pipe_resource *
 panfrost_resource_from_handle(struct pipe_screen *pscreen,
@@ -56,7 +59,7 @@ panfrost_resource_from_handle(struct pipe_screen *pscreen,
 
         assert(whandle->type == WINSYS_HANDLE_TYPE_FD);
 
-        rsc = CALLOC_STRUCT(panfrost_resource);
+        rsc = rzalloc(pscreen, struct panfrost_resource);
         if (!rsc)
                 return NULL;
 
@@ -67,7 +70,9 @@ panfrost_resource_from_handle(struct pipe_screen *pscreen,
         pipe_reference_init(&prsc->reference, 1);
         prsc->screen = pscreen;
 
-       rsc->bo = screen->driver->import_bo(screen, whandle);
+       rsc->bo = panfrost_drm_import_bo(screen, whandle);
+       rsc->bo->slices[0].stride = whandle->stride;
+        rsc->bo->slices[0].initialized = true;
 
        if (screen->ro) {
                rsc->scanout =
@@ -88,10 +93,7 @@ panfrost_resource_get_handle(struct pipe_screen *pscreen,
         struct panfrost_screen *screen = pan_screen(pscreen);
         struct panfrost_resource *rsrc = (struct panfrost_resource *) pt;
         struct renderonly_scanout *scanout = rsrc->scanout;
-        int bytes_per_pixel = util_format_get_blocksize(rsrc->base.format);
-        int stride = bytes_per_pixel * rsrc->base.width0; /* TODO: Alignment? */
 
-        handle->stride = stride;
         handle->modifier = DRM_FORMAT_MOD_INVALID;
 
        if (handle->type == WINSYS_HANDLE_TYPE_SHARED) {
@@ -101,6 +103,7 @@ panfrost_resource_get_handle(struct pipe_screen *pscreen,
                        return TRUE;
 
                handle->handle = rsrc->bo->gem_handle;
+               handle->stride = rsrc->bo->slices[0].stride;
                return TRUE;
        } else if (handle->type == WINSYS_HANDLE_TYPE_FD) {
                 if (scanout) {
@@ -113,11 +116,14 @@ panfrost_resource_get_handle(struct pipe_screen *pscreen,
                         if (ret == -1)
                                 return FALSE;
 
+                        handle->stride = scanout->stride;
                         handle->handle = args.fd;
 
                         return TRUE;
                 } else
-                       return screen->driver->export_bo(screen, rsrc->bo->gem_handle, handle);
+                       return panfrost_drm_export_bo(screen, rsrc->bo->gem_handle,
+                                                      rsrc->bo->slices[0].stride,
+                                                      handle);
        }
 
        return FALSE;
@@ -129,15 +135,6 @@ panfrost_flush_resource(struct pipe_context *pctx, struct pipe_resource *prsc)
         //DBG("TODO %s\n", __func__);
 }
 
-static void
-panfrost_blit(struct pipe_context *pipe,
-              const struct pipe_blit_info *info)
-{
-        /* STUB */
-        DBG("Skipping blit XXX\n");
-        return;
-}
-
 static struct pipe_surface *
 panfrost_create_surface(struct pipe_context *pipe,
                         struct pipe_resource *pt,
@@ -145,7 +142,7 @@ panfrost_create_surface(struct pipe_context *pipe,
 {
         struct pipe_surface *ps = NULL;
 
-        ps = CALLOC_STRUCT(pipe_surface);
+        ps = rzalloc(pipe, struct pipe_surface);
 
         if (ps) {
                 pipe_reference_init(&ps->reference, 1);
@@ -180,22 +177,109 @@ panfrost_surface_destroy(struct pipe_context *pipe,
 {
         assert(surf->texture);
         pipe_resource_reference(&surf->texture, NULL);
-        free(surf);
+        ralloc_free(surf);
 }
 
-static struct panfrost_bo *
-panfrost_create_bo(struct panfrost_screen *screen, const struct pipe_resource *template)
+static void
+panfrost_setup_slices(const struct pipe_resource *tmpl, struct panfrost_bo *bo)
 {
-       struct panfrost_bo *bo = CALLOC_STRUCT(panfrost_bo);
+        unsigned width = tmpl->width0;
+        unsigned height = tmpl->height0;
+        unsigned depth = tmpl->depth0;
+        unsigned bytes_per_pixel = util_format_get_blocksize(tmpl->format);
+
+        assert(depth > 0);
+
+        /* Fills bo->slices[] for every mip level, then bo->cubemap_stride and
+         * bo->size. Tiled operates blockwise; linear is packed. Anything we
+         * render to is tile-aligned too: maybe not strictly necessary, but
+         * we're not *that* pressed for memory and it keeps the code simple */
+
+        bool renderable = tmpl->bind &
+                (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DEPTH_STENCIL);
+        bool afbc = bo->layout == PAN_AFBC;
+        bool tiled = bo->layout == PAN_TILED;
+        bool should_align = renderable || tiled;
+
+        /* We don't know how to specify a 2D stride for 3D textures, so those skip both alignments */
+
+        bool can_align_stride =
+                tmpl->target != PIPE_TEXTURE_3D;
+
+        should_align &= can_align_stride;
+
+        unsigned offset = 0;
+        unsigned size_2d = 0;
+
+        for (unsigned l = 0; l <= tmpl->last_level; ++l) {
+                struct panfrost_slice *slice = &bo->slices[l];
+
+                unsigned effective_width = width;
+                unsigned effective_height = height;
+                unsigned effective_depth = depth;
+
+                if (should_align) {
+                        effective_width = ALIGN(effective_width, 16);
+                        effective_height = ALIGN(effective_height, 16);
+
+                        /* We don't need to align depth */
+                }
+
+                slice->offset = offset;
+
+                /* Compute the would-be (packed) stride from the format block size */
+                unsigned stride = bytes_per_pixel * effective_width;
+
+                /* ...but align linear strides to 64 bytes (cache line) for performance */
+                if (can_align_stride && bo->layout == PAN_LINEAR)
+                        stride = ALIGN(stride, 64);
+
+                slice->stride = stride;
+
+                unsigned slice_one_size = slice->stride * effective_height;
+                unsigned slice_full_size = slice_one_size * effective_depth;
+
+                /* Remember the size of one level-0 2D layer; 3D textures use it as their layer stride below */
+
+                if (l == 0)
+                        size_2d = slice_one_size;
+
+                /* AFBC only: record this level's header size and make room for it in the running offset */
+                if (afbc) {
+                        slice->header_size =
+                                panfrost_afbc_header_size(width, height);
+
+                        offset += slice->header_size;
+                }
+
+                offset += slice_full_size;
+
+                width = u_minify(width, 1);
+                height = u_minify(height, 1);
+                depth = u_minify(depth, 1);
+        }
+
+        assert(tmpl->array_size);
 
-        /* Calculate the size of the bo */
+        if (tmpl->target != PIPE_TEXTURE_3D) {
+                /* Arrays and cubemaps have the entire miptree duplicated, once per array_size entry */
 
-        int bytes_per_pixel = util_format_get_blocksize(template->format);
-        int stride = bytes_per_pixel * template->width0; /* TODO: Alignment? */
-        size_t sz = stride;
+                bo->cubemap_stride = ALIGN(offset, 64);
+                bo->size = ALIGN(bo->cubemap_stride * tmpl->array_size, 4096);
+        } else {
+                /* 3D: cubemap_stride is the size of one level-0 2D layer */
+                assert(tmpl->array_size == 1);
 
-        if (template->height0) sz *= template->height0;
-        if (template->depth0) sz *= template->depth0;
+                bo->cubemap_stride = size_2d;
+                bo->size = ALIGN(offset, 4096);
+        }
+}
+
+static struct panfrost_bo *
+panfrost_create_bo(struct panfrost_screen *screen, const struct pipe_resource *template)
+{
+       struct panfrost_bo *bo = rzalloc(screen, struct panfrost_bo);
+        pipe_reference_init(&bo->reference, 1);
 
         /* Based on the usage, figure out what storing will be used. There are
          * various tradeoffs:
@@ -213,30 +297,28 @@ panfrost_create_bo(struct panfrost_screen *screen, const struct pipe_resource *t
          */
 
         /* Tiling textures is almost always faster, unless we only use it once */
-        bool should_tile = (template->usage != PIPE_USAGE_STREAM) && (template->bind & PIPE_BIND_SAMPLER_VIEW);
+
+        bool is_texture = (template->bind & PIPE_BIND_SAMPLER_VIEW);
+        bool is_2d = template->depth0 == 1 && template->array_size == 1;
+        bool is_streaming = (template->usage != PIPE_USAGE_STREAM);
+
+        bool should_tile = is_streaming && is_texture && is_2d;
+
+        /* Depth/stencil can't be tiled, only linear or AFBC */
+        should_tile &= !(template->bind & PIPE_BIND_DEPTH_STENCIL);
 
         /* Set the layout appropriately */
         bo->layout = should_tile ? PAN_TILED : PAN_LINEAR;
 
-        if (bo->layout == PAN_TILED) {
-                /* For tiled, we don't map directly, so just malloc any old buffer */
+        panfrost_setup_slices(template, bo);
 
-                for (int l = 0; l < (template->last_level + 1); ++l) {
-                        bo->cpu[l] = malloc(sz);
-                        sz >>= 2;
-                }
-        } else {
-                /* But for linear, we can! */
+        struct panfrost_memory mem;
 
-                struct pb_slab_entry *entry = pb_slab_alloc(&screen->slabs, sz, HEAP_TEXTURE);
-                struct panfrost_memory_entry *p_entry = (struct panfrost_memory_entry *) entry;
-                struct panfrost_memory *backing = (struct panfrost_memory *) entry->slab;
-                bo->entry[0] = p_entry;
-                bo->cpu[0] = backing->cpu + p_entry->offset;
-                bo->gpu[0] = backing->gpu + p_entry->offset;
+        panfrost_drm_allocate_slab(screen, &mem, bo->size / 4096, true, 0, 0, 0);
 
-                /* TODO: Mipmap */
-        }
+        bo->cpu = mem.cpu;
+        bo->gpu = mem.gpu;
+        bo->gem_handle = mem.gem_handle;
 
         return bo;
 }
@@ -245,7 +327,7 @@ static struct pipe_resource *
 panfrost_resource_create(struct pipe_screen *screen,
                          const struct pipe_resource *template)
 {
-        struct panfrost_resource *so = CALLOC_STRUCT(panfrost_resource);
+        struct panfrost_resource *so = rzalloc(screen, struct panfrost_resource);
         struct panfrost_screen *pscreen = (struct panfrost_screen *) screen;
 
         so->base = *template;
@@ -259,13 +341,17 @@ panfrost_resource_create(struct pipe_screen *screen,
                 case PIPE_TEXTURE_1D:
                 case PIPE_TEXTURE_2D:
                 case PIPE_TEXTURE_3D:
+                case PIPE_TEXTURE_CUBE:
                 case PIPE_TEXTURE_RECT:
+                case PIPE_TEXTURE_2D_ARRAY:
                         break;
                 default:
                         DBG("Unknown texture target %d\n", template->target);
                         assert(0);
         }
 
+        util_range_init(&so->valid_buffer_range);
+
         if (template->bind & PIPE_BIND_DISPLAY_TARGET ||
             template->bind & PIPE_BIND_SCANOUT ||
             template->bind & PIPE_BIND_SHARED) {
@@ -273,8 +359,6 @@ panfrost_resource_create(struct pipe_screen *screen,
                 struct renderonly_scanout *scanout;
                 struct winsys_handle handle;
 
-                /* TODO: align width0 and height0? */
-
                 scanout = renderonly_scanout_for_resource(&scanout_templat,
                                                           pscreen->ro, &handle);
                 if (!scanout)
@@ -299,38 +383,50 @@ panfrost_resource_create(struct pipe_screen *screen,
 }
 
 static void
-panfrost_destroy_bo(struct panfrost_screen *screen, struct panfrost_bo *pbo)
+panfrost_destroy_bo(struct panfrost_screen *screen, struct panfrost_bo *bo)
 {
-       struct panfrost_bo *bo = (struct panfrost_bo *)pbo;
-
-        for (int l = 0; l < MAX_MIP_LEVELS; ++l) {
-                if (bo->entry[l] != NULL) {
-                        /* Most allocations have an entry to free */
-                        bo->entry[l]->freed = true;
-                        pb_slab_free(&screen->slabs, &bo->entry[l]->base);
-                }
+        if (!bo->imported) { /* not imported: release cpu/gpu/gem_handle through panfrost_drm_free_slab */
+                struct panfrost_memory mem = {
+                        .cpu = bo->cpu,
+                        .gpu = bo->gpu,
+                        .size = bo->size,
+                        .gem_handle = bo->gem_handle,
+                };
+
+                panfrost_drm_free_slab(screen, &mem);
         }
 
-        if (bo->layout == PAN_TILED) {
-                /* Tiled has a malloc'd CPU, so just plain ol' free needed */
-
-                for (int l = 0; l < MAX_MIP_LEVELS; ++l) {
-                        free(bo->cpu[l]);
-                }
+        if (bo->has_checksum) { /* the checksum slab is freed the same way, from its own fields */
+                struct panfrost_memory mem = {
+                        .cpu = bo->checksum_slab.cpu,
+                        .gpu = bo->checksum_slab.gpu,
+                        .size = bo->checksum_slab.size,
+                        .gem_handle = bo->checksum_slab.gem_handle,
+                };
+
+                panfrost_drm_free_slab(screen, &mem);
         }
 
-        if (bo->layout == PAN_AFBC) {
-                /* TODO */
-                DBG("--leaking afbc (%d bytes)--\n", bo->afbc_metadata_size);
+        if (bo->imported) { /* imported BOs take the dedicated free path instead */
+                panfrost_drm_free_imported_bo(screen, bo);
         }
 
-        if (bo->has_checksum) {
-                /* TODO */
-                DBG("--leaking checksum (%zd bytes)--\n", bo->checksum_slab.size);
-        }
+        ralloc_free(bo); /* finally release the panfrost_bo struct itself */
+}
 
-        if (bo->imported) {
-                screen->driver->free_imported_bo(screen, bo);
+void
+panfrost_bo_reference(struct panfrost_bo *bo)
+{
+        pipe_reference(NULL, &bo->reference); /* take a reference on bo; NULL first argument presumably means nothing is released -- confirm against pipe_reference() */
+}
+
+void
+panfrost_bo_unreference(struct pipe_screen *screen, struct panfrost_bo *bo)
+{
+        /* Drop one reference; when the count goes to zero (pipe_reference returns true), destroy the BO */
+
+        if (pipe_reference(&bo->reference, NULL)) {
+                panfrost_destroy_bo(pan_screen(screen), bo);
         }
 }
 
@@ -345,33 +441,10 @@ panfrost_resource_destroy(struct pipe_screen *screen,
                renderonly_scanout_destroy(rsrc->scanout, pscreen->ro);
 
        if (rsrc->bo)
-               panfrost_destroy_bo(pscreen, rsrc->bo);
+                panfrost_bo_unreference(screen, rsrc->bo);
 
-       FREE(rsrc);
-}
-
-static uint8_t *
-panfrost_map_bo(struct panfrost_context *ctx, struct pipe_transfer *transfer)
-{
-       struct panfrost_bo *bo = (struct panfrost_bo *)pan_resource(transfer->resource)->bo;
-
-        /* If non-zero level, it's a mipmapped resource and needs to be treated as such */
-        bo->is_mipmap |= transfer->level;
-
-        if (transfer->usage & PIPE_TRANSFER_MAP_DIRECTLY && bo->layout != PAN_LINEAR) {
-                /* We can only directly map linear resources */
-                return NULL;
-        }
-
-        if (transfer->resource->bind & PIPE_BIND_DEPTH_STENCIL) {
-                /* Mipmapped readpixels?! */
-                assert(transfer->level == 0);
-
-                /* Set the CPU mapping to that of the depth/stencil buffer in memory, untiled */
-                bo->cpu[transfer->level] = ctx->depth_stencil_buffer.cpu;
-        }
-
-        return bo->cpu[transfer->level];
+        util_range_destroy(&rsrc->valid_buffer_range);
+       ralloc_free(rsrc);
 }
 
 static void *
@@ -382,119 +455,164 @@ panfrost_transfer_map(struct pipe_context *pctx,
                       const struct pipe_box *box,
                       struct pipe_transfer **out_transfer)
 {
-        struct panfrost_context *ctx = pan_context(pctx);
         int bytes_per_pixel = util_format_get_blocksize(resource->format);
-        int stride = bytes_per_pixel * resource->width0; /* TODO: Alignment? */
-       uint8_t *cpu;
+        struct panfrost_resource *rsrc = pan_resource(resource);
+        struct panfrost_bo *bo = rsrc->bo;
 
-        struct pipe_transfer *transfer = CALLOC_STRUCT(pipe_transfer);
-        transfer->level = level;
-        transfer->usage = usage;
-        transfer->box = *box;
-        transfer->stride = stride;
-        assert(!transfer->box.z);
+        struct panfrost_gtransfer *transfer = rzalloc(pctx, struct panfrost_gtransfer);
+        transfer->base.level = level;
+        transfer->base.usage = usage;
+        transfer->base.box = *box;
 
-        pipe_resource_reference(&transfer->resource, resource);
+        pipe_resource_reference(&transfer->base.resource, resource);
 
-        *out_transfer = transfer;
+        *out_transfer = &transfer->base;
 
-        if (resource->bind & PIPE_BIND_DISPLAY_TARGET ||
-            resource->bind & PIPE_BIND_SCANOUT ||
-            resource->bind & PIPE_BIND_SHARED) {
-                /* Mipmapped readpixels?! */
-                assert(level == 0);
+        /* Check if we're bound for rendering and this is a read pixels. If so,
+         * we need to flush */
 
-                /* Force a flush -- kill the pipeline */
-                panfrost_flush(pctx, NULL, PIPE_FLUSH_END_OF_FRAME);
-        }
+        struct panfrost_context *ctx = pan_context(pctx);
+        struct pipe_framebuffer_state *fb = &ctx->pipe_framebuffer;
 
-       cpu = panfrost_map_bo(ctx, transfer);
-       if (cpu == NULL)
-               return NULL;
+        bool is_bound = false;
 
-        return cpu + transfer->box.x * bytes_per_pixel + transfer->box.y * stride;
-}
+        for (unsigned c = 0; c < fb->nr_cbufs; ++c) {
+                is_bound |= fb->cbufs[c]->texture == resource;
+        }
 
-static void
-panfrost_tile_texture(struct panfrost_screen *screen, struct panfrost_resource *rsrc, int level)
-{
-       struct panfrost_bo *bo = (struct panfrost_bo *)rsrc->bo;
-        int bytes_per_pixel = util_format_get_blocksize(rsrc->base.format);
-        int stride = bytes_per_pixel * rsrc->base.width0; /* TODO: Alignment? */
+        if (is_bound && (usage & PIPE_TRANSFER_READ)) {
+                assert(level == 0);
+                panfrost_flush(pctx, NULL, PIPE_FLUSH_END_OF_FRAME);
+        }
 
-        int width = rsrc->base.width0 >> level;
-        int height = rsrc->base.height0 >> level;
+        /* TODO: Respect usage flags */
+
+        if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) {
+                /* TODO: reallocate */
+                //printf("debug: Missed reallocate\n");
+        } else if ((usage & PIPE_TRANSFER_WRITE)
+                        && resource->target == PIPE_BUFFER
+                        && !util_ranges_intersect(&rsrc->valid_buffer_range, box->x, box->x + box->width)) {
+                /* No flush for writes to uninitialized */
+        } else if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
+                if (usage & PIPE_TRANSFER_WRITE) {
+                        /* STUB: flush reading */
+                        //printf("debug: missed reading flush %d\n", resource->target);
+                } else if (usage & PIPE_TRANSFER_READ) {
+                        /* STUB: flush writing */
+                        //printf("debug: missed writing flush %d (%d-%d)\n", resource->target, box->x, box->x + box->width);
+                } else {
+                        /* Why are you even mapping?! */
+                }
+        }
 
-        /* Estimate swizzled bitmap size. Slight overestimates are fine.
-         * Underestimates will result in memory corruption or worse. */
+        if (bo->layout != PAN_LINEAR) {
+                /* Non-linear resources need to be indirectly mapped */
 
-        int swizzled_sz = panfrost_swizzled_size(width, height, bytes_per_pixel);
+                if (usage & PIPE_TRANSFER_MAP_DIRECTLY)
+                        return NULL;
 
-        /* Save the entry. But if there was already an entry here (from a
-         * previous upload of the resource), free that one so we don't leak */
+                transfer->base.stride = box->width * bytes_per_pixel;
+                transfer->base.layer_stride = transfer->base.stride * box->height;
+                transfer->map = rzalloc_size(transfer, transfer->base.layer_stride * box->depth);
+                assert(box->depth == 1);
 
-        if (bo->entry[level] != NULL) {
-                bo->entry[level]->freed = true;
-                pb_slab_free(&screen->slabs, &bo->entry[level]->base);
-        }
+                if ((usage & PIPE_TRANSFER_READ) && bo->slices[level].initialized) {
+                        if (bo->layout == PAN_AFBC) {
+                                DBG("Unimplemented: reads from AFBC");
+                        } else if (bo->layout == PAN_TILED) {
+                                panfrost_load_tiled_image(
+                                                transfer->map,
+                                                bo->cpu + bo->slices[level].offset,
+                                                box,
+                                                transfer->base.stride,
+                                                bo->slices[level].stride,
+                                                util_format_get_blocksize(resource->format));
+                        }
+                }
 
-        /* Allocate the transfer given that known size but do not copy */
-        struct pb_slab_entry *entry = pb_slab_alloc(&screen->slabs, swizzled_sz, HEAP_TEXTURE);
-        struct panfrost_memory_entry *p_entry = (struct panfrost_memory_entry *) entry;
-        struct panfrost_memory *backing = (struct panfrost_memory *) entry->slab;
-        uint8_t *swizzled = backing->cpu + p_entry->offset;
+                return transfer->map;
+        } else {
+                transfer->base.stride = bo->slices[level].stride;
+                transfer->base.layer_stride = bo->cubemap_stride;
 
-        bo->entry[level] = p_entry;
-        bo->gpu[level] = backing->gpu + p_entry->offset;
+                /* By mapping direct-write, we're implicitly already
+                 * initialized (maybe), so be conservative */
 
-        /* Run actual texture swizzle, writing directly to the mapped
-         * GPU chunk we allocated */
+                if ((usage & PIPE_TRANSFER_WRITE) && (usage & PIPE_TRANSFER_MAP_DIRECTLY))
+                        bo->slices[level].initialized = true;
 
-        panfrost_texture_swizzle(width, height, bytes_per_pixel, stride, bo->cpu[level], swizzled);
+                return bo->cpu
+                        + bo->slices[level].offset
+                        + transfer->base.box.z * bo->cubemap_stride
+                        + transfer->base.box.y * bo->slices[level].stride
+                        + transfer->base.box.x * bytes_per_pixel;
+        }
 }
 
 static void
-panfrost_unmap_bo(struct panfrost_context *ctx,
-                         struct pipe_transfer *transfer)
+panfrost_transfer_unmap(struct pipe_context *pctx,
+                        struct pipe_transfer *transfer)
 {
-       struct panfrost_bo *bo = (struct panfrost_bo *)pan_resource(transfer->resource)->bo;
+        /* Gallium expects writeback here, so written staging copies are tiled back into the BO */
+
+        struct panfrost_gtransfer *trans = pan_transfer(transfer);
+        struct panfrost_resource *prsrc = (struct panfrost_resource *) transfer->resource;
 
-        if (transfer->usage & PIPE_TRANSFER_WRITE) {
-                if (transfer->resource->target == PIPE_TEXTURE_2D) {
-                        struct panfrost_resource *prsrc = (struct panfrost_resource *) transfer->resource;
+        if (trans->map) { /* a staging buffer exists, i.e. transfer_map took the non-linear path */
+                struct panfrost_bo *bo = prsrc->bo;
+
+                if (transfer->usage & PIPE_TRANSFER_WRITE) {
+                        unsigned level = transfer->level;
+                        bo->slices[level].initialized = true;
 
-                        /* Gallium thinks writeback happens here; instead, this is our cue to tile */
                         if (bo->layout == PAN_AFBC) {
-                                DBG("Warning: writes to afbc surface can't possibly work out well for you...\n");
+                                DBG("Unimplemented: writes to AFBC\n");
                         } else if (bo->layout == PAN_TILED) {
-                                struct pipe_context *gallium = (struct pipe_context *) ctx;
-                                struct panfrost_screen *screen = pan_screen(gallium->screen);
-                                panfrost_tile_texture(screen, prsrc, transfer->level);
+                                assert(transfer->box.depth == 1);
+
+                                panfrost_store_tiled_image(
+                                                bo->cpu + bo->slices[level].offset,
+                                                trans->map,
+                                                &transfer->box,
+                                                bo->slices[level].stride,
+                                                transfer->stride,
+                                                util_format_get_blocksize(prsrc->base.format));
                         }
                 }
         }
-}
 
-static void
-panfrost_transfer_unmap(struct pipe_context *pctx,
-                        struct pipe_transfer *transfer)
-{
-        struct panfrost_context *ctx = pan_context(pctx);
 
-       panfrost_unmap_bo(ctx, transfer);
+       util_range_add(&prsrc->valid_buffer_range,
+                        transfer->box.x,
+                        transfer->box.x + transfer->box.width); /* done for every transfer, not only PIPE_BUFFER resources */
 
         /* Derefence the resource */
         pipe_resource_reference(&transfer->resource, NULL);
 
-        /* Transfer itself is CALLOCed at the moment */
-        free(transfer);
+        /* Transfer was rzalloc'ed in transfer_map; the staging map was allocated with it as ralloc parent, so presumably it is freed here too -- confirm */
+        ralloc_free(transfer);
+}
+
+static void
+panfrost_transfer_flush_region(struct pipe_context *pctx,
+               struct pipe_transfer *transfer,
+               const struct pipe_box *box)
+{
+       struct panfrost_resource *rsc = pan_resource(transfer->resource);
+
+       if (transfer->resource->target == PIPE_BUFFER) { /* only buffers extend valid_buffer_range here; other targets are a no-op */
+               util_range_add(&rsc->valid_buffer_range,
+                                          transfer->box.x + box->x,
+                                          transfer->box.x + box->x + box->width); /* box->x is offset by the transfer's own box.x */
+        }
 }
 
 static struct pb_slab *
 panfrost_slab_alloc(void *priv, unsigned heap, unsigned entry_size, unsigned group_index)
 {
         struct panfrost_screen *screen = (struct panfrost_screen *) priv;
-        struct panfrost_memory *mem = CALLOC_STRUCT(panfrost_memory);
+        struct panfrost_memory *mem = rzalloc(screen, struct panfrost_memory);
 
         size_t slab_size = (1 << (MAX_SLAB_ENTRY_SIZE + 1));
 
@@ -504,7 +622,7 @@ panfrost_slab_alloc(void *priv, unsigned heap, unsigned entry_size, unsigned gro
         LIST_INITHEAD(&mem->slab.free);
         for (unsigned i = 0; i < mem->slab.num_entries; ++i) {
                 /* Create a slab entry */
-                struct panfrost_memory_entry *entry = CALLOC_STRUCT(panfrost_memory_entry);
+                struct panfrost_memory_entry *entry = rzalloc(mem, struct panfrost_memory_entry);
                 entry->offset = entry_size * i;
 
                 entry->base.slab = &mem->slab;
@@ -516,7 +634,7 @@ panfrost_slab_alloc(void *priv, unsigned heap, unsigned entry_size, unsigned gro
         /* Actually allocate the memory from kernel-space. Mapped, same_va, no
          * special flags */
 
-        screen->driver->allocate_slab(screen, mem, slab_size / 4096, true, 0, 0, 0);
+        panfrost_drm_allocate_slab(screen, mem, slab_size / 4096, true, 0, 0, 0);
 
         return &mem->slab;
 }
@@ -534,7 +652,8 @@ panfrost_slab_free(void *priv, struct pb_slab *slab)
         struct panfrost_memory *mem = (struct panfrost_memory *) slab;
         struct panfrost_screen *screen = (struct panfrost_screen *) priv;
 
-        screen->driver->free_slab(screen, mem);
+        panfrost_drm_free_slab(screen, mem);
+        ralloc_free(mem);
 }
 
 static void
@@ -549,6 +668,56 @@ panfrost_resource_get_internal_format(struct pipe_resource *prsrc)
         return prsrc->format;
 }
 
+static boolean
+panfrost_generate_mipmap(
+                struct pipe_context *pctx,
+                struct pipe_resource *prsrc,
+                enum pipe_format format,
+                unsigned base_level,
+                unsigned last_level,
+                unsigned first_layer,
+                unsigned last_layer)
+{
+        struct panfrost_context *ctx = pan_context(pctx);
+        struct panfrost_resource *rsrc = pan_resource(prsrc);
+
+        /* Generating a mipmap invalidates levels base_level+1..last_level, so
+         * mark them uninitialized explicitly so we don't try to wallpaper them
+         * back and end up with u_blitter recursion */
+
+        assert(rsrc->bo);
+        for (unsigned l = base_level + 1; l <= last_level; ++l)
+                rsrc->bo->slices[l].initialized = false;
+
+        /* Beyond that, we just delegate the hard stuff. We're careful to
+         * include flushes on both ends to make sure the data is really valid.
+         * We could be doing a lot better perf-wise, especially once we have
+         * reorder-type optimizations in place. But for now prioritize
+         * correctness. */
+
+        struct panfrost_job *job = panfrost_get_job_for_fbo(ctx);
+        bool has_draws = job->last_job.gpu; /* NOTE(review): treats a non-zero last_job.gpu as "draws are queued" -- confirm */
+
+        if (has_draws)
+                panfrost_flush(pctx, NULL, PIPE_FLUSH_END_OF_FRAME);
+
+        /* We've flushed the original buffer if needed, now let util_gen_mipmap do the blits (linear filter) */
+
+        bool blit_res = util_gen_mipmap(
+                        pctx, prsrc, format, 
+                        base_level, last_level,
+                        first_layer, last_layer,
+                        PIPE_TEX_FILTER_LINEAR);
+
+        /* If the blit was successful, flush once more. If it wasn't, well, let
+         * the state tracker deal with it. */
+
+        if (blit_res)
+                panfrost_flush(pctx, NULL, PIPE_FLUSH_END_OF_FRAME);
+
+        return blit_res; /* util_gen_mipmap's result is passed straight back to the caller */
+}
+
 static void
 panfrost_resource_set_stencil(struct pipe_resource *prsrc,
                               struct pipe_resource *stencil)
@@ -567,7 +736,7 @@ static const struct u_transfer_vtbl transfer_vtbl = {
         .resource_destroy         = panfrost_resource_destroy,
         .transfer_map             = panfrost_transfer_map,
         .transfer_unmap           = panfrost_transfer_unmap,
-        .transfer_flush_region    = u_default_transfer_flush_region,
+        .transfer_flush_region    = panfrost_transfer_flush_region,
         .get_internal_format      = panfrost_resource_get_internal_format,
         .set_stencil              = panfrost_resource_set_stencil,
         .get_stencil              = panfrost_resource_get_stencil,
@@ -599,6 +768,12 @@ panfrost_resource_screen_init(struct panfrost_screen *pscreen)
                         panfrost_slab_free);
 }
 
+void
+panfrost_resource_screen_deinit(struct panfrost_screen *pscreen)
+{
+        pb_slabs_deinit(&pscreen->slabs); /* tear down the screen's slab allocator; presumably undoes the setup in panfrost_resource_screen_init -- confirm */
+}
+
 void
 panfrost_resource_context_init(struct pipe_context *pctx)
 {
@@ -610,7 +785,7 @@ panfrost_resource_context_init(struct pipe_context *pctx)
         pctx->surface_destroy = panfrost_surface_destroy;
         pctx->resource_copy_region = util_resource_copy_region;
         pctx->blit = panfrost_blit;
-        //pctx->generate_mipmap = panfrost_generate_mipmap;
+        pctx->generate_mipmap = panfrost_generate_mipmap;
         pctx->flush_resource = panfrost_flush_resource;
         pctx->invalidate_resource = panfrost_invalidate_resource;
         pctx->transfer_flush_region = u_transfer_helper_transfer_flush_region;