broadcom: Add V3D 3.3 gallium driver called "vc5", for BCM7268.
author Eric Anholt <eric@anholt.net>
Fri, 3 Feb 2017 00:24:13 +0000 (16:24 -0800)
committer Eric Anholt <eric@anholt.net>
Tue, 10 Oct 2017 18:42:04 +0000 (11:42 -0700)
V3D 3.3 is a continuation of the 3D implementation in VC4 (v2.1 and v2.6).
V3D 3.3 introduces an MMU (no more CMA allocations) and support for
GLES3.1.  This driver is not currently conformant, though that will be a
target as soon as possible.

V3D 3.x parts use a new texture tiling layout common across many Broadcom
graphics parts, including the HVS scanout engine.  It also massively
changes the QPU instructions, introducing a common physical register file
(no more A/B split) and half-float instructions, while removing the 4x8
unorm instructions in favor of half-float for talking to fixed function
interfaces.  Because so much has changed, vc5 is implemented in a separate
gallium driver, using only the XML code-generation support from vc4.

v2: Fix tile layout for 64bpp textures.  Fix texture swizzling for 32-bit
    returns.  Fix up a bit of MRT setup.  Sync the simulator to kernel
    behavior a bit more.  Improve uniform debugging code.  Rebase on
    QIR->VIR rename.  Move texture state mostly to the CSOs.  Improve
    cache flushing on the simulator.  Fix program deletion
    use-after-frees.

Acked-by: Dave Airlie <airlied@gmail.com> (uabi plan)
Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch> (uabi plan)
43 files changed:
configure.ac
src/gallium/Makefile.am
src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c
src/gallium/auxiliary/target-helpers/drm_helper.h
src/gallium/auxiliary/target-helpers/drm_helper_public.h
src/gallium/drivers/vc4/Automake.inc
src/gallium/drivers/vc4/Makefile.am
src/gallium/drivers/vc5/.editorconfig [new file with mode: 0644]
src/gallium/drivers/vc5/Automake.inc [new file with mode: 0644]
src/gallium/drivers/vc5/Makefile.am [new file with mode: 0644]
src/gallium/drivers/vc5/Makefile.sources [new file with mode: 0644]
src/gallium/drivers/vc5/vc5_blit.c [new file with mode: 0644]
src/gallium/drivers/vc5/vc5_bufmgr.c [new file with mode: 0644]
src/gallium/drivers/vc5/vc5_bufmgr.h [new file with mode: 0644]
src/gallium/drivers/vc5/vc5_cl.c [new file with mode: 0644]
src/gallium/drivers/vc5/vc5_cl.h [new file with mode: 0644]
src/gallium/drivers/vc5/vc5_context.c [new file with mode: 0644]
src/gallium/drivers/vc5/vc5_context.h [new file with mode: 0644]
src/gallium/drivers/vc5/vc5_draw.c [new file with mode: 0644]
src/gallium/drivers/vc5/vc5_drm.h [new file with mode: 0644]
src/gallium/drivers/vc5/vc5_emit.c [new file with mode: 0644]
src/gallium/drivers/vc5/vc5_fence.c [new file with mode: 0644]
src/gallium/drivers/vc5/vc5_formats.c [new file with mode: 0644]
src/gallium/drivers/vc5/vc5_job.c [new file with mode: 0644]
src/gallium/drivers/vc5/vc5_program.c [new file with mode: 0644]
src/gallium/drivers/vc5/vc5_query.c [new file with mode: 0644]
src/gallium/drivers/vc5/vc5_rcl.c [new file with mode: 0644]
src/gallium/drivers/vc5/vc5_resource.c [new file with mode: 0644]
src/gallium/drivers/vc5/vc5_resource.h [new file with mode: 0644]
src/gallium/drivers/vc5/vc5_screen.c [new file with mode: 0644]
src/gallium/drivers/vc5/vc5_screen.h [new file with mode: 0644]
src/gallium/drivers/vc5/vc5_simulator.c [new file with mode: 0644]
src/gallium/drivers/vc5/vc5_state.c [new file with mode: 0644]
src/gallium/drivers/vc5/vc5_tiling.c [new file with mode: 0644]
src/gallium/drivers/vc5/vc5_tiling.h [new file with mode: 0644]
src/gallium/drivers/vc5/vc5_uniforms.c [new file with mode: 0644]
src/gallium/targets/dri/Makefile.am
src/gallium/targets/dri/target.c
src/gallium/winsys/vc5/drm/Android.mk [new file with mode: 0644]
src/gallium/winsys/vc5/drm/Makefile.am [new file with mode: 0644]
src/gallium/winsys/vc5/drm/Makefile.sources [new file with mode: 0644]
src/gallium/winsys/vc5/drm/vc5_drm_public.h [new file with mode: 0644]
src/gallium/winsys/vc5/drm/vc5_drm_winsys.c [new file with mode: 0644]

index cbfa4f26ecbf9b6df7ae5834f02731d854ca1104..477ea516a2b9dcc00e80105b603a59558079416d 100644 (file)
@@ -2878,6 +2878,7 @@ AC_CONFIG_FILES([Makefile
                  src/gallium/drivers/etnaviv/Makefile
                  src/gallium/drivers/imx/Makefile
                  src/gallium/drivers/vc4/Makefile
+                 src/gallium/drivers/vc5/Makefile
                  src/gallium/drivers/virgl/Makefile
                  src/gallium/state_trackers/clover/Makefile
                  src/gallium/state_trackers/dri/Makefile
@@ -2921,6 +2922,7 @@ AC_CONFIG_FILES([Makefile
                  src/gallium/winsys/sw/wrapper/Makefile
                  src/gallium/winsys/sw/xlib/Makefile
                  src/gallium/winsys/vc4/drm/Makefile
+                 src/gallium/winsys/vc5/drm/Makefile
                  src/gallium/winsys/virgl/drm/Makefile
                  src/gallium/winsys/virgl/vtest/Makefile
                  src/gbm/Makefile
index 0749caead5fa7a22eaac1a4bd224a12ad622b23c..ea20799eba6702c1e3f1416d4d06fc1b09d4a7a4 100644 (file)
@@ -94,6 +94,11 @@ if HAVE_GALLIUM_VC4
 SUBDIRS += drivers/vc4 winsys/vc4/drm
 endif
 
+## vc5
+if HAVE_GALLIUM_VC5
+SUBDIRS += drivers/vc5 winsys/vc5/drm
+endif
+
 ## virgl
 if HAVE_GALLIUM_VIRGL
 SUBDIRS += drivers/virgl winsys/virgl/drm winsys/virgl/vtest
index 69a02838cc3bff36bd5d895341cb5de6d36f20b2..810542101edee7272950f9f58f93a081e0fc95a5 100644 (file)
@@ -120,6 +120,11 @@ static const struct drm_driver_descriptor driver_descriptors[] = {
         .create_screen = pipe_vc4_create_screen,
         .configuration = pipe_default_configuration_query,
     },
+    {
+        .driver_name = "vc5",
+        .create_screen = pipe_vc5_create_screen,
+        .configuration = pipe_default_configuration_query,
+    },
     {
         .driver_name = "etnaviv",
         .create_screen = pipe_etna_create_screen,
index 95b4a27111cb96e01c0df71c5c8935d3e7f4d4d4..7aea83b8842c09e5b4a4c7e34e74242d55fb6f01 100644 (file)
@@ -310,6 +310,29 @@ pipe_vc4_create_screen(int fd, const struct pipe_screen_config *config)
 
 #endif
 
+#ifdef GALLIUM_VC5
+#include "vc5/drm/vc5_drm_public.h"
+
+/* Creates the vc5 screen for the DRM fd and passes it through
+ * debug_screen_wrap(); returns NULL if the screen could not be created.
+ */
+struct pipe_screen *
+pipe_vc5_create_screen(int fd, const struct pipe_screen_config *config)
+{
+   struct pipe_screen *vc5_screen = vc5_drm_screen_create(fd);
+
+   if (!vc5_screen)
+      return NULL;
+
+   return debug_screen_wrap(vc5_screen);
+}
+
+#else
+
+/* Stub used when GALLIUM_VC5 is not defined: reports the missing driver on
+ * stderr and fails screen creation.
+ */
+struct pipe_screen *
+pipe_vc5_create_screen(int fd, const struct pipe_screen_config *config)
+{
+   fputs("vc5: driver missing\n", stderr);
+   return NULL;
+}
+
+#endif
+
 #ifdef GALLIUM_ETNAVIV
 #include "etnaviv/drm/etnaviv_drm_public.h"
 
index 46819131bda75606b5cdbc2d98068d6f2253bc06..e21ea32fabe0252ee05bdc7529de6c3c7480740c 100644 (file)
@@ -39,6 +39,9 @@ pipe_virgl_create_screen(int fd, const struct pipe_screen_config *config);
 struct pipe_screen *
 pipe_vc4_create_screen(int fd, const struct pipe_screen_config *config);
 
+struct pipe_screen *
+pipe_vc5_create_screen(int fd, const struct pipe_screen_config *config);
+
 struct pipe_screen *
 pipe_pl111_create_screen(int fd, const struct pipe_screen_config *config);
 
index 5664c2ab14ea1f172524f6a7bb074468fcb9dccc..b1aa9726bd6bc34b5974506f80a42cf67749543a 100644 (file)
@@ -4,6 +4,7 @@ TARGET_DRIVERS += vc4
 TARGET_CPPFLAGS += -DGALLIUM_VC4
 TARGET_LIB_DEPS += \
        $(top_builddir)/src/gallium/winsys/vc4/drm/libvc4drm.la \
-       $(top_builddir)/src/gallium/drivers/vc4/libvc4.la
+       $(top_builddir)/src/gallium/drivers/vc4/libvc4.la \
+       $(top_builddir)/src/broadcom/cle/libbroadcom_cle.la
 
 endif
index 6db5fef0379cdd3493f533db764f3329ef3ddca3..c3e49af975d623a3d6c8fd70d4831e095969f3bf 100644 (file)
@@ -43,7 +43,6 @@ libvc4_la_SOURCES = $(C_SOURCES)
 
 libvc4_la_LIBADD = \
        $(SIM_LIB) \
-       $(top_builddir)/src/broadcom/cle/libbroadcom_cle.la \
        $()
 
 if HAVE_ARM_ASM
diff --git a/src/gallium/drivers/vc5/.editorconfig b/src/gallium/drivers/vc5/.editorconfig
new file mode 100644 (file)
index 0000000..f3d8c47
--- /dev/null
@@ -0,0 +1,3 @@
+[*.{c,h}]
+indent_style = space
+indent_size = 8
diff --git a/src/gallium/drivers/vc5/Automake.inc b/src/gallium/drivers/vc5/Automake.inc
new file mode 100644 (file)
index 0000000..57c8a28
--- /dev/null
@@ -0,0 +1,14 @@
+if HAVE_GALLIUM_VC5
+
+TARGET_DRIVERS += vc5
+TARGET_CPPFLAGS += -DGALLIUM_VC5
+TARGET_LIB_DEPS += \
+       $(top_builddir)/src/gallium/winsys/vc5/drm/libvc5drm.la \
+       $(top_builddir)/src/gallium/drivers/vc5/libvc5.la \
+       $(top_builddir)/src/broadcom/libbroadcom.la
+
+if !HAVE_GALLIUM_VC4
+TARGET_LIB_DEPS += $(top_builddir)/src/broadcom/cle/libbroadcom_cle.la
+endif
+
+endif
diff --git a/src/gallium/drivers/vc5/Makefile.am b/src/gallium/drivers/vc5/Makefile.am
new file mode 100644 (file)
index 0000000..42d4be7
--- /dev/null
@@ -0,0 +1,40 @@
+# Copyright © 2014 Broadcom
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+# IN THE SOFTWARE.
+
+include Makefile.sources
+include $(top_srcdir)/src/gallium/Automake.inc
+
+AM_CFLAGS = \
+       -I$(top_builddir)/src/compiler/nir \
+       -I$(top_builddir)/src/broadcom \
+       $(LIBDRM_CFLAGS) \
+       $(VC5_SIMULATOR_CFLAGS) \
+       $(GALLIUM_DRIVER_CFLAGS) \
+       $(VALGRIND_CFLAGS) \
+       $()
+
+noinst_LTLIBRARIES = libvc5.la
+
+libvc5_la_SOURCES = $(C_SOURCES)
+
+libvc5_la_LDFLAGS = \
+       $(VC5_SIMULATOR_LIBS) \
+       $(NULL)
diff --git a/src/gallium/drivers/vc5/Makefile.sources b/src/gallium/drivers/vc5/Makefile.sources
new file mode 100644 (file)
index 0000000..0d54f83
--- /dev/null
@@ -0,0 +1,26 @@
+C_SOURCES := \
+       vc5_blit.c \
+       vc5_bufmgr.c \
+       vc5_bufmgr.h \
+       vc5_cl.c \
+       vc5_cl.h \
+       vc5_context.c \
+       vc5_context.h \
+       vc5_draw.c \
+       vc5_emit.c \
+       vc5_fence.c \
+       vc5_formats.c \
+       vc5_job.c \
+       vc5_program.c \
+       vc5_query.c \
+       vc5_rcl.c \
+       vc5_resource.c \
+       vc5_resource.h \
+       vc5_screen.c \
+       vc5_screen.h \
+       vc5_simulator.c \
+       vc5_state.c \
+       vc5_tiling.c \
+       vc5_tiling.h \
+       vc5_uniforms.c \
+       $()
diff --git a/src/gallium/drivers/vc5/vc5_blit.c b/src/gallium/drivers/vc5/vc5_blit.c
new file mode 100644 (file)
index 0000000..6481141
--- /dev/null
@@ -0,0 +1,226 @@
+/*
+ * Copyright © 2015-2017 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "util/u_format.h"
+#include "util/u_surface.h"
+#include "util/u_blitter.h"
+#include "vc5_context.h"
+
+#if 0
+/* Creates a surface for miplevel @level of @prsc, in the resource's own
+ * format, through the context's create_surface() hook.  The template only
+ * ever selects layer 0.
+ */
+static struct pipe_surface *
+vc5_get_blit_surface(struct pipe_context *pctx,
+                     struct pipe_resource *prsc, unsigned level)
+{
+        struct pipe_surface surf_templ;
+
+        /* Zeroing the template also leaves first_layer/last_layer at 0. */
+        memset(&surf_templ, 0, sizeof(surf_templ));
+        surf_templ.u.tex.level = level;
+        surf_templ.format = prsc->format;
+
+        return pctx->create_surface(pctx, prsc, &surf_templ);
+}
+
+/* Returns true when @size has any bits set below @tile_size, i.e. it is not
+ * a multiple of the tile size.  The mask test is only meaningful for
+ * power-of-two tile sizes.
+ */
+static bool
+is_tile_unaligned(unsigned size, unsigned tile_size)
+{
+        unsigned low_bits = size & (tile_size - 1);
+
+        return low_bits != 0;
+}
+
+/* Attempts to perform the blit as a tile load/store job instead of a draw.
+ *
+ * Returns false, without touching any state, unless all of these hold:
+ *  - the destination format is not depth/stencil and the mask includes at
+ *    least one RGBA channel,
+ *  - scissoring is disabled,
+ *  - source and destination boxes have the same x/y/width/height,
+ *  - the box origin is tile-aligned, and its width/height are either
+ *    tile-aligned or end exactly at the edge of the destination miplevel,
+ *  - the source slice's stride equals the stride derived from the
+ *    destination level's width,
+ *  - source and destination formats are identical.
+ *
+ * Otherwise a job is set up and submitted, and true is returned.
+ *
+ * NOTE: this function is currently compiled out by the enclosing "#if 0".
+ */
+static bool
+vc5_tile_blit(struct pipe_context *pctx, const struct pipe_blit_info *info)
+{
+        struct vc5_context *vc5 = vc5_context(pctx);
+        bool msaa = (info->src.resource->nr_samples > 1 ||
+                     info->dst.resource->nr_samples > 1);
+        /* Tiles are 32x32 when either side is multisampled, 64x64 otherwise. */
+        int tile_width = msaa ? 32 : 64;
+        int tile_height = msaa ? 32 : 64;
+
+        if (util_format_is_depth_or_stencil(info->dst.resource->format))
+                return false;
+
+        if (info->scissor_enable)
+                return false;
+
+        if ((info->mask & PIPE_MASK_RGBA) == 0)
+                return false;
+
+        /* Only straight copies: no offsetting and no scaling. */
+        if (info->dst.box.x != info->src.box.x ||
+            info->dst.box.y != info->src.box.y ||
+            info->dst.box.width != info->src.box.width ||
+            info->dst.box.height != info->src.box.height) {
+                return false;
+        }
+
+        int dst_surface_width = u_minify(info->dst.resource->width0,
+                                         info->dst.level);
+        int dst_surface_height = u_minify(info->dst.resource->height0,
+                                         info->dst.level);
+        /* A partial tile is only acceptable at the right/bottom edge of the
+         * destination level.
+         */
+        if (is_tile_unaligned(info->dst.box.x, tile_width) ||
+            is_tile_unaligned(info->dst.box.y, tile_height) ||
+            (is_tile_unaligned(info->dst.box.width, tile_width) &&
+             info->dst.box.x + info->dst.box.width != dst_surface_width) ||
+            (is_tile_unaligned(info->dst.box.height, tile_height) &&
+             info->dst.box.y + info->dst.box.height != dst_surface_height)) {
+                return false;
+        }
+
+        /* VC5_PACKET_LOAD_TILE_BUFFER_GENERAL uses the
+         * VC5_PACKET_TILE_RENDERING_MODE_CONFIG's width (determined by our
+         * destination surface) to determine the stride.  This may be wrong
+         * when reading from texture miplevels > 0, which are stored in
+         * POT-sized areas.  For MSAA, the tile addresses are computed
+         * explicitly by the RCL, but still use the destination width to
+         * determine the stride (which could be fixed by explicitly supplying
+         * it in the ABI).
+         */
+        struct vc5_resource *rsc = vc5_resource(info->src.resource);
+
+        uint32_t stride;
+
+        if (info->src.resource->nr_samples > 1)
+                stride = align(dst_surface_width, 32) * 4 * rsc->cpp;
+        /* XXX else if (rsc->slices[info->src.level].tiling == VC5_TILING_FORMAT_T)
+           stride = align(dst_surface_width * rsc->cpp, 128); */
+        else
+                stride = align(dst_surface_width * rsc->cpp, 16);
+
+        if (stride != rsc->slices[info->src.level].stride)
+                return false;
+
+        if (info->dst.resource->format != info->src.resource->format)
+                return false;
+
+        /* Debug trace, disabled by default. */
+        if (false) {
+                fprintf(stderr, "RCL blit from %d,%d to %d,%d (%d,%d)\n",
+                        info->src.box.x,
+                        info->src.box.y,
+                        info->dst.box.x,
+                        info->dst.box.y,
+                        info->dst.box.width,
+                        info->dst.box.height);
+        }
+
+        struct pipe_surface *dst_surf =
+                vc5_get_blit_surface(pctx, info->dst.resource, info->dst.level);
+        struct pipe_surface *src_surf =
+                vc5_get_blit_surface(pctx, info->src.resource, info->src.level);
+
+        vc5_flush_jobs_reading_resource(vc5, info->src.resource);
+
+        struct vc5_job *job = vc5_get_job(vc5, dst_surf, NULL);
+        pipe_surface_reference(&job->color_read, src_surf);
+
+        /* If we're resolving from MSAA to single sample, we still need to run
+         * the engine in MSAA mode for the load.
+         */
+        if (!job->msaa && info->src.resource->nr_samples > 1) {
+                job->msaa = true;
+                job->tile_width = 32;
+                job->tile_height = 32;
+        }
+
+        job->draw_min_x = info->dst.box.x;
+        job->draw_min_y = info->dst.box.y;
+        job->draw_max_x = info->dst.box.x + info->dst.box.width;
+        job->draw_max_y = info->dst.box.y + info->dst.box.height;
+        job->draw_width = dst_surf->width;
+        job->draw_height = dst_surf->height;
+
+        /* These unconditionally overwrite the MSAA/tile-size values set in
+         * the resolve branch above with the ones computed at function entry.
+         */
+        job->tile_width = tile_width;
+        job->tile_height = tile_height;
+        job->msaa = msaa;
+        job->needs_flush = true;
+        job->resolve |= PIPE_CLEAR_COLOR;
+
+        vc5_job_submit(vc5, job);
+
+        /* Drop the local references taken by vc5_get_blit_surface(). */
+        pipe_surface_reference(&dst_surf, NULL);
+        pipe_surface_reference(&src_surf, NULL);
+
+        return true;
+}
+#endif
+
+/* Hands the context's currently bound state to u_blitter through its
+ * util_blitter_save_*() hooks.  Called before running a u_blitter operation
+ * (see vc5_render_blit()).
+ *
+ * Each piece of state is saved exactly once; the stream-output targets used
+ * to be saved a second time at the end, which was redundant.
+ */
+void
+vc5_blitter_save(struct vc5_context *vc5)
+{
+        util_blitter_save_fragment_constant_buffer_slot(vc5->blitter,
+                                                        vc5->constbuf[PIPE_SHADER_FRAGMENT].cb);
+        util_blitter_save_vertex_buffer_slot(vc5->blitter, vc5->vertexbuf.vb);
+        util_blitter_save_vertex_elements(vc5->blitter, vc5->vtx);
+        util_blitter_save_vertex_shader(vc5->blitter, vc5->prog.bind_vs);
+        util_blitter_save_so_targets(vc5->blitter, vc5->streamout.num_targets,
+                                     vc5->streamout.targets);
+        util_blitter_save_rasterizer(vc5->blitter, vc5->rasterizer);
+        util_blitter_save_viewport(vc5->blitter, &vc5->viewport);
+        util_blitter_save_scissor(vc5->blitter, &vc5->scissor);
+        util_blitter_save_fragment_shader(vc5->blitter, vc5->prog.bind_fs);
+        util_blitter_save_blend(vc5->blitter, vc5->blend);
+        util_blitter_save_depth_stencil_alpha(vc5->blitter, vc5->zsa);
+        util_blitter_save_stencil_ref(vc5->blitter, &vc5->stencil_ref);
+        util_blitter_save_sample_mask(vc5->blitter, vc5->sample_mask);
+        util_blitter_save_framebuffer(vc5->blitter, &vc5->framebuffer);
+        util_blitter_save_fragment_sampler_states(vc5->blitter,
+                        vc5->fragtex.num_samplers,
+                        (void **)vc5->fragtex.samplers);
+        util_blitter_save_fragment_sampler_views(vc5->blitter,
+                        vc5->fragtex.num_textures, vc5->fragtex.textures);
+}
+
+/* Performs the blit through u_blitter.  Returns false, after logging the
+ * source and destination formats to stderr, when
+ * util_blitter_is_blit_supported() rejects the blit; returns true otherwise.
+ */
+static bool
+vc5_render_blit(struct pipe_context *ctx, struct pipe_blit_info *info)
+{
+        struct vc5_context *vc5 = vc5_context(ctx);
+        bool supported = util_blitter_is_blit_supported(vc5->blitter, info);
+
+        if (supported) {
+                /* Save the currently bound state before handing off to
+                 * u_blitter.
+                 */
+                vc5_blitter_save(vc5);
+                util_blitter_blit(vc5->blitter, info);
+                return true;
+        }
+
+        fprintf(stderr, "blit unsupported %s -> %s\n",
+                util_format_short_name(info->src.resource->format),
+                util_format_short_name(info->dst.resource->format));
+        return false;
+}
+
+/* Blit entry point.
+ *
+ * The tile-based fast path is currently compiled out, so every blit is
+ * handed to vc5_render_blit(), operating on a private copy of the caller's
+ * blit info.
+ */
+void
+vc5_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info)
+{
+        struct pipe_blit_info local_info = *blit_info;
+
+#if 0
+        if (vc5_tile_blit(pctx, blit_info))
+                return;
+#endif
+
+        vc5_render_blit(pctx, &local_info);
+}
diff --git a/src/gallium/drivers/vc5/vc5_bufmgr.c b/src/gallium/drivers/vc5/vc5_bufmgr.c
new file mode 100644 (file)
index 0000000..c6c06dc
--- /dev/null
@@ -0,0 +1,580 @@
+/*
+ * Copyright © 2014-2017 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <errno.h>
+#include <err.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+#include <xf86drm.h>
+#include <xf86drmMode.h>
+
+#include "util/u_hash_table.h"
+#include "util/u_memory.h"
+#include "util/ralloc.h"
+
+#include "vc5_context.h"
+#include "vc5_screen.h"
+
+#ifdef HAVE_VALGRIND
+#include <valgrind.h>
+#include <memcheck.h>
+#define VG(x) x
+#else
+#define VG(x)
+#endif
+
+static bool dump_stats = false;
+
+static void
+vc5_bo_cache_free_all(struct vc5_bo_cache *cache);
+
+/* Prints BO allocation and BO cache statistics for @screen to stderr.  When
+ * the cache is non-empty, also prints the free times of the first and last
+ * entries of the cache's time list, plus the current CLOCK_MONOTONIC second
+ * for comparison.
+ */
+static void
+vc5_bo_dump_stats(struct vc5_screen *screen)
+{
+        struct vc5_bo_cache *cache = &screen->bo_cache;
+
+        fprintf(stderr, "  BOs allocated:   %d\n", screen->bo_count);
+        fprintf(stderr, "  BOs size:        %dkb\n", screen->bo_size / 1024);
+        fprintf(stderr, "  BOs cached:      %d\n", cache->bo_count);
+        fprintf(stderr, "  BOs cached size: %dkb\n", cache->bo_size / 1024);
+
+        if (!list_empty(&cache->time_list)) {
+                struct vc5_bo *first = LIST_ENTRY(struct vc5_bo,
+                                                  cache->time_list.next,
+                                                  time_list);
+                struct vc5_bo *last = LIST_ENTRY(struct vc5_bo,
+                                                  cache->time_list.prev,
+                                                  time_list);
+
+                fprintf(stderr, "  oldest cache time: %ld\n",
+                        (long)first->free_time);
+                fprintf(stderr, "  newest cache time: %ld\n",
+                        (long)last->free_time);
+
+                struct timespec time;
+                clock_gettime(CLOCK_MONOTONIC, &time);
+                /* time_t is not guaranteed to be long, so cast to match the
+                 * %ld conversion, as is done for the free times above.
+                 */
+                fprintf(stderr, "  now:               %ld\n",
+                        (long)time.tv_sec);
+        }
+}
+
+/* Unlinks @bo from both of the cache's lists (by size and by time) and
+ * subtracts it from the cache's BO count and byte total.
+ */
+static void
+vc5_bo_remove_from_cache(struct vc5_bo_cache *cache, struct vc5_bo *bo)
+{
+        cache->bo_size -= bo->size;
+        cache->bo_count--;
+
+        list_del(&bo->size_list);
+        list_del(&bo->time_list);
+}
+
+/* Tries to satisfy an allocation of @size bytes from the screen's BO cache.
+ *
+ * Only the first BO in the bucket for exactly this page count is
+ * considered.  Returns it, removed from the cache, holding one reference
+ * and relabelled @name; or NULL when the bucket does not exist, is empty,
+ * or its first BO is still busy.
+ */
+static struct vc5_bo *
+vc5_bo_from_cache(struct vc5_screen *screen, uint32_t size, const char *name)
+{
+        struct vc5_bo_cache *cache = &screen->bo_cache;
+        uint32_t page_index = size / 4096 - 1;
+
+        if (cache->size_list_size <= page_index)
+                return NULL;
+
+        mtx_lock(&cache->lock);
+
+        if (list_empty(&cache->size_list[page_index])) {
+                mtx_unlock(&cache->lock);
+                return NULL;
+        }
+
+        struct vc5_bo *cached = LIST_ENTRY(struct vc5_bo,
+                                           cache->size_list[page_index].next,
+                                           size_list);
+
+        /* Check that the BO has gone idle.  If not, then we want to
+         * allocate something new instead, since we assume that the
+         * user will proceed to CPU map it and fill it with stuff.
+         */
+        if (!vc5_bo_wait(cached, 0, NULL)) {
+                mtx_unlock(&cache->lock);
+                return NULL;
+        }
+
+        pipe_reference_init(&cached->reference, 1);
+        vc5_bo_remove_from_cache(cache, cached);
+        cached->name = name;
+
+        mtx_unlock(&cache->lock);
+        return cached;
+}
+
+/* Allocates a BO of @size bytes (rounded up to a multiple of 4096), labelled
+ * @name for debug output.
+ *
+ * A BO from the screen's cache is reused when vc5_bo_from_cache() finds one;
+ * otherwise a new BO is created with DRM_IOCTL_VC5_CREATE_BO.  If creation
+ * fails while the cache still holds BOs, the cache is emptied and creation
+ * is retried exactly once.
+ *
+ * Returns the BO holding a single reference, or NULL on failure.
+ */
+struct vc5_bo *
+vc5_bo_alloc(struct vc5_screen *screen, uint32_t size, const char *name)
+{
+        struct vc5_bo *bo;
+        int ret;
+
+        size = align(size, 4096);
+
+        bo = vc5_bo_from_cache(screen, size, name);
+        if (bo) {
+                if (dump_stats) {
+                        fprintf(stderr, "Allocated %s %dkb from cache:\n",
+                                name, size / 1024);
+                        vc5_bo_dump_stats(screen);
+                }
+                return bo;
+        }
+
+        bo = CALLOC_STRUCT(vc5_bo);
+        if (!bo)
+                return NULL;
+
+        pipe_reference_init(&bo->reference, 1);
+        bo->screen = screen;
+        bo->size = size;
+        bo->name = name;
+        bo->private = true;
+
+        /* This flag must be initialized before the retry label: initialized
+         * after it, every "goto retry" would reset it to false and the
+         * retry-only-once guard below would never trip.
+         */
+        bool cleared_and_retried = false;
+
+ retry:
+        ;
+
+        struct drm_vc5_create_bo create = {
+                .size = size
+        };
+
+        ret = vc5_ioctl(screen->fd, DRM_IOCTL_VC5_CREATE_BO, &create);
+        if (ret != 0) {
+                /* Freeing the cached BOs may release enough memory for the
+                 * allocation to succeed on a second attempt.
+                 */
+                if (!list_empty(&screen->bo_cache.time_list) &&
+                    !cleared_and_retried) {
+                        cleared_and_retried = true;
+                        vc5_bo_cache_free_all(&screen->bo_cache);
+                        goto retry;
+                }
+
+                free(bo);
+                return NULL;
+        }
+
+        /* Only trust the ioctl's outputs once it has succeeded. */
+        bo->handle = create.handle;
+        bo->offset = create.offset;
+
+        screen->bo_count++;
+        screen->bo_size += bo->size;
+        if (dump_stats) {
+                fprintf(stderr, "Allocated %s %dkb:\n", name, size / 1024);
+                vc5_bo_dump_stats(screen);
+        }
+
+        return bo;
+}
+
+/* Final-unreference path for @bo: takes the BO cache lock and hands the BO
+ * to vc5_bo_last_unreference_locked_timed(), stamped with the current
+ * CLOCK_MONOTONIC second.
+ */
+void
+vc5_bo_last_unreference(struct vc5_bo *bo)
+{
+        /* Resolve the cache up front; @bo is not touched again after the
+         * locked call below.
+         */
+        struct vc5_bo_cache *cache = &bo->screen->bo_cache;
+        struct timespec now;
+
+        clock_gettime(CLOCK_MONOTONIC, &now);
+
+        mtx_lock(&cache->lock);
+        vc5_bo_last_unreference_locked_timed(bo, now.tv_sec);
+        mtx_unlock(&cache->lock);
+}
+
+/* Destroys @bo: releases its CPU mapping (if any), closes its GEM handle,
+ * subtracts it from the screen's BO statistics and frees the struct.
+ */
+static void
+vc5_bo_free(struct vc5_bo *bo)
+{
+        struct vc5_screen *screen = bo->screen;
+
+        if (bo->map) {
+                /* Under the simulator, BOs named "winsys" carry a malloc()ed
+                 * buffer as their map (see vc5_bo_open_handle()), so it is
+                 * released with free() instead of munmap().
+                 */
+                bool malloced_map = using_vc5_simulator && bo->name &&
+                                    strcmp(bo->name, "winsys") == 0;
+
+                if (!malloced_map) {
+                        munmap(bo->map, bo->size);
+                        VG(VALGRIND_FREELIKE_BLOCK(bo->map, 0));
+                } else {
+                        free(bo->map);
+                }
+        }
+
+        struct drm_gem_close close_args = {
+                .handle = bo->handle
+        };
+        if (vc5_ioctl(screen->fd, DRM_IOCTL_GEM_CLOSE, &close_args) != 0)
+                fprintf(stderr, "close object %d: %s\n", bo->handle, strerror(errno));
+
+        screen->bo_size -= bo->size;
+        screen->bo_count--;
+
+        if (dump_stats) {
+                const char *label = bo->name ? bo->name : "";
+                const char *separator = bo->name ? " " : "";
+
+                fprintf(stderr, "Freed %s%s%dkb:\n",
+                        label, separator, bo->size / 1024);
+                vc5_bo_dump_stats(screen);
+        }
+
+        free(bo);
+}
+
+/* Frees cached BOs that have been sitting in the cache for more than 2
+ * seconds as of @time.
+ *
+ * BOs are appended to the tail of the time list when they enter the cache,
+ * so the walk starts at the oldest entry and stops at the first BO that is
+ * still fresh.  With dump_stats enabled, statistics are printed before the
+ * first free and after the last one, but only if something was freed.
+ */
+static void
+free_stale_bos(struct vc5_screen *screen, time_t time)
+{
+        struct vc5_bo_cache *cache = &screen->bo_cache;
+        bool freed_any = false;
+
+        list_for_each_entry_safe(struct vc5_bo, bo, &cache->time_list,
+                                 time_list) {
+                /* Anything cached for 2 seconds or less is kept, and so is
+                 * everything after it in the list.
+                 */
+                if (time - bo->free_time <= 2)
+                        break;
+
+                if (dump_stats && !freed_any) {
+                        fprintf(stderr, "Freeing stale BOs:\n");
+                        vc5_bo_dump_stats(screen);
+                }
+                freed_any = true;
+
+                vc5_bo_remove_from_cache(cache, bo);
+                vc5_bo_free(bo);
+        }
+
+        if (dump_stats && freed_any) {
+                fprintf(stderr, "Freed stale BOs:\n");
+                vc5_bo_dump_stats(screen);
+        }
+}
+
+/* Empties the BO cache: under the cache lock, repeatedly takes the entry at
+ * the head of the time list, unlinks it from the cache and frees it.
+ */
+static void
+vc5_bo_cache_free_all(struct vc5_bo_cache *cache)
+{
+        mtx_lock(&cache->lock);
+
+        /* Every iteration unlinks the head entry, so the list shrinks until
+         * it is empty.
+         */
+        while (!list_empty(&cache->time_list)) {
+                struct vc5_bo *oldest = LIST_ENTRY(struct vc5_bo,
+                                                   cache->time_list.next,
+                                                   time_list);
+
+                vc5_bo_remove_from_cache(cache, oldest);
+                vc5_bo_free(oldest);
+        }
+
+        mtx_unlock(&cache->lock);
+}
+
+/* Final-unreference path, called with the BO cache lock held (see
+ * vc5_bo_last_unreference()).
+ *
+ * BOs that are not private are freed immediately.  Private BOs are put in
+ * the cache instead: appended to the size bucket for their page count and
+ * to the time list, stamped with @time.  The bucket array is grown first if
+ * it has no bucket for this size yet; if that allocation fails the BO is
+ * simply freed.  Finally, stale cache entries are freed.
+ */
+void
+vc5_bo_last_unreference_locked_timed(struct vc5_bo *bo, time_t time)
+{
+        struct vc5_screen *screen = bo->screen;
+        struct vc5_bo_cache *cache = &screen->bo_cache;
+        uint32_t page_index = bo->size / 4096 - 1;
+
+        if (!bo->private) {
+                vc5_bo_free(bo);
+                return;
+        }
+
+        if (cache->size_list_size <= page_index) {
+                struct list_head *new_list =
+                        ralloc_array(screen, struct list_head, page_index + 1);
+
+                /* Without a bucket there is nowhere to cache the BO, so
+                 * release it rather than dereferencing a NULL array.
+                 */
+                if (!new_list) {
+                        vc5_bo_free(bo);
+                        return;
+                }
+
+                /* Move old list contents over (since the array has moved, and
+                 * therefore the pointers to the list heads have to change).
+                 */
+                for (uint32_t i = 0; i < cache->size_list_size; i++) {
+                        struct list_head *old_head = &cache->size_list[i];
+                        if (list_empty(old_head))
+                                list_inithead(&new_list[i]);
+                        else {
+                                new_list[i].next = old_head->next;
+                                new_list[i].prev = old_head->prev;
+                                new_list[i].next->prev = &new_list[i];
+                                new_list[i].prev->next = &new_list[i];
+                        }
+                }
+                /* Buckets that did not exist before start out empty. */
+                for (uint32_t i = cache->size_list_size; i < page_index + 1; i++)
+                        list_inithead(&new_list[i]);
+
+                cache->size_list = new_list;
+                cache->size_list_size = page_index + 1;
+        }
+
+        bo->free_time = time;
+        list_addtail(&bo->size_list, &cache->size_list[page_index]);
+        list_addtail(&bo->time_list, &cache->time_list);
+        cache->bo_count++;
+        cache->bo_size += bo->size;
+        if (dump_stats) {
+                fprintf(stderr, "Freed %s %dkb to cache:\n",
+                        bo->name, bo->size / 1024);
+                vc5_bo_dump_stats(screen);
+        }
+        /* Cached BOs carry no name until they are handed out again. */
+        bo->name = NULL;
+
+        free_stale_bos(screen, time);
+}
+
+/**
+ * Returns the vc5_bo wrapping a GEM handle that was imported from outside
+ * (flink name or dmabuf).
+ *
+ * If the handle is already tracked in screen->bo_handles, the existing BO
+ * gets an extra reference and is returned.  Otherwise a new, non-private BO
+ * named "winsys" is created and registered in the table.  The lookup and the
+ * insertion both happen under screen->bo_handles_mutex.
+ *
+ * \param winsys_stride  only forwarded to the simulator in this function.
+ * \param handle         GEM handle on screen->fd.
+ * \param size           size of the BO in bytes; must be non-zero.
+ * \return the BO, or NULL if the wrapper struct couldn't be allocated.
+ */
+static struct vc5_bo *
+vc5_bo_open_handle(struct vc5_screen *screen,
+                   uint32_t winsys_stride,
+                   uint32_t handle, uint32_t size)
+{
+        struct vc5_bo *bo;
+
+        assert(size);
+
+        mtx_lock(&screen->bo_handles_mutex);
+
+        bo = util_hash_table_get(screen->bo_handles, (void*)(uintptr_t)handle);
+        if (bo) {
+                pipe_reference(NULL, &bo->reference);
+                goto done;
+        }
+
+        bo = CALLOC_STRUCT(vc5_bo);
+        if (!bo) {
+                /* Out of memory: report failure through the NULL return
+                 * (which both callers already use for their own errors)
+                 * instead of dereferencing a NULL pointer below.
+                 */
+                goto done;
+        }
+
+        pipe_reference_init(&bo->reference, 1);
+        bo->screen = screen;
+        bo->handle = handle;
+        bo->size = size;
+        bo->name = "winsys";
+        bo->private = false;
+
+#ifdef USE_VC5_SIMULATOR
+        vc5_simulator_open_from_handle(screen->fd, winsys_stride,
+                                       bo->handle, bo->size);
+        bo->map = malloc(bo->size);
+#endif
+
+        util_hash_table_set(screen->bo_handles, (void *)(uintptr_t)handle, bo);
+
+done:
+        mtx_unlock(&screen->bo_handles_mutex);
+        return bo;
+}
+
+/**
+ * Opens the BO published under the given flink name.
+ *
+ * Issues DRM_IOCTL_GEM_OPEN on the screen's fd and wraps the resulting
+ * handle/size pair with vc5_bo_open_handle().  Returns NULL (after logging
+ * to stderr) if the ioctl fails.
+ */
+struct vc5_bo *
+vc5_bo_open_name(struct vc5_screen *screen, uint32_t name,
+                 uint32_t winsys_stride)
+{
+        struct drm_gem_open open_args = {
+                .name = name
+        };
+
+        if (vc5_ioctl(screen->fd, DRM_IOCTL_GEM_OPEN, &open_args)) {
+                fprintf(stderr, "Failed to open bo %d: %s\n",
+                        name, strerror(errno));
+                return NULL;
+        }
+
+        return vc5_bo_open_handle(screen, winsys_stride,
+                                  open_args.handle, open_args.size);
+}
+
+/**
+ * Imports the dmabuf referenced by fd as a BO on this screen.
+ *
+ * The fd is converted to a GEM handle with drmPrimeFDToHandle(), the buffer
+ * size is queried by seeking to the end of the dmabuf, and the pair is
+ * wrapped with vc5_bo_open_handle().
+ *
+ * \return the BO, or NULL (after logging to stderr) if the import fails, the
+ *         size can't be determined, or the size doesn't fit in the 32-bit
+ *         size field of struct vc5_bo.
+ */
+struct vc5_bo *
+vc5_bo_open_dmabuf(struct vc5_screen *screen, int fd, uint32_t winsys_stride)
+{
+        uint32_t handle;
+        int ret = drmPrimeFDToHandle(screen->fd, fd, &handle);
+        if (ret) {
+                fprintf(stderr, "Failed to get vc5 handle for dmabuf %d\n", fd);
+                return NULL;
+        }
+
+        /* Determine the size of the bo we were handed.  lseek() returns an
+         * off_t, so keep the result in that type until it has been
+         * validated: squeezing it through an int would silently truncate
+         * large buffers.
+         */
+        off_t size = lseek(fd, 0, SEEK_END);
+        if (size < 0) {
+                fprintf(stderr, "Couldn't get size of dmabuf fd %d.\n", fd);
+                return NULL;
+        }
+
+        /* The BO size is stored in 32 bits; refuse anything larger rather
+         * than importing it with a wrapped-around size.
+         */
+        if ((uint64_t)size > UINT32_MAX) {
+                fprintf(stderr, "dmabuf fd %d is too large to import.\n", fd);
+                return NULL;
+        }
+
+        return vc5_bo_open_handle(screen, winsys_stride, handle,
+                                  (uint32_t)size);
+}
+
+/**
+ * Exports the BO as a dmabuf file descriptor (opened with O_CLOEXEC).
+ *
+ * On success the BO is marked as no longer private and its handle is
+ * recorded in the screen's bo_handles table, under bo_handles_mutex.
+ *
+ * \return the new fd, or -1 (after logging to stderr) if the export fails.
+ */
+int
+vc5_bo_get_dmabuf(struct vc5_bo *bo)
+{
+        struct vc5_screen *screen = bo->screen;
+        int dmabuf_fd;
+
+        if (drmPrimeHandleToFD(screen->fd, bo->handle,
+                               O_CLOEXEC, &dmabuf_fd) != 0) {
+                fprintf(stderr, "Failed to export gem bo %d to dmabuf\n",
+                        bo->handle);
+                return -1;
+        }
+
+        mtx_lock(&screen->bo_handles_mutex);
+        bo->private = false;
+        util_hash_table_set(screen->bo_handles,
+                            (void *)(uintptr_t)bo->handle, bo);
+        mtx_unlock(&screen->bo_handles_mutex);
+
+        return dmabuf_fd;
+}
+
+/**
+ * Creates a global (flink) name for the BO so it can be opened elsewhere.
+ *
+ * On success the BO is marked as no longer private and the name is stored
+ * in *name.  On failure the error is logged to stderr, *name is left
+ * untouched and the BO itself is not modified.
+ *
+ * \return true on success, false if DRM_IOCTL_GEM_FLINK failed.
+ */
+bool
+vc5_bo_flink(struct vc5_bo *bo, uint32_t *name)
+{
+        struct drm_gem_flink flink = {
+                .handle = bo->handle,
+        };
+        int ret = vc5_ioctl(bo->screen->fd, DRM_IOCTL_GEM_FLINK, &flink);
+        if (ret) {
+                /* Don't free the BO here: this function was handed a
+                 * borrowed pointer, and whoever owns the reference will
+                 * still release it through the normal unreference path.
+                 */
+                fprintf(stderr, "Failed to flink bo %d: %s\n",
+                        bo->handle, strerror(errno));
+                return false;
+        }
+
+        bo->private = false;
+        *name = flink.name;
+
+        return true;
+}
+
+/**
+ * Thin wrapper around DRM_IOCTL_VC5_WAIT_SEQNO.
+ *
+ * \return 0 on success, or the negated errno if the ioctl returned -1.
+ */
+static int vc5_wait_seqno_ioctl(int fd, uint64_t seqno, uint64_t timeout_ns)
+{
+        struct drm_vc5_wait_seqno args = {
+                .seqno = seqno,
+                .timeout_ns = timeout_ns,
+        };
+
+        if (vc5_ioctl(fd, DRM_IOCTL_VC5_WAIT_SEQNO, &args) == -1)
+                return -errno;
+
+        return 0;
+}
+
+/**
+ * Waits for the given seqno to complete, for at most timeout_ns.
+ *
+ * Returns immediately if the screen has already recorded a finished seqno
+ * at least as new.  With V3D_DEBUG_PERF set (and a non-zero timeout and a
+ * reason string), a zero-timeout probe is done first so that an actual
+ * stall can be reported on stderr.
+ *
+ * \return true if the seqno has completed (screen->finished_seqno is then
+ *         updated), false if the wait timed out.  Any other ioctl error
+ *         aborts the process.
+ */
+bool
+vc5_wait_seqno(struct vc5_screen *screen, uint64_t seqno, uint64_t timeout_ns,
+               const char *reason)
+{
+        if (seqno <= screen->finished_seqno)
+                return true;
+
+        if (unlikely(V3D_DEBUG & V3D_DEBUG_PERF) && timeout_ns && reason) {
+                int probe = vc5_wait_seqno_ioctl(screen->fd, seqno, 0);
+
+                if (probe == -ETIME) {
+                        fprintf(stderr, "Blocking on seqno %lld for %s\n",
+                                (long long)seqno, reason);
+                }
+        }
+
+        int err = vc5_wait_seqno_ioctl(screen->fd, seqno, timeout_ns);
+        if (err == 0) {
+                screen->finished_seqno = seqno;
+                return true;
+        }
+
+        if (err != -ETIME) {
+                fprintf(stderr, "wait failed: %d\n", err);
+                abort();
+        }
+
+        return false;
+}
+
+/**
+ * Thin wrapper around DRM_IOCTL_VC5_WAIT_BO.
+ *
+ * \return 0 on success, or the negated errno if the ioctl returned -1.
+ */
+static int vc5_wait_bo_ioctl(int fd, uint32_t handle, uint64_t timeout_ns)
+{
+        struct drm_vc5_wait_bo args = {
+                .handle = handle,
+                .timeout_ns = timeout_ns,
+        };
+
+        if (vc5_ioctl(fd, DRM_IOCTL_VC5_WAIT_BO, &args) == -1)
+                return -errno;
+
+        return 0;
+}
+
+/**
+ * Waits on the BO through DRM_IOCTL_VC5_WAIT_BO, for at most timeout_ns.
+ *
+ * With V3D_DEBUG_PERF set (and a non-zero timeout and a reason string), a
+ * zero-timeout probe is done first so that an actual stall can be reported
+ * on stderr.
+ *
+ * \return true if the wait succeeded, false if it timed out.  Any other
+ *         ioctl error aborts the process.
+ */
+bool
+vc5_bo_wait(struct vc5_bo *bo, uint64_t timeout_ns, const char *reason)
+{
+        int fd = bo->screen->fd;
+
+        if (unlikely(V3D_DEBUG & V3D_DEBUG_PERF) && timeout_ns && reason) {
+                int probe = vc5_wait_bo_ioctl(fd, bo->handle, 0);
+
+                if (probe == -ETIME) {
+                        fprintf(stderr, "Blocking on %s BO for %s\n",
+                                bo->name, reason);
+                }
+        }
+
+        int err = vc5_wait_bo_ioctl(fd, bo->handle, timeout_ns);
+        if (err == 0)
+                return true;
+
+        if (err != -ETIME) {
+                fprintf(stderr, "wait failed: %d\n", err);
+                abort();
+        }
+
+        return false;
+}
+
+/**
+ * Returns a CPU mapping of the BO without waiting on it.
+ *
+ * The mapping is created on first use (DRM_IOCTL_VC5_MMAP_BO to get the
+ * offset, then mmap() on the screen's fd) and cached in bo->map for later
+ * calls.  Failure of either step aborts the process.
+ */
+void *
+vc5_bo_map_unsynchronized(struct vc5_bo *bo)
+{
+        if (bo->map)
+                return bo->map;
+
+        struct drm_vc5_mmap_bo args;
+        memset(&args, 0, sizeof(args));
+        args.handle = bo->handle;
+
+        if (vc5_ioctl(bo->screen->fd, DRM_IOCTL_VC5_MMAP_BO, &args) != 0) {
+                fprintf(stderr, "map ioctl failure\n");
+                abort();
+        }
+
+        uint64_t mmap_offset = args.offset;
+
+        bo->map = mmap(NULL, bo->size, PROT_READ | PROT_WRITE, MAP_SHARED,
+                       bo->screen->fd, mmap_offset);
+        if (bo->map == MAP_FAILED) {
+                fprintf(stderr, "mmap of bo %d (offset 0x%016llx, size %d) failed\n",
+                        bo->handle, (long long)mmap_offset, bo->size);
+                abort();
+        }
+        VG(VALGRIND_MALLOCLIKE_BLOCK(bo->map, bo->size, 0, false));
+
+        return bo->map;
+}
+
+/**
+ * Returns a CPU mapping of the BO after waiting on it with an infinite
+ * timeout.  Aborts the process if the wait reports failure.
+ */
+void *
+vc5_bo_map(struct vc5_bo *bo)
+{
+        void *ptr = vc5_bo_map_unsynchronized(bo);
+
+        if (!vc5_bo_wait(bo, PIPE_TIMEOUT_INFINITE, "bo map")) {
+                fprintf(stderr, "BO wait for map failed\n");
+                abort();
+        }
+
+        return ptr;
+}
+
+/**
+ * Screen teardown hook for the buffer manager: empties the BO cache and,
+ * when dump_stats is set, prints the final BO statistics to stderr.
+ */
+void
+vc5_bufmgr_destroy(struct pipe_screen *pscreen)
+{
+        struct vc5_screen *screen = vc5_screen(pscreen);
+
+        vc5_bo_cache_free_all(&screen->bo_cache);
+
+        if (!dump_stats)
+                return;
+
+        fprintf(stderr, "BO stats after screen destroy:\n");
+        vc5_bo_dump_stats(screen);
+}
diff --git a/src/gallium/drivers/vc5/vc5_bufmgr.h b/src/gallium/drivers/vc5/vc5_bufmgr.h
new file mode 100644 (file)
index 0000000..cca2b22
--- /dev/null
@@ -0,0 +1,140 @@
+/*
+ * Copyright © 2014 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef VC5_BUFMGR_H
+#define VC5_BUFMGR_H
+
+#include <stdint.h>
+#include "util/u_hash_table.h"
+#include "util/u_inlines.h"
+#include "util/list.h"
+#include "vc5_screen.h"
+
+struct vc5_context;
+
+/** A GEM buffer object as tracked by the driver. */
+struct vc5_bo {
+        /**
+         * Reference count.  Kept as the first member; the inline helpers in
+         * this header take its address through possibly-NULL BO pointers.
+         */
+        struct pipe_reference reference;
+        /** Screen whose DRM fd the handle belongs to. */
+        struct vc5_screen *screen;
+        /** Cached CPU mapping, or NULL if the BO hasn't been mapped yet. */
+        void *map;
+        /**
+         * Label used in debug output ("winsys" for imported BOs); set to
+         * NULL when the BO is put in the BO cache.
+         */
+        const char *name;
+        /** GEM handle passed to the kernel in ioctls. */
+        uint32_t handle;
+        /** Size of the BO in bytes (used as the mmap length). */
+        uint32_t size;
+
+        /* Address of the BO in our page tables. */
+        uint32_t offset;
+
+        /** Entry in the linked list of buffers freed, by age. */
+        struct list_head time_list;
+        /** Entry in the per-page-count linked list of buffers freed (by age). */
+        struct list_head size_list;
+        /** Approximate second when the bo was freed. */
+        time_t free_time;
+        /**
+         * Whether only our process has a reference to the BO (meaning that
+         * it's safe to reuse it in the BO cache).
+         */
+        bool private;
+};
+
+/* Allocates a BO of the given size; name is a label for debug output. */
+struct vc5_bo *vc5_bo_alloc(struct vc5_screen *screen, uint32_t size,
+                            const char *name);
+/* Called by the unreference helpers below once the refcount hits zero. */
+void vc5_bo_last_unreference(struct vc5_bo *bo);
+void vc5_bo_last_unreference_locked_timed(struct vc5_bo *bo, time_t time);
+/* Opens a BO by flink name; returns NULL if the GEM open ioctl fails. */
+struct vc5_bo *vc5_bo_open_name(struct vc5_screen *screen, uint32_t name,
+                                uint32_t winsys_stride);
+/* Imports a dmabuf fd as a BO; returns NULL on failure. */
+struct vc5_bo *vc5_bo_open_dmabuf(struct vc5_screen *screen, int fd,
+                                  uint32_t winsys_stride);
+/* Stores a flink name for the BO in *name; returns false on failure. */
+bool vc5_bo_flink(struct vc5_bo *bo, uint32_t *name);
+/* Exports the BO as a dmabuf; returns the new fd, or -1 on failure. */
+int vc5_bo_get_dmabuf(struct vc5_bo *bo);
+
+/**
+ * Makes *old_bo point at new_bo, moving one reference from the previous BO
+ * to the new one through pipe_reference().  If that dropped the last
+ * reference on the previous BO, it is released with
+ * vc5_bo_last_unreference().
+ */
+static inline void
+vc5_bo_set_reference(struct vc5_bo **old_bo, struct vc5_bo *new_bo)
+{
+        struct vc5_bo *prev = *old_bo;
+
+        if (pipe_reference(&prev->reference, &new_bo->reference))
+                vc5_bo_last_unreference(prev);
+
+        *old_bo = new_bo;
+}
+
+/** Takes an additional reference on the BO and returns it. */
+static inline struct vc5_bo *
+vc5_bo_reference(struct vc5_bo *bo)
+{
+        struct pipe_reference *ref = &bo->reference;
+
+        pipe_reference(NULL, ref);
+
+        return bo;
+}
+
+/**
+ * Drops the caller's reference on *bo and clears the pointer.  A NULL *bo
+ * is a no-op.
+ *
+ * Private BOs are released without locking.  For BOs that are not private,
+ * the reference is dropped under screen->bo_handles_mutex and, if it was
+ * the last one, the handle is removed from the bo_handles table before the
+ * BO is released.
+ */
+static inline void
+vc5_bo_unreference(struct vc5_bo **bo)
+{
+        struct vc5_bo *old = *bo;
+
+        if (old == NULL)
+                return;
+
+        if (!old->private) {
+                struct vc5_screen *screen = old->screen;
+
+                mtx_lock(&screen->bo_handles_mutex);
+
+                if (pipe_reference(&old->reference, NULL)) {
+                        util_hash_table_remove(screen->bo_handles,
+                                               (void *)(uintptr_t)old->handle);
+                        vc5_bo_last_unreference(old);
+                }
+
+                mtx_unlock(&screen->bo_handles_mutex);
+        } else if (pipe_reference(&old->reference, NULL)) {
+                /* Avoid the mutex for private BOs */
+                vc5_bo_last_unreference(old);
+        }
+
+        *bo = NULL;
+}
+
+/**
+ * Drops the caller's reference on *bo and clears the pointer, passing the
+ * supplied timestamp on to vc5_bo_last_unreference_locked_timed() if this
+ * was the last reference.  A NULL *bo is a no-op.
+ */
+static inline void
+vc5_bo_unreference_locked_timed(struct vc5_bo **bo, time_t time)
+{
+        struct vc5_bo *old = *bo;
+
+        if (old == NULL)
+                return;
+
+        if (pipe_reference(&old->reference, NULL))
+                vc5_bo_last_unreference_locked_timed(old, time);
+
+        *bo = NULL;
+}
+
+/* Maps the BO for CPU access after waiting on it (infinite timeout). */
+void *
+vc5_bo_map(struct vc5_bo *bo);
+
+/* Maps the BO for CPU access without waiting; the mapping is cached. */
+void *
+vc5_bo_map_unsynchronized(struct vc5_bo *bo);
+
+/* Waits on the BO; returns false on timeout, aborts on other errors. */
+bool
+vc5_bo_wait(struct vc5_bo *bo, uint64_t timeout_ns, const char *reason);
+
+/* Waits for a seqno; returns false on timeout, aborts on other errors. */
+bool
+vc5_wait_seqno(struct vc5_screen *screen, uint64_t seqno, uint64_t timeout_ns,
+               const char *reason);
+
+/* Frees the screen's BO cache at screen destruction. */
+void
+vc5_bufmgr_destroy(struct pipe_screen *pscreen);
+
+#endif /* VC5_BUFMGR_H */
+
diff --git a/src/gallium/drivers/vc5/vc5_cl.c b/src/gallium/drivers/vc5/vc5_cl.c
new file mode 100644 (file)
index 0000000..37d96c4
--- /dev/null
@@ -0,0 +1,87 @@
+/*
+ * Copyright © 2014-2017 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "util/u_math.h"
+#include "util/ralloc.h"
+#include "vc5_context.h"
+#include "broadcom/cle/v3d_packet_v33_pack.h"
+
+/**
+ * Resets a command list to the empty state (no mapping, zero size) and
+ * attaches it to the given job.  cl->bo is not written here.
+ */
+void
+vc5_init_cl(struct vc5_job *job, struct vc5_cl *cl)
+{
+        cl->job = job;
+        cl->size = 0;
+        cl->base = NULL;
+        cl->next = NULL;
+}
+
+/**
+ * Makes sure the CL can hold `space` bytes starting at an offset aligned to
+ * `alignment`.
+ *
+ * If the current BO has room, the write pointer is moved up to the aligned
+ * offset and that offset is returned.  Otherwise the current BO is
+ * unreferenced and replaced by a freshly mapped one (sized to `space`
+ * rounded up to 4096), and 0 is returned.
+ */
+uint32_t
+vc5_cl_ensure_space(struct vc5_cl *cl, uint32_t space, uint32_t alignment)
+{
+        uint32_t aligned_offset = align(cl_offset(cl), alignment);
+
+        if (aligned_offset + space > cl->size) {
+                /* Doesn't fit: start over in a new BO. */
+                vc5_bo_unreference(&cl->bo);
+                cl->bo = vc5_bo_alloc(cl->job->vc5->screen,
+                                      align(space, 4096), "CL");
+                cl->base = vc5_bo_map(cl->bo);
+                cl->size = cl->bo->size;
+                cl->next = cl->base;
+
+                return 0;
+        }
+
+        cl->next = cl->base + aligned_offset;
+
+        return aligned_offset;
+}
+
+/**
+ * Makes sure the CL has room for `space` bytes plus a trailing BRANCH
+ * packet, so that a later call can always chain to another BO.
+ *
+ * If the current BO is too full (or there is none yet), a new 4096-byte BO
+ * is allocated.  When there is a current BO, a BRANCH to the new BO is
+ * emitted into it and our reference on it is dropped.  When this is the
+ * first BO of the CL, nothing points at it, so it is added to the job
+ * explicitly.  The CL then continues writing at the start of the new BO.
+ */
+void
+vc5_cl_ensure_space_with_branch(struct vc5_cl *cl, uint32_t space)
+{
+        if (cl_offset(cl) + space + cl_packet_length(BRANCH) <= cl->size)
+                return;
+
+        struct vc5_bo *new_bo = vc5_bo_alloc(cl->job->vc5->screen, 4096, "CL");
+        assert(space <= new_bo->size);
+
+        /* Chain to the new BO from the old one. */
+        if (cl->bo) {
+                cl_emit(cl, BRANCH, branch) {
+                        branch.address = cl_address(new_bo, 0);
+                }
+                vc5_bo_unreference(&cl->bo);
+        } else {
+                /* Root the first RCL/BCL BO in the job.  This has to be the
+                 * BO we just allocated: cl->bo is NULL in this branch.
+                 */
+                vc5_job_add_bo(cl->job, new_bo);
+        }
+
+        cl->bo = new_bo;
+        cl->base = vc5_bo_map(cl->bo);
+        cl->size = cl->bo->size;
+        cl->next = cl->base;
+}
+
+/** Releases the CL's reference on its current BO, if it has one. */
+void
+vc5_destroy_cl(struct vc5_cl *cl)
+{
+        struct vc5_bo **bo_slot = &cl->bo;
+
+        vc5_bo_unreference(bo_slot);
+}
diff --git a/src/gallium/drivers/vc5/vc5_cl.h b/src/gallium/drivers/vc5/vc5_cl.h
new file mode 100644 (file)
index 0000000..e935eef
--- /dev/null
@@ -0,0 +1,246 @@
+/*
+ * Copyright © 2014-2017 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef VC5_CL_H
+#define VC5_CL_H
+
+#include <stdint.h>
+
+#include "util/u_math.h"
+#include "util/macros.h"
+
+struct vc5_bo;
+struct vc5_job;
+struct vc5_cl;
+
+/**
+ * Undefined structure, used for typechecking that you're passing the pointers
+ * to these functions correctly.
+ */
+struct vc5_cl_out;
+
+/** A reference to a BO used in the CL packing functions */
+struct vc5_cl_reloc {
+        /** Referenced BO; may be NULL, in which case only offset is used. */
+        struct vc5_bo *bo;
+        /** Byte offset added to the BO's address. */
+        uint32_t offset;
+};
+
+/* Declared early so the __gen_emit_reloc hook below can name it. */
+static inline void cl_pack_emit_reloc(struct vc5_cl *cl, const struct vc5_cl_reloc *);
+
+/* Hooks for the XML-generated packet pack functions: the user-data and
+ * address types they operate on, how an address is turned into a value
+ * (the BO's offset, if there is a BO, plus the reloc's own offset), and
+ * the callback used to note a BO reference.
+ */
+#define __gen_user_data struct vc5_cl
+#define __gen_address_type struct vc5_cl_reloc
+#define __gen_address_offset(reloc) (((reloc)->bo ? (reloc)->bo->offset : 0) + \
+                                     (reloc)->offset)
+#define __gen_emit_reloc cl_pack_emit_reloc
+
+/** A command list being written into a BO. */
+struct vc5_cl {
+        /** CPU mapping of the start of the current BO (NULL after init). */
+        void *base;
+        /** Job that BOs referenced from this CL get added to. */
+        struct vc5_job *job;
+        /** Write pointer: where the next packet will be stored. */
+        struct vc5_cl_out *next;
+        /** BO currently backing the CL. */
+        struct vc5_bo *bo;
+        /** Size in bytes of the current BO (0 after init). */
+        uint32_t size;
+};
+
+/* Resets cl to the empty state and attaches it to job. */
+void vc5_init_cl(struct vc5_job *job, struct vc5_cl *cl);
+/* Drops the CL's reference on its current BO. */
+void vc5_destroy_cl(struct vc5_cl *cl);
+void vc5_dump_cl(void *cl, uint32_t size, bool is_render);
+uint32_t vc5_gem_hindex(struct vc5_job *job, struct vc5_bo *bo);
+
+/* Packed wrappers used by put_unaligned_16()/put_unaligned_32() to store
+ * through pointers that may not be naturally aligned.
+ */
+struct PACKED unaligned_16 { uint16_t x; };
+struct PACKED unaligned_32 { uint32_t x; };
+
+/** Returns how many bytes lie between the CL's base and its write pointer. */
+static inline uint32_t cl_offset(struct vc5_cl *cl)
+{
+        char *start = (char *)cl->base;
+        char *cursor = (char *)cl->next;
+
+        return cursor - start;
+}
+
+/** Moves the output pointer *cl forward by n bytes. */
+static inline void
+cl_advance(struct vc5_cl_out **cl, uint32_t n)
+{
+        char *bytes = (char *)(*cl);
+
+        *cl = (struct vc5_cl_out *)(bytes + n);
+}
+
+/** Returns the CL's current write pointer, to begin emitting at. */
+static inline struct vc5_cl_out *
+cl_start(struct vc5_cl *cl)
+{
+        struct vc5_cl_out *cursor = cl->next;
+
+        return cursor;
+}
+
+/**
+ * Stores the advanced output pointer back into the CL once emitting is
+ * done, and asserts that the writes stayed within the CL's size.
+ */
+static inline void
+cl_end(struct vc5_cl *cl, struct vc5_cl_out *next)
+{
+        cl->next = next;
+        /* Checked after the update, since cl_offset() reads cl->next. */
+        assert(cl_offset(cl) <= cl->size);
+}
+
+
+/** Stores a 32-bit value at ptr through the packed wrapper struct. */
+static inline void
+put_unaligned_32(struct vc5_cl_out *ptr, uint32_t val)
+{
+        struct unaligned_32 *dst = (struct unaligned_32 *)(void *)ptr;
+
+        dst->x = val;
+}
+
+/** Stores a 16-bit value at ptr through the packed wrapper struct. */
+static inline void
+put_unaligned_16(struct vc5_cl_out *ptr, uint16_t val)
+{
+        struct unaligned_16 *dst = (struct unaligned_16 *)(void *)ptr;
+
+        dst->x = val;
+}
+
+/** Writes one byte at the output pointer and advances past it. */
+static inline void
+cl_u8(struct vc5_cl_out **cl, uint8_t n)
+{
+        uint8_t *dst = (uint8_t *)(*cl);
+
+        *dst = n;
+        cl_advance(cl, 1);
+}
+
+/** Writes a 16-bit value (unaligned store) and advances by two bytes. */
+static inline void
+cl_u16(struct vc5_cl_out **cl, uint16_t n)
+{
+        struct vc5_cl_out *dst = *cl;
+
+        put_unaligned_16(dst, n);
+        cl_advance(cl, 2);
+}
+
+/** Writes a 32-bit value (unaligned store) and advances by four bytes. */
+static inline void
+cl_u32(struct vc5_cl_out **cl, uint32_t n)
+{
+        struct vc5_cl_out *dst = *cl;
+
+        put_unaligned_32(dst, n);
+        cl_advance(cl, 4);
+}
+
+/**
+ * Writes a 32-bit value with a plain uint32_t store and advances by four
+ * bytes.
+ */
+static inline void
+cl_aligned_u32(struct vc5_cl_out **cl, uint32_t n)
+{
+        uint32_t *dst = (uint32_t *)(*cl);
+
+        *dst = n;
+        cl_advance(cl, 4);
+}
+
+/**
+ * Writes the address of (bo + offset) as an aligned 32-bit value and adds
+ * the BO to the CL's job.
+ */
+static inline void
+cl_aligned_reloc(struct vc5_cl *cl,
+                 struct vc5_cl_out **cl_out,
+                 struct vc5_bo *bo, uint32_t offset)
+{
+        uint32_t address = bo->offset + offset;
+
+        cl_aligned_u32(cl_out, address);
+        vc5_job_add_bo(cl->job, bo);
+}
+
+/** Writes a host pointer at the output pointer and advances past it. */
+static inline void
+cl_ptr(struct vc5_cl_out **cl, void *ptr)
+{
+        struct vc5_cl_out **slot = (struct vc5_cl_out **)(*cl);
+
+        *slot = ptr;
+        cl_advance(cl, sizeof(void *));
+}
+
+/** Writes the bits of a float as an unaligned 32-bit value. */
+static inline void
+cl_f(struct vc5_cl_out **cl, float f)
+{
+        uint32_t bits = fui(f);
+
+        cl_u32(cl, bits);
+}
+
+/** Writes the bits of a float as an aligned 32-bit value. */
+static inline void
+cl_aligned_f(struct vc5_cl_out **cl, float f)
+{
+        uint32_t bits = fui(f);
+
+        cl_aligned_u32(cl, bits);
+}
+
+/**
+ * Builds the (BO, offset) pair that the pack functions take for address
+ * fields.
+ */
+static inline struct vc5_cl_reloc
+cl_address(struct vc5_bo *bo, uint32_t offset)
+{
+        return (struct vc5_cl_reloc) {
+                .bo = bo,
+                .offset = offset,
+        };
+}
+
+/* Ensures `size` bytes are available at an `align`-aligned offset, moving to
+ * a new BO if needed.  Returns the offset of that space in the CL's BO.
+ */
+uint32_t vc5_cl_ensure_space(struct vc5_cl *cl, uint32_t size, uint32_t align);
+/* Ensures `size` bytes plus a BRANCH packet fit, chaining to a new BO with
+ * a BRANCH if they don't.
+ */
+void vc5_cl_ensure_space_with_branch(struct vc5_cl *cl, uint32_t size);
+
+/* Map a packet name (e.g. BRANCH) to the V3D33_-prefixed identifiers for
+ * its header initializer, packed length, pack function and unpacked struct.
+ */
+#define cl_packet_header(packet) V3D33_ ## packet ## _header
+#define cl_packet_length(packet) V3D33_ ## packet ## _length
+#define cl_packet_pack(packet)   V3D33_ ## packet ## _pack
+#define cl_packet_struct(packet) V3D33_ ## packet
+
+/**
+ * Reserves `size` bytes at the output pointer: returns where they start and
+ * advances *cl past them.
+ */
+static inline void *
+cl_get_emit_space(struct vc5_cl_out **cl, size_t size)
+{
+        struct vc5_cl_out *start = *cl;
+
+        cl_advance(cl, size);
+
+        return start;
+}
+
+/* Macro for setting up an emit of a CL struct.  A temporary unpacked struct
+ * named by the third argument is created, initialized with the packet's
+ * header, and you get to set fields in it in the block that follows:
+ *
+ * cl_emit(bcl, FLAT_SHADE_FLAGS, flags) {
+ *     flags.flat_shade_flags = 1 << 2;
+ * }
+ *
+ * or default values only can be emitted with just:
+ *
+ * cl_emit(bcl, FLAT_SHADE_FLAGS, flags);
+ *
+ * The trick here is that we make a for loop that will execute the body
+ * (either the block or the ';' after the macro invocation) exactly once.
+ * The packing itself happens in the loop's increment expression, i.e. after
+ * the body has filled in the struct: the struct is packed at the CL's
+ * current write pointer, the pointer is advanced by the packet length, and
+ * _loop_terminate is cleared so that the loop exits.
+ *
+ * This macro doesn't reserve space itself; cl_end() only asserts afterwards
+ * that the write stayed within the CL.
+ */
+#define cl_emit(cl, packet, name)                                \
+        for (struct cl_packet_struct(packet) name = {            \
+                cl_packet_header(packet)                         \
+        },                                                       \
+        *_loop_terminate = &name;                                \
+        __builtin_expect(_loop_terminate != NULL, 1);            \
+        ({                                                       \
+                struct vc5_cl_out *cl_out = cl_start(cl);        \
+                cl_packet_pack(packet)(cl, (uint8_t *)cl_out, &name); \
+                VG(VALGRIND_CHECK_MEM_IS_DEFINED(cl_out,         \
+                                                 cl_packet_length(packet))); \
+                cl_advance(&cl_out, cl_packet_length(packet));   \
+                cl_end(cl, cl_out);                              \
+                _loop_terminate = NULL;                          \
+        }))                                                      \
+
+/* Copies an already-packed packet (pointed to by `packet`) to the CL's
+ * write pointer and advances the pointer by the size of the pointed-to
+ * object.  No space check is done here.  `packet` is parenthesized so that
+ * an expression argument is dereferenced as a whole.
+ */
+#define cl_emit_prepacked(cl, packet) do {                       \
+        memcpy((cl)->next, (packet), sizeof(*(packet)));         \
+        cl_advance(&(cl)->next, sizeof(*(packet)));              \
+} while (0)
+
+/**
+ * Helper function called by the XML-generated pack functions for filling in
+ * an address field in shader records.
+ *
+ * Since we have a private address space as of VC5, our BOs can have lifelong
+ * offsets, and all the kernel needs to know is which BOs need to be paged in
+ * for this exec.  So all this does is add the BO (if the reloc has one) to
+ * the CL's job.
+ */
+static inline void
+cl_pack_emit_reloc(struct vc5_cl *cl, const struct vc5_cl_reloc *reloc)
+{
+        if (!reloc->bo)
+                return;
+
+        vc5_job_add_bo(cl->job, reloc->bo);
+}
+
+#endif /* VC5_CL_H */
diff --git a/src/gallium/drivers/vc5/vc5_context.c b/src/gallium/drivers/vc5/vc5_context.c
new file mode 100644 (file)
index 0000000..f80020a
--- /dev/null
@@ -0,0 +1,171 @@
+/*
+ * Copyright © 2014-2017 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <xf86drm.h>
+#include <err.h>
+
+#include "pipe/p_defines.h"
+#include "util/hash_table.h"
+#include "util/ralloc.h"
+#include "util/u_inlines.h"
+#include "util/u_memory.h"
+#include "util/u_blitter.h"
+#include "util/u_upload_mgr.h"
+#include "indices/u_primconvert.h"
+#include "pipe/p_screen.h"
+
+#include "vc5_screen.h"
+#include "vc5_context.h"
+#include "vc5_resource.h"
+
+/** Submits every job currently in the context's jobs table. */
+void
+vc5_flush(struct pipe_context *pctx)
+{
+        struct vc5_context *vc5 = vc5_context(pctx);
+        struct hash_entry *job_entry;
+
+        hash_table_foreach(vc5->jobs, job_entry)
+                vc5_job_submit(vc5, job_entry->data);
+}
+
+/**
+ * pipe_context::flush hook: submits all jobs and, if the caller asked for a
+ * fence, replaces *fence with a new one created for the context's last
+ * emitted seqno.  `flags` is not used.
+ */
+static void
+vc5_pipe_flush(struct pipe_context *pctx, struct pipe_fence_handle **fence,
+               unsigned flags)
+{
+        struct vc5_context *vc5 = vc5_context(pctx);
+
+        vc5_flush(pctx);
+
+        if (!fence)
+                return;
+
+        struct pipe_screen *pscreen = pctx->screen;
+        struct vc5_fence *new_fence = vc5_fence_create(vc5->screen,
+                                                       vc5->last_emit_seqno);
+
+        /* Release whatever fence the caller's pointer held before. */
+        pscreen->fence_reference(pscreen, fence, NULL);
+        *fence = (struct pipe_fence_handle *)new_fence;
+}
+
+/**
+ * pipe_context::invalidate_resource hook.
+ *
+ * Clears the resource's initialized_buffers mask.  If a job is recorded in
+ * write_jobs for the resource and the resource is that job's Z/S buffer,
+ * the depth and stencil bits are also removed from the job's resolve mask.
+ */
+static void
+vc5_invalidate_resource(struct pipe_context *pctx, struct pipe_resource *prsc)
+{
+        struct vc5_context *vc5 = vc5_context(pctx);
+
+        vc5_resource(prsc)->initialized_buffers = 0;
+
+        struct hash_entry *entry = _mesa_hash_table_search(vc5->write_jobs,
+                                                           prsc);
+        if (entry) {
+                struct vc5_job *writer = entry->data;
+
+                if (writer->key.zsbuf && writer->key.zsbuf->texture == prsc) {
+                        writer->resolve &= ~(PIPE_CLEAR_DEPTH |
+                                             PIPE_CLEAR_STENCIL);
+                }
+        }
+}
+
+/**
+ * pipe_context::destroy hook.  Also used as the error path of
+ * vc5_context_create(), which is why the helper objects are checked for
+ * NULL before being destroyed.
+ */
+static void
+vc5_context_destroy(struct pipe_context *pctx)
+{
+        struct vc5_context *vc5 = vc5_context(pctx);
+
+        /* Submit outstanding jobs before tearing anything down. */
+        vc5_flush(pctx);
+
+        if (vc5->blitter)
+                util_blitter_destroy(vc5->blitter);
+
+        if (vc5->primconvert)
+                util_primconvert_destroy(vc5->primconvert);
+
+        if (vc5->uploader)
+                u_upload_destroy(vc5->uploader);
+
+        slab_destroy_child(&vc5->transfer_pool);
+
+        /* NOTE(review): only cbufs[0] is unreferenced here; confirm that no
+         * other color buffer can still be bound at this point.
+         */
+        pipe_surface_reference(&vc5->framebuffer.cbufs[0], NULL);
+        pipe_surface_reference(&vc5->framebuffer.zsbuf, NULL);
+
+        vc5_program_fini(pctx);
+
+        /* The context was allocated with rzalloc() in vc5_context_create(). */
+        ralloc_free(vc5);
+}
+
+/**
+ * Creates a vc5 rendering context for the screen.
+ *
+ * Allocates the context, installs the pipe_context hooks, runs the
+ * per-module init functions, and creates the uploader, blitter and
+ * primconvert helpers.
+ *
+ * The V3D_DEBUG_SHADERDB bit is cleared for the duration of the setup and
+ * put back on every exit path, including the failure ones, so that a failed
+ * context creation doesn't change the global debug flags.
+ *
+ * \param priv   stored in pctx->priv.
+ * \param flags  not used by this function.
+ * \return the new context, or NULL if allocation or helper creation failed.
+ */
+struct pipe_context *
+vc5_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags)
+{
+        struct vc5_screen *screen = vc5_screen(pscreen);
+        struct vc5_context *vc5;
+
+        /* Prevent dumping of the shaders built during context setup. */
+        uint32_t saved_shaderdb_flag = V3D_DEBUG & V3D_DEBUG_SHADERDB;
+        V3D_DEBUG &= ~V3D_DEBUG_SHADERDB;
+
+        vc5 = rzalloc(NULL, struct vc5_context);
+        if (!vc5) {
+                V3D_DEBUG |= saved_shaderdb_flag;
+                return NULL;
+        }
+        struct pipe_context *pctx = &vc5->base;
+
+        vc5->screen = screen;
+
+        pctx->screen = pscreen;
+        pctx->priv = priv;
+        pctx->destroy = vc5_context_destroy;
+        pctx->flush = vc5_pipe_flush;
+        pctx->invalidate_resource = vc5_invalidate_resource;
+
+        vc5_draw_init(pctx);
+        vc5_state_init(pctx);
+        vc5_program_init(pctx);
+        vc5_query_init(pctx);
+        vc5_resource_context_init(pctx);
+
+        vc5_job_init(vc5);
+
+        vc5->fd = screen->fd;
+
+        slab_create_child(&vc5->transfer_pool, &screen->transfer_pool);
+
+        vc5->uploader = u_upload_create_default(&vc5->base);
+        vc5->base.stream_uploader = vc5->uploader;
+        vc5->base.const_uploader = vc5->uploader;
+
+        vc5->blitter = util_blitter_create(pctx);
+        if (!vc5->blitter)
+                goto fail;
+
+        vc5->primconvert = util_primconvert_create(pctx,
+                                                   (1 << PIPE_PRIM_QUADS) - 1);
+        if (!vc5->primconvert)
+                goto fail;
+
+        V3D_DEBUG |= saved_shaderdb_flag;
+
+        vc5->sample_mask = (1 << VC5_MAX_SAMPLES) - 1;
+
+        return &vc5->base;
+
+fail:
+        /* The destroy hook copes with the helpers that weren't created. */
+        pctx->destroy(pctx);
+        V3D_DEBUG |= saved_shaderdb_flag;
+        return NULL;
+}
diff --git a/src/gallium/drivers/vc5/vc5_context.h b/src/gallium/drivers/vc5/vc5_context.h
new file mode 100644 (file)
index 0000000..b8f3f78
--- /dev/null
@@ -0,0 +1,466 @@
+/*
+ * Copyright © 2014-2017 Broadcom
+ * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef VC5_CONTEXT_H
+#define VC5_CONTEXT_H
+
+#include <stdio.h>
+
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+#include "util/bitset.h"
+#include "util/slab.h"
+#include "xf86drm.h"
+#include "vc5_drm.h"
+
+struct vc5_job;
+struct vc5_bo;
+void vc5_job_add_bo(struct vc5_job *job, struct vc5_bo *bo);
+
+#define __user
+#include "vc5_drm.h"
+#include "vc5_bufmgr.h"
+#include "vc5_resource.h"
+#include "vc5_cl.h"
+
+/* Compile-time constant: true when built with USE_VC5_SIMULATOR defined,
+ * false otherwise.  vc5_ioctl() tests it to choose between the simulator
+ * and drmIoctl().
+ */
+#ifdef USE_VC5_SIMULATOR
+#define using_vc5_simulator true
+#else
+#define using_vc5_simulator false
+#endif
+
+/* Dirty-state bits, as stored in vc5_context::dirty and
+ * vc5_compiled_shader::uniform_dirty_bits.  Bits 5, 6, 16 and 28-31 are
+ * currently unassigned.
+ */
+#define VC5_DIRTY_BLEND         (1 <<  0)
+#define VC5_DIRTY_RASTERIZER    (1 <<  1)
+#define VC5_DIRTY_ZSA           (1 <<  2)
+#define VC5_DIRTY_FRAGTEX       (1 <<  3)
+#define VC5_DIRTY_VERTTEX       (1 <<  4)
+
+#define VC5_DIRTY_BLEND_COLOR   (1 <<  7)
+#define VC5_DIRTY_STENCIL_REF   (1 <<  8)
+#define VC5_DIRTY_SAMPLE_MASK   (1 <<  9)
+#define VC5_DIRTY_FRAMEBUFFER   (1 << 10)
+#define VC5_DIRTY_STIPPLE       (1 << 11)
+#define VC5_DIRTY_VIEWPORT      (1 << 12)
+#define VC5_DIRTY_CONSTBUF      (1 << 13)
+#define VC5_DIRTY_VTXSTATE      (1 << 14)
+#define VC5_DIRTY_VTXBUF        (1 << 15)
+#define VC5_DIRTY_SCISSOR       (1 << 17)
+#define VC5_DIRTY_FLAT_SHADE_FLAGS (1 << 18)
+#define VC5_DIRTY_PRIM_MODE     (1 << 19)
+#define VC5_DIRTY_CLIP          (1 << 20)
+#define VC5_DIRTY_UNCOMPILED_VS (1 << 21)
+#define VC5_DIRTY_UNCOMPILED_FS (1 << 22)
+#define VC5_DIRTY_COMPILED_CS   (1 << 23)
+#define VC5_DIRTY_COMPILED_VS   (1 << 24)
+#define VC5_DIRTY_COMPILED_FS   (1 << 25)
+#define VC5_DIRTY_FS_INPUTS     (1 << 26)
+#define VC5_DIRTY_STREAMOUT     (1 << 27)
+
+/* NOTE(review): presumably the upper bound on fragment shader inputs; it is
+ * not referenced in this header, confirm against its users.
+ */
+#define VC5_MAX_FS_INPUTS 64
+
+/**
+ * Driver wrapper around a gallium sampler view.
+ *
+ * base must stay the first member: vc5_sampler_view() converts a
+ * pipe_sampler_view pointer to this type with a plain cast.
+ */
+struct vc5_sampler_view {
+        struct pipe_sampler_view base;
+        /* NOTE(review): p0/p1 look like precomputed texture parameter
+         * words; confirm against the code that fills them in.
+         */
+        uint32_t p0;
+        uint32_t p1;
+        /* Precomputed swizzles to pass in to the shader key. */
+        uint8_t swizzle[4];
+
+        /* NOTE(review): presumably the view's share of the packed texture
+         * shader state; confirm against the code that packs it.
+         */
+        uint8_t texture_shader_state[32];
+};
+
+/**
+ * Driver wrapper around a gallium sampler state.
+ *
+ * base must stay the first member: vc5_sampler_state() converts a
+ * pipe_sampler_state pointer to this type with a plain cast.
+ */
+struct vc5_sampler_state {
+        struct pipe_sampler_state base;
+        /* NOTE(review): p0/p1 look like precomputed texture parameter
+         * words; confirm against the code that fills them in.
+         */
+        uint32_t p0;
+        uint32_t p1;
+
+        /* NOTE(review): presumably the sampler's share of the packed texture
+         * shader state; confirm against the code that packs it.
+         */
+        uint8_t texture_shader_state[32];
+};
+
+/**
+ * Sampler views and sampler states for one shader stage (vc5_context keeps
+ * one instance for vertex textures and one for fragment textures).
+ */
+struct vc5_texture_stateobj {
+        /* Entries below num_textures may be NULL; users skip those. */
+        struct pipe_sampler_view *textures[PIPE_MAX_SAMPLERS];
+        unsigned num_textures;
+        struct pipe_sampler_state *samplers[PIPE_MAX_SAMPLERS];
+        unsigned num_samplers;
+        /* NOTE(review): presumably one relocation per sampler slot pointing
+         * at its emitted texture state; confirm against the writer.
+         */
+        struct vc5_cl_reloc texture_state[PIPE_MAX_SAMPLERS];
+};
+
+/* NOTE(review): contents and data look like parallel arrays with count
+ * entries each, describing a shader's uniforms; confirm against the code
+ * that fills this in.
+ */
+struct vc5_shader_uniform_info {
+        enum quniform_contents *contents;
+        uint32_t *data;
+        uint32_t count;
+};
+
+/**
+ * A shader as handed over by the state tracker (base), before it has been
+ * compiled into a vc5_compiled_shader variant.  This is the type of the
+ * bound VS/FS in vc5_program_stateobj.
+ */
+struct vc5_uncompiled_shader {
+        /** A name for this program, so you can track it in shader-db output. */
+        uint32_t program_id;
+        /** How many variants of this program were compiled, for shader-db. */
+        uint32_t compiled_variant_count;
+        struct pipe_shader_state base;
+        /* NOTE(review): the tf_* fields presumably describe transform
+         * feedback outputs derived from base.stream_output; confirm against
+         * the code that fills them in.
+         */
+        uint32_t num_tf_outputs;
+        struct v3d_varying_slot *tf_outputs;
+        uint16_t tf_specs[PIPE_MAX_SO_BUFFERS];
+        uint32_t num_tf_specs;
+};
+
+/** One compiled shader variant, ready to be referenced from a shader record. */
+struct vc5_compiled_shader {
+        /* BO holding the shader code; the shader record points at offset 0. */
+        struct vc5_bo *bo;
+
+        /* Compiler output for this variant.  The vs view is used for both
+         * the coordinate and the vertex shader, the fs view for the fragment
+         * shader.
+         */
+        union {
+                struct v3d_prog_data *base;
+                struct v3d_vs_prog_data *vs;
+                struct v3d_fs_prog_data *fs;
+        } prog_data;
+
+        /**
+         * VC5_DIRTY_* flags that, when set in vc5->dirty, mean that the
+         * uniforms have to be rewritten (and therefore the shader state
+         * reemitted).
+         */
+        uint32_t uniform_dirty_bits;
+};
+
+/**
+ * Current shader bindings: the uncompiled vertex/fragment shaders, plus the
+ * compiled coordinate (cs), vertex (vs) and fragment (fs) shaders that the
+ * shader record is built from.
+ */
+struct vc5_program_stateobj {
+        struct vc5_uncompiled_shader *bind_vs, *bind_fs;
+        struct vc5_compiled_shader *cs, *vs, *fs;
+};
+
+/**
+ * Constant buffer bindings for one shader stage (vc5_context keeps one of
+ * these per PIPE_SHADER_* type).
+ */
+struct vc5_constbuf_stateobj {
+        struct pipe_constant_buffer cb[PIPE_MAX_CONSTANT_BUFFERS];
+        /* NOTE(review): presumably one bit per cb[] slot; confirm against
+         * the set_constant_buffer implementation.
+         */
+        uint32_t enabled_mask;
+        uint32_t dirty_mask;
+};
+
+/**
+ * Vertex buffer bindings.  vb[] is indexed by
+ * pipe_vertex_element::vertex_buffer_index when attribute records are
+ * emitted.
+ */
+struct vc5_vertexbuf_stateobj {
+        struct pipe_vertex_buffer vb[PIPE_MAX_ATTRIBS];
+        unsigned count;
+        /* NOTE(review): presumably one bit per vb[] slot; confirm against
+         * the set_vertex_buffers implementation.
+         */
+        uint32_t enabled_mask;
+        uint32_t dirty_mask;
+};
+
+/**
+ * Vertex element (attribute layout) state: the first num_elements entries
+ * of pipe[] are the ones emitted as attribute records.
+ */
+struct vc5_vertex_stateobj {
+        struct pipe_vertex_element pipe[PIPE_MAX_ATTRIBS];
+        unsigned num_elements;
+};
+
+/** Stream output target bindings (see VC5_DIRTY_STREAMOUT). */
+struct vc5_streamout_stateobj {
+        struct pipe_stream_output_target *targets[PIPE_MAX_SO_BUFFERS];
+        unsigned num_targets;
+};
+
+/* Hash table key for vc5->jobs: the set of surfaces a job renders to.  The
+ * same two fields also appear directly in struct vc5_job.
+ */
+struct vc5_job_key {
+        struct pipe_surface *cbufs[4];
+        struct pipe_surface *zsbuf;
+};
+
+/**
+ * A complete bin/render job.
+ *
+ * This is all of the state necessary to submit a bin/render to the kernel.
+ * We want to be able to have multiple in progress at a time, so that we don't
+ * need to flush an existing CL just to switch to rendering to a new render
+ * target (which would mean reading back from the old render target when
+ * starting to render to it again).
+ */
+struct vc5_job {
+        struct vc5_context *vc5;
+        /* Binning command list. */
+        struct vc5_cl bcl;
+        /* NOTE(review): presumably the render command list written by
+         * vc5_emit_rcl(); confirm there.
+         */
+        struct vc5_cl rcl;
+        /* Data referenced by address from the BCL: uniforms, default
+         * attribute values and shader state records.
+         */
+        struct vc5_cl indirect;
+        /* Tile allocation memory, allocated when drawing to the job starts. */
+        struct vc5_bo *tile_alloc;
+        /* Number of GL shader state records emitted into this job. */
+        uint32_t shader_rec_count;
+
+        struct drm_vc5_submit_cl submit;
+
+        /**
+         * Set of all BOs referenced by the job.  This will be used for making
+         * the list of BOs that the kernel will need to have paged in to
+         * execute our job.
+         */
+        struct set *bos;
+        /* Size of the submit.bo_handles array. */
+        uint32_t bo_handles_size;
+
+        /** @{ Surfaces to submit rendering for. */
+        struct pipe_surface *cbufs[4];
+        struct pipe_surface *zsbuf;
+        /** @} */
+        /** @{
+         * Bounding box of the scissor across all queued drawing.
+         *
+         * Note that the max values are exclusive.
+         */
+        uint32_t draw_min_x;
+        uint32_t draw_min_y;
+        uint32_t draw_max_x;
+        uint32_t draw_max_y;
+        /** @} */
+        /** @{
+         * Width/height of the color framebuffer being rendered to,
+         * for VC5_TILE_RENDERING_MODE_CONFIG.
+         */
+        uint32_t draw_width;
+        uint32_t draw_height;
+        /** @} */
+        /** @{ Tile information, depending on MSAA and float color buffer. */
+        uint32_t draw_tiles_x; /**< Number of tiles wide for framebuffer. */
+        uint32_t draw_tiles_y; /**< Number of tiles high for framebuffer. */
+
+        uint32_t tile_width; /**< Width of a tile. */
+        uint32_t tile_height; /**< Height of a tile. */
+        /** maximum internal_bpp of all color render targets. */
+        uint32_t internal_bpp;
+
+        /** Whether the current rendering is in a 4X MSAA tile buffer. */
+        bool msaa;
+        /** @} */
+
+        /* Bitmask of PIPE_CLEAR_* of buffers that were cleared before the
+         * first rendering.
+         */
+        uint32_t cleared;
+        /* Bitmask of PIPE_CLEAR_* of buffers that have been rendered to
+         * (either clears or draws).
+         */
+        uint32_t resolve;
+        uint32_t clear_color[2];
+        uint32_t clear_zs; /**< 24-bit unorm depth/stencil */
+
+        /**
+         * Set if some drawing (triangles, blits, or just a glClear()) has
+         * been done to the FBO, meaning that we need to
+         * DRM_IOCTL_VC5_SUBMIT_CL.
+         */
+        bool needs_flush;
+
+        bool uses_early_z;
+
+        /**
+         * Number of draw calls (not counting full buffer clears) queued in
+         * the current job.
+         */
+        uint32_t draw_calls_queued;
+
+        /* Key under which this job is stored in vc5->jobs. */
+        struct vc5_job_key key;
+};
+
+/**
+ * Per-context driver state.
+ *
+ * base must stay the first member: vc5_context() converts a pipe_context
+ * pointer to this type with a plain cast.
+ */
+struct vc5_context {
+        struct pipe_context base;
+
+        /* Copy of screen->fd, taken at context creation. */
+        int fd;
+        struct vc5_screen *screen;
+
+        /** The 3D rendering job for the currently bound FBO. */
+        struct vc5_job *job;
+
+        /* Map from struct vc5_job_key to the job for that FBO.
+         */
+        struct hash_table *jobs;
+
+        /**
+         * Map from vc5_resource to a job writing to that resource.
+         *
+         * Primarily for flushing jobs rendering to textures that are now
+         * being read from.
+         */
+        struct hash_table *write_jobs;
+
+        /* Child of the screen's transfer_pool slab. */
+        struct slab_child_pool transfer_pool;
+        struct blitter_context *blitter;
+
+        /** bitfield of VC5_DIRTY_* */
+        uint32_t dirty;
+
+        /* Draws with mode >= PIPE_PRIM_QUADS are routed through this. */
+        struct primconvert_context *primconvert;
+
+        struct hash_table *fs_cache, *vs_cache;
+        uint32_t next_uncompiled_program_id;
+        uint64_t next_compiled_program_id;
+
+        struct vc5_compiler_state *compiler_state;
+
+        /* Primitive mode of the most recent draw; a change sets
+         * VC5_DIRTY_PRIM_MODE.
+         */
+        uint8_t prim_mode;
+
+        /** Maximum index buffer valid for the current shader_rec. */
+        uint32_t max_index;
+        /** Last index bias baked into the current shader_rec. */
+        uint32_t last_index_bias;
+
+        /** Seqno of the last CL flush's job. */
+        uint64_t last_emit_seqno;
+
+        /* Also installed as base.stream_uploader and base.const_uploader. */
+        struct u_upload_mgr *uploader;
+
+        /** @{ Current pipeline state objects */
+        struct pipe_scissor_state scissor;
+        struct pipe_blend_state *blend;
+        struct vc5_rasterizer_state *rasterizer;
+        struct vc5_depth_stencil_alpha_state *zsa;
+
+        struct vc5_texture_stateobj verttex, fragtex;
+
+        struct vc5_program_stateobj prog;
+
+        struct vc5_vertex_stateobj *vtx;
+
+        /* NOTE(review): hf presumably holds the same color as f converted
+         * to half-float; confirm against set_blend_color.
+         */
+        struct {
+                struct pipe_blend_color f;
+                uint16_t hf[4];
+        } blend_color;
+        struct pipe_stencil_ref stencil_ref;
+        unsigned sample_mask;
+        struct pipe_framebuffer_state framebuffer;
+        struct pipe_poly_stipple stipple;
+        struct pipe_clip_state clip;
+        struct pipe_viewport_state viewport;
+        struct vc5_constbuf_stateobj constbuf[PIPE_SHADER_TYPES];
+        struct vc5_vertexbuf_stateobj vertexbuf;
+        struct vc5_streamout_stateobj streamout;
+        /** @} */
+};
+
+/**
+ * Rasterizer CSO: the gallium state (base) plus values precomputed from it
+ * for emission.
+ */
+struct vc5_rasterizer_state {
+        struct pipe_rasterizer_state base;
+
+        /* VC5_CONFIGURATION_BITS */
+        uint8_t config_bits[3];
+
+        float point_size;
+
+        /**
+         * Half-float (1/8/7 bits) value of polygon offset units for
+         * VC5_PACKET_DEPTH_OFFSET
+         */
+        uint16_t offset_units;
+        /**
+         * Half-float (1/8/7 bits) value of polygon offset scale for
+         * VC5_PACKET_DEPTH_OFFSET
+         */
+        uint16_t offset_factor;
+};
+
+/**
+ * Depth/stencil/alpha CSO: the gallium state (base) plus values precomputed
+ * from it.
+ */
+struct vc5_depth_stencil_alpha_state {
+        struct pipe_depth_stencil_alpha_state base;
+
+        /* NOTE(review): presumably whether this state allows early-Z; see
+         * also vc5_job::uses_early_z.  Confirm against the CSO creation code.
+         */
+        bool early_z_enable;
+
+        /** Uniforms for stencil state.
+         *
+         * Index 0 is either the front config, or the front-and-back config.
+         * Index 1 is the back config if doing separate back stencil.
+         * Index 2 is the writemask config if it's not a common mask value.
+         */
+        uint32_t stencil_uniforms[3];
+};
+
+/* Prints a printf-style message to stderr, but only when V3D_DEBUG_PERF is
+ * set in V3D_DEBUG.  Wrapped in do/while (0) so it behaves as one statement.
+ */
+#define perf_debug(...) do {                            \
+        if (unlikely(V3D_DEBUG & V3D_DEBUG_PERF))       \
+                fprintf(stderr, __VA_ARGS__);           \
+} while (0)
+
+/** Converts a gallium context pointer to the vc5 context that embeds it. */
+static inline struct vc5_context *
+vc5_context(struct pipe_context *pcontext)
+{
+        struct vc5_context *vc5 = (struct vc5_context *)pcontext;
+
+        return vc5;
+}
+
+/** Converts a gallium sampler view pointer to the vc5 wrapper embedding it. */
+static inline struct vc5_sampler_view *
+vc5_sampler_view(struct pipe_sampler_view *psview)
+{
+        struct vc5_sampler_view *view = (struct vc5_sampler_view *)psview;
+
+        return view;
+}
+
+/** Converts a gallium sampler state pointer to the vc5 wrapper embedding it. */
+static inline struct vc5_sampler_state *
+vc5_sampler_state(struct pipe_sampler_state *psampler)
+{
+        struct vc5_sampler_state *sampler =
+                (struct vc5_sampler_state *)psampler;
+
+        return sampler;
+}
+
+/* Context creation and the per-module init hooks it calls. */
+struct pipe_context *vc5_context_create(struct pipe_screen *pscreen,
+                                        void *priv, unsigned flags);
+void vc5_draw_init(struct pipe_context *pctx);
+void vc5_state_init(struct pipe_context *pctx);
+void vc5_program_init(struct pipe_context *pctx);
+void vc5_program_fini(struct pipe_context *pctx);
+void vc5_query_init(struct pipe_context *pctx);
+
+/* Simulator entry points.  vc5_ioctl() routes requests to
+ * vc5_simulator_ioctl() when using_vc5_simulator is true.
+ */
+void vc5_simulator_init(struct vc5_screen *screen);
+void vc5_simulator_destroy(struct vc5_screen *screen);
+int vc5_simulator_flush(struct vc5_context *vc5,
+                        struct drm_vc5_submit_cl *args,
+                        struct vc5_job *job);
+int vc5_simulator_ioctl(int fd, unsigned long request, void *arg);
+void vc5_simulator_open_from_handle(int fd, uint32_t winsys_stride,
+                                    int handle, uint32_t size);
+
+/**
+ * Issues an ioctl on the device: through drmIoctl() on a normal build, or
+ * through the simulator when using_vc5_simulator is true.
+ */
+static inline int
+vc5_ioctl(int fd, unsigned long request, void *arg)
+{
+        if (!using_vc5_simulator)
+                return drmIoctl(fd, request, arg);
+        else
+                return vc5_simulator_ioctl(fd, request, arg);
+}
+
+/* Shader uniform handling. */
+void vc5_set_shader_uniform_dirty_flags(struct vc5_compiled_shader *shader);
+struct vc5_cl_reloc vc5_write_uniforms(struct vc5_context *vc5,
+                                       struct vc5_compiled_shader *shader,
+                                       struct vc5_constbuf_stateobj *cb,
+                                       struct vc5_texture_stateobj *texstate);
+
+/* Job lookup, BO tracking, submission and state emission. */
+void vc5_flush(struct pipe_context *pctx);
+void vc5_job_init(struct vc5_context *vc5);
+struct vc5_job *vc5_get_job(struct vc5_context *vc5,
+                            struct pipe_surface **cbufs,
+                            struct pipe_surface *zsbuf);
+struct vc5_job *vc5_get_job_for_fbo(struct vc5_context *vc5);
+void vc5_job_add_bo(struct vc5_job *job, struct vc5_bo *bo);
+void vc5_job_submit(struct vc5_context *vc5, struct vc5_job *job);
+void vc5_flush_jobs_writing_resource(struct vc5_context *vc5,
+                                     struct pipe_resource *prsc);
+void vc5_flush_jobs_reading_resource(struct vc5_context *vc5,
+                                     struct pipe_resource *prsc);
+void vc5_emit_state(struct pipe_context *pctx);
+void vc5_update_compiled_shaders(struct vc5_context *vc5, uint8_t prim_mode);
+
+/* Format queries. */
+bool vc5_rt_format_supported(enum pipe_format f);
+bool vc5_tex_format_supported(enum pipe_format f);
+uint8_t vc5_get_rt_format(enum pipe_format f);
+uint8_t vc5_get_tex_format(enum pipe_format f);
+uint8_t vc5_get_tex_return_size(enum pipe_format f);
+uint8_t vc5_get_tex_return_channels(enum pipe_format f);
+const uint8_t *vc5_get_format_swizzle(enum pipe_format f);
+void vc5_get_internal_type_bpp_for_output_format(uint32_t format,
+                                                 uint32_t *type,
+                                                 uint32_t *bpp);
+
+/* Queries, blits and render command list emission. */
+void vc5_init_query_functions(struct vc5_context *vc5);
+void vc5_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info);
+void vc5_blitter_save(struct vc5_context *vc5);
+void vc5_emit_rcl(struct vc5_job *job);
+
+
+#endif /* VC5_CONTEXT_H */
diff --git a/src/gallium/drivers/vc5/vc5_draw.c b/src/gallium/drivers/vc5/vc5_draw.c
new file mode 100644 (file)
index 0000000..d78fa32
--- /dev/null
@@ -0,0 +1,607 @@
+/*
+ * Copyright © 2014-2017 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "util/u_blitter.h"
+#include "util/u_prim.h"
+#include "util/u_format.h"
+#include "util/u_pack_color.h"
+#include "util/u_prim_restart.h"
+#include "util/u_upload_mgr.h"
+#include "indices/u_primconvert.h"
+
+#include "vc5_context.h"
+#include "vc5_resource.h"
+#include "vc5_cl.h"
+#include "broadcom/cle/v3d_packet_v33_pack.h"
+#include "broadcom/compiler/v3d_compiler.h"
+
+/**
+ * Does the initial binning command list setup for drawing to a given FBO.
+ *
+ * Nothing is emitted if the current job is already flagged as needing a
+ * flush; otherwise the binning prologue is written and the flag is set.
+ */
+static void
+vc5_start_draw(struct vc5_context *vc5)
+{
+        struct vc5_job *job = vc5->job;
+
+        if (job->needs_flush)
+                return;
+
+        /* Get space to emit our BCL state, using a branch to jump to a new BO
+         * if necessary.
+         */
+        vc5_cl_ensure_space_with_branch(&job->bcl, 256 /* XXX */);
+
+        job->submit.bcl_start = job->bcl.bo->offset;
+        vc5_job_add_bo(job, job->bcl.bo);
+
+        /* Tile allocation memory is kept on the job; the tile state data
+         * array (64 bytes per tile) is only referenced from the BCL and our
+         * local reference is dropped once the config packet is written.
+         */
+        const uint32_t tile_alloc_size = 1024 * 1024;
+        job->tile_alloc = vc5_bo_alloc(vc5->screen, tile_alloc_size,
+                                       "tile alloc");
+
+        uint32_t tile_state_size =
+                job->draw_tiles_y * job->draw_tiles_x * 64;
+        struct vc5_bo *tile_state = vc5_bo_alloc(vc5->screen,
+                                                 tile_state_size,
+                                                 "TSDA");
+
+        /* "Binning mode lists start with a Tile Binning Mode Configuration
+         * item (120)"
+         *
+         * Part1 signals the end of binning config setup.
+         */
+        cl_emit(&job->bcl, TILE_BINNING_MODE_CONFIGURATION_PART2, part2) {
+                part2.tile_allocation_memory_address =
+                        cl_address(job->tile_alloc, 0);
+                part2.tile_allocation_memory_size = job->tile_alloc->size;
+        }
+
+        cl_emit(&job->bcl, TILE_BINNING_MODE_CONFIGURATION_PART1, part1) {
+                part1.tile_state_data_array_base_address =
+                        cl_address(tile_state, 0);
+
+                part1.width_in_tiles = job->draw_tiles_x;
+                part1.height_in_tiles = job->draw_tiles_y;
+
+                /* Must be >= 1 */
+                part1.number_of_render_targets = 1;
+
+                part1.multisample_mode_4x = job->msaa;
+
+                part1.maximum_bpp_of_all_render_targets = job->internal_bpp;
+        }
+
+        vc5_bo_unreference(&tile_state);
+
+        /* There's definitely nothing in the VCD cache we want. */
+        cl_emit(&job->bcl, FLUSH_VCD_CACHE, flush);
+
+        /* "Binning mode lists must have a Start Tile Binning item (6) after
+         *  any prefix state data before the binning list proper starts."
+         */
+        cl_emit(&job->bcl, START_TILE_BINNING, start);
+
+        cl_emit(&job->bcl, PRIMITIVE_LIST_FORMAT, list_fmt) {
+                list_fmt.data_type = LIST_INDEXED;
+                list_fmt.primitive_type = LIST_TRIANGLES;
+        }
+
+        /* Record the framebuffer size this job is drawing at, and mark the
+         * job as having work to submit.
+         */
+        job->draw_width = vc5->framebuffer.width;
+        job->draw_height = vc5->framebuffer.height;
+        job->needs_flush = true;
+}
+
+/**
+ * Flushes any job that is writing to a resource bound as a texture in
+ * stage_tex, so it can be sampled by the upcoming draw.  Empty (NULL)
+ * texture slots are skipped.
+ */
+static void
+vc5_predraw_check_textures(struct pipe_context *pctx,
+                           struct vc5_texture_stateobj *stage_tex)
+{
+        struct vc5_context *vc5 = vc5_context(pctx);
+
+        for (int slot = 0; slot < stage_tex->num_textures; slot++) {
+                struct pipe_sampler_view *psview = stage_tex->textures[slot];
+
+                if (psview)
+                        vc5_flush_jobs_writing_resource(vc5, psview->texture);
+        }
+}
+
+/**
+ * Writes the default attribute values, (0, 0, 0, 1) for every vertex
+ * element, into the job's indirect CL.
+ *
+ * Returns a relocation pointing at the written data.  A reference is taken
+ * on its BO, which the caller must drop.
+ */
+static struct vc5_cl_reloc
+vc5_get_default_values(struct vc5_context *vc5)
+{
+        static const float attr_default[4] = { 0.0, 0.0, 0.0, 1.0 };
+        struct vc5_cl *indirect = &vc5->job->indirect;
+
+        /* VC5_DIRTY_VTXSTATE */
+        unsigned num_attrs = vc5->vtx->num_elements;
+
+        /* Set up the default values for attributes: four floats each. */
+        vc5_cl_ensure_space(indirect, 4 * 4 * num_attrs, 4);
+
+        struct vc5_cl_reloc reloc =
+                cl_address(indirect->bo, cl_offset(indirect));
+        vc5_bo_reference(reloc.bo);
+
+        struct vc5_cl_out *out = cl_start(indirect);
+        for (unsigned attr = 0; attr < num_attrs; attr++) {
+                for (unsigned chan = 0; chan < 4; chan++)
+                        cl_aligned_f(&out, attr_default[chan]);
+        }
+        cl_end(indirect, out);
+
+        return reloc;
+}
+
+/**
+ * Writes a GL shader state record, followed by one attribute record per
+ * vertex element, into the job's indirect CL and emits the GL_SHADER_STATE
+ * packet pointing at it into the BCL.
+ *
+ * The attribute addresses written here include
+ * vb->stride * info->index_bias, so the record must be re-emitted whenever
+ * the index bias changes.  vc5->last_index_bias is updated to the bias that
+ * was baked in, which is what the caller compares against.
+ *
+ * extra_index_bias is not used by this function.
+ */
+static void
+vc5_emit_gl_shader_state(struct vc5_context *vc5,
+                         const struct pipe_draw_info *info,
+                         uint32_t extra_index_bias)
+{
+        struct vc5_job *job = vc5->job;
+        /* VC5_DIRTY_VTXSTATE */
+        struct vc5_vertex_stateobj *vtx = vc5->vtx;
+        /* VC5_DIRTY_VTXBUF */
+        struct vc5_vertexbuf_stateobj *vertexbuf = &vc5->vertexbuf;
+
+        /* Upload the uniforms to the indirect CL first.  The coordinate
+         * shader shares the vertex stage's constant buffers and textures.
+         */
+        struct vc5_cl_reloc fs_uniforms =
+                vc5_write_uniforms(vc5, vc5->prog.fs,
+                                   &vc5->constbuf[PIPE_SHADER_FRAGMENT],
+                                   &vc5->fragtex);
+        struct vc5_cl_reloc vs_uniforms =
+                vc5_write_uniforms(vc5, vc5->prog.vs,
+                                   &vc5->constbuf[PIPE_SHADER_VERTEX],
+                                   &vc5->verttex);
+        struct vc5_cl_reloc cs_uniforms =
+                vc5_write_uniforms(vc5, vc5->prog.cs,
+                                   &vc5->constbuf[PIPE_SHADER_VERTEX],
+                                   &vc5->verttex);
+        struct vc5_cl_reloc default_values = vc5_get_default_values(vc5);
+
+        /* Reserve room for the shader record and all of its attribute
+         * records in one go, so they are laid out back to back.
+         */
+        uint32_t shader_rec_offset =
+                vc5_cl_ensure_space(&job->indirect,
+                                    cl_packet_length(GL_SHADER_STATE_RECORD) +
+                                    vtx->num_elements *
+                                    cl_packet_length(GL_SHADER_STATE_ATTRIBUTE_RECORD),
+                                    32);
+
+        cl_emit(&job->indirect, GL_SHADER_STATE_RECORD, shader) {
+                shader.enable_clipping = true;
+                /* VC5_DIRTY_PRIM_MODE | VC5_DIRTY_RASTERIZER */
+                shader.point_size_in_shaded_vertex_data =
+                        (info->mode == PIPE_PRIM_POINTS &&
+                         vc5->rasterizer->base.point_size_per_vertex);
+
+                shader.fragment_shader_does_z_writes =
+                        vc5->prog.fs->prog_data.fs->writes_z;
+
+                shader.number_of_varyings_in_fragment_shader =
+                        vc5->prog.fs->prog_data.base->num_inputs;
+
+                shader.propagate_nans = true;
+
+                shader.coordinate_shader_code_address =
+                        cl_address(vc5->prog.cs->bo, 0);
+                shader.vertex_shader_code_address =
+                        cl_address(vc5->prog.vs->bo, 0);
+                shader.fragment_shader_code_address =
+                        cl_address(vc5->prog.fs->bo, 0);
+
+                /* XXX: Use combined input/output size flag in the common
+                 * case.
+                 */
+                shader.coordinate_shader_has_separate_input_and_output_vpm_blocks = true;
+                shader.vertex_shader_has_separate_input_and_output_vpm_blocks = true;
+                shader.coordinate_shader_input_vpm_segment_size =
+                        vc5->prog.cs->prog_data.vs->vpm_input_size;
+                shader.vertex_shader_input_vpm_segment_size =
+                        vc5->prog.vs->prog_data.vs->vpm_input_size;
+
+                shader.coordinate_shader_output_vpm_segment_size =
+                        vc5->prog.cs->prog_data.vs->vpm_output_size;
+                shader.vertex_shader_output_vpm_segment_size =
+                        vc5->prog.vs->prog_data.vs->vpm_output_size;
+
+                shader.coordinate_shader_uniforms_address = cs_uniforms;
+                shader.vertex_shader_uniforms_address = vs_uniforms;
+                shader.fragment_shader_uniforms_address = fs_uniforms;
+
+                shader.vertex_id_read_by_coordinate_shader =
+                        vc5->prog.cs->prog_data.vs->uses_vid;
+                shader.instance_id_read_by_coordinate_shader =
+                        vc5->prog.cs->prog_data.vs->uses_iid;
+                shader.vertex_id_read_by_vertex_shader =
+                        vc5->prog.vs->prog_data.vs->uses_vid;
+                shader.instance_id_read_by_vertex_shader =
+                        vc5->prog.vs->prog_data.vs->uses_iid;
+
+                shader.address_of_default_attribute_values = default_values;
+        }
+
+        for (int i = 0; i < vtx->num_elements; i++) {
+                struct pipe_vertex_element *elem = &vtx->pipe[i];
+                struct pipe_vertex_buffer *vb =
+                        &vertexbuf->vb[elem->vertex_buffer_index];
+                struct vc5_resource *rsc = vc5_resource(vb->buffer.resource);
+                const struct util_format_description *desc =
+                        util_format_description(elem->src_format);
+
+                /* The index bias is folded into the attribute's base
+                 * address rather than applied per index.
+                 */
+                uint32_t offset = (vb->buffer_offset +
+                                   elem->src_offset +
+                                   vb->stride * info->index_bias);
+
+                cl_emit(&job->indirect, GL_SHADER_STATE_ATTRIBUTE_RECORD, attr) {
+                        uint32_t r_size = desc->channel[0].size;
+
+                        /* vec_size == 0 means 4 */
+                        attr.vec_size = desc->nr_channels & 3;
+
+                        switch (desc->channel[0].type) {
+                        case UTIL_FORMAT_TYPE_FLOAT:
+                                if (r_size == 32) {
+                                        attr.type = ATTRIBUTE_FLOAT;
+                                } else {
+                                        assert(r_size == 16);
+                                        attr.type = ATTRIBUTE_HALF_FLOAT;
+                                }
+                                break;
+
+                        case UTIL_FORMAT_TYPE_SIGNED:
+                        case UTIL_FORMAT_TYPE_UNSIGNED:
+                                switch (r_size) {
+                                case 32:
+                                        attr.type = ATTRIBUTE_INT;
+                                        break;
+                                case 16:
+                                        attr.type = ATTRIBUTE_SHORT;
+                                        break;
+                                case 10:
+                                        attr.type = ATTRIBUTE_INT2_10_10_10;
+                                        break;
+                                case 8:
+                                        attr.type = ATTRIBUTE_BYTE;
+                                        break;
+                                default:
+                                        fprintf(stderr,
+                                                "format %s unsupported\n",
+                                                desc->name);
+                                        attr.type = ATTRIBUTE_BYTE;
+                                        abort();
+                                }
+                                break;
+
+                        default:
+                                fprintf(stderr,
+                                        "format %s unsupported\n",
+                                        desc->name);
+                                abort();
+                        }
+
+                        attr.signed_int_type =
+                                desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED;
+
+                        attr.normalized_int_type = desc->channel[0].normalized;
+                        attr.read_as_int_uint = desc->channel[0].pure_integer;
+                        attr.address = cl_address(rsc->bo, offset);
+                        attr.stride = vb->stride;
+                        attr.instance_divisor = elem->instance_divisor;
+                        attr.number_of_values_read_by_coordinate_shader =
+                                vc5->prog.cs->prog_data.vs->vattr_sizes[i];
+                        attr.number_of_values_read_by_vertex_shader =
+                                vc5->prog.vs->prog_data.vs->vattr_sizes[i];
+                }
+        }
+
+        cl_emit(&job->bcl, GL_SHADER_STATE, state) {
+                state.address = cl_address(job->indirect.bo, shader_rec_offset);
+                state.number_of_attribute_arrays = vtx->num_elements;
+        }
+
+        /* Drop the references held on the uniform and default value BOs. */
+        vc5_bo_unreference(&cs_uniforms.bo);
+        vc5_bo_unreference(&vs_uniforms.bo);
+        vc5_bo_unreference(&fs_uniforms.bo);
+        vc5_bo_unreference(&default_values.bo);
+
+        job->shader_rec_count++;
+
+        /* Remember the bias baked into the attribute addresses above, so
+         * the next draw can tell whether this record is still valid for it.
+         */
+        vc5->last_index_bias = info->index_bias;
+}
+
+/**
+ * Gallium draw_vbo hook: emits one draw call into the binner command list
+ * (BCL) of the job for the currently bound framebuffer.
+ *
+ * Draws that the code below cannot emit directly are handed to helpers:
+ * primitive restart with an index other than the all-ones value for the
+ * index size goes through util_draw_vbo_without_prim_restart(), and
+ * primitive modes from PIPE_PRIM_QUADS upward go through the primconvert
+ * helper.
+ *
+ * After the draw is emitted, the depth/stencil and color buffers the draw
+ * may write are flagged for resolve and their BOs are added to the job.
+ */
+static void
+vc5_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
+{
+        struct vc5_context *vc5 = vc5_context(pctx);
+
+        if (!info->count_from_stream_output && !info->indirect &&
+            !info->primitive_restart &&
+            !u_trim_pipe_prim(info->mode, (unsigned*)&info->count))
+                return;
+
+        /* Fall back for weird desktop GL primitive restart values. */
+        if (info->primitive_restart &&
+            info->index_size) {
+                uint32_t mask = ~0;
+
+                switch (info->index_size) {
+                case 2:
+                        mask = 0xffff;
+                        break;
+                case 1:
+                        mask = 0xff;
+                        break;
+                }
+
+                if (info->restart_index != mask) {
+                        util_draw_vbo_without_prim_restart(pctx, info);
+                        return;
+                }
+        }
+
+        if (info->mode >= PIPE_PRIM_QUADS) {
+                util_primconvert_save_rasterizer_state(vc5->primconvert, &vc5->rasterizer->base);
+                util_primconvert_draw_vbo(vc5->primconvert, info);
+                perf_debug("Fallback conversion for %d %s vertices\n",
+                           info->count, u_prim_name(info->mode));
+                return;
+        }
+
+        /* Before setting up the draw, do any fixup blits necessary. */
+        vc5_predraw_check_textures(pctx, &vc5->verttex);
+        vc5_predraw_check_textures(pctx, &vc5->fragtex);
+
+        struct vc5_job *job = vc5_get_job_for_fbo(vc5);
+
+        /* Get space to emit our draw call into the BCL, using a branch to
+         * jump to a new BO if necessary.
+         */
+        vc5_cl_ensure_space_with_branch(&job->bcl, 256 /* XXX */);
+
+        if (vc5->prim_mode != info->mode) {
+                vc5->prim_mode = info->mode;
+                vc5->dirty |= VC5_DIRTY_PRIM_MODE;
+        }
+
+        vc5_start_draw(vc5);
+        vc5_update_compiled_shaders(vc5, info->mode);
+
+        vc5_emit_state(pctx);
+
+        /* Only re-emit the GL shader state when something it depends on
+         * changed since the previous draw.
+         */
+        if ((vc5->dirty & (VC5_DIRTY_VTXBUF |
+                           VC5_DIRTY_VTXSTATE |
+                           VC5_DIRTY_PRIM_MODE |
+                           VC5_DIRTY_RASTERIZER |
+                           VC5_DIRTY_COMPILED_CS |
+                           VC5_DIRTY_COMPILED_VS |
+                           VC5_DIRTY_COMPILED_FS |
+                           vc5->prog.cs->uniform_dirty_bits |
+                           vc5->prog.vs->uniform_dirty_bits |
+                           vc5->prog.fs->uniform_dirty_bits)) ||
+            vc5->last_index_bias != info->index_bias) {
+                vc5_emit_gl_shader_state(vc5, info, 0);
+        }
+
+        vc5->dirty = 0;
+
+        /* Note that the primitive type fields match with OpenGL/gallium
+         * definitions, up to but not including QUADS.
+         */
+        if (info->index_size) {
+                uint32_t index_size = info->index_size;
+                uint32_t offset = info->start * index_size;
+                struct pipe_resource *prsc;
+                if (info->has_user_indices) {
+                        /* User-pointer indices are copied into an upload
+                         * buffer; offset is overwritten with the location of
+                         * the copy.
+                         *
+                         * NOTE(review): if the upload can fail and leave prsc
+                         * NULL, rsc->bo below would be a NULL dereference --
+                         * confirm against u_upload_data().
+                         */
+                        prsc = NULL;
+                        u_upload_data(vc5->uploader, 0,
+                                      info->count * info->index_size, 4,
+                                      info->index.user,
+                                      &offset, &prsc);
+                } else {
+                        prsc = info->index.resource;
+                }
+                struct vc5_resource *rsc = vc5_resource(prsc);
+
+                if (info->instance_count > 1) {
+                        cl_emit(&job->bcl, INDEXED_INSTANCED_PRIMITIVE_LIST, prim) {
+                                /* index_size of 1/2/4 bytes maps to 0/1/2. */
+                                prim.index_type = ffs(info->index_size) - 1;
+                                prim.maximum_index = (1u << 31) - 1; /* XXX */
+                                prim.address_of_indices_list =
+                                        cl_address(rsc->bo, offset);
+                                prim.mode = info->mode;
+                                prim.enable_primitive_restarts = info->primitive_restart;
+
+                                prim.number_of_instances = info->instance_count;
+                                prim.instance_length = info->count;
+                        }
+                } else {
+                        cl_emit(&job->bcl, INDEXED_PRIMITIVE_LIST, prim) {
+                                /* index_size of 1/2/4 bytes maps to 0/1/2. */
+                                prim.index_type = ffs(info->index_size) - 1;
+                                prim.length = info->count;
+                                prim.maximum_index = (1u << 31) - 1; /* XXX */
+                                prim.address_of_indices_list =
+                                        cl_address(rsc->bo, offset);
+                                prim.mode = info->mode;
+                                prim.enable_primitive_restarts = info->primitive_restart;
+                        }
+                }
+
+                /* Drop the reference on the temporary upload buffer. */
+                if (info->has_user_indices)
+                        pipe_resource_reference(&prsc, NULL);
+        } else {
+                if (info->instance_count > 1) {
+                        cl_emit(&job->bcl, VERTEX_ARRAY_INSTANCED_PRIMITIVES, prim) {
+                                prim.mode = info->mode;
+                                prim.index_of_first_vertex = info->start;
+                                prim.number_of_instances = info->instance_count;
+                                prim.instance_length = info->count;
+                        }
+                } else {
+                        cl_emit(&job->bcl, VERTEX_ARRAY_PRIMITIVES, prim) {
+                                prim.mode = info->mode;
+                                prim.length = info->count;
+                                prim.index_of_first_vertex = info->start;
+                        }
+                }
+        }
+
+        /* Counted exactly once per draw, for both the indexed and the
+         * non-indexed paths above.
+         */
+        job->draw_calls_queued++;
+
+        if (vc5->zsa && job->zsbuf &&
+            (vc5->zsa->base.depth.enabled ||
+             vc5->zsa->base.stencil[0].enabled)) {
+                struct vc5_resource *rsc = vc5_resource(job->zsbuf->texture);
+                vc5_job_add_bo(job, rsc->bo);
+
+                if (vc5->zsa->base.depth.enabled) {
+                        job->resolve |= PIPE_CLEAR_DEPTH;
+                        /* OR the bit in so that an already-initialized
+                         * stencil aspect is not forgotten.
+                         */
+                        rsc->initialized_buffers |= PIPE_CLEAR_DEPTH;
+
+                        if (vc5->zsa->early_z_enable)
+                                job->uses_early_z = true;
+                }
+
+                if (vc5->zsa->base.stencil[0].enabled) {
+                        job->resolve |= PIPE_CLEAR_STENCIL;
+                        rsc->initialized_buffers |= PIPE_CLEAR_STENCIL;
+                }
+        }
+
+        /* Flag every bound color buffer that is not yet marked for resolve,
+         * and make sure its BO is referenced by the job.
+         */
+        for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) {
+                uint32_t bit = PIPE_CLEAR_COLOR0 << i;
+
+                if (job->resolve & bit || !job->cbufs[i])
+                        continue;
+                struct vc5_resource *rsc = vc5_resource(job->cbufs[i]->texture);
+
+                job->resolve |= bit;
+                vc5_job_add_bo(job, rsc->bo);
+        }
+
+        if (V3D_DEBUG & V3D_DEBUG_ALWAYS_FLUSH)
+                vc5_flush(pctx);
+}
+
+/**
+ * Packs a float RGBA color into the given pipe format and returns the packed
+ * value: the 16-bit member for formats with a 2-byte block size, otherwise
+ * the first 32-bit word.
+ */
+static uint32_t
+pack_rgba(enum pipe_format format, const float *rgba)
+{
+        union util_color packed;
+
+        util_pack_color(rgba, format, &packed);
+
+        return (util_format_get_blocksize(format) == 2) ? packed.us
+                                                        : packed.ui[0];
+}
+
+/**
+ * Gallium clear hook: records clear values on the job for the bound
+ * framebuffer rather than drawing anything.
+ *
+ * If the job already has draws queued it is submitted first and a new job is
+ * fetched.  The color value is stored in job->clear_color[], the packed
+ * depth/stencil value in job->clear_zs, and the cleared buffers are flagged
+ * in job->cleared and job->resolve.  The job's draw bounds are set to the
+ * whole framebuffer.
+ */
+static void
+vc5_clear(struct pipe_context *pctx, unsigned buffers,
+          const union pipe_color_union *color, double depth, unsigned stencil)
+{
+        struct vc5_context *vc5 = vc5_context(pctx);
+        struct vc5_job *job = vc5_get_job_for_fbo(vc5);
+
+        /* We can't flag new buffers for clearing once we've queued draws.  We
+         * could avoid this by using the 3d engine to clear.
+         */
+        if (job->draw_calls_queued) {
+                perf_debug("Flushing rendering to process new clear.\n");
+                vc5_job_submit(vc5, job);
+                job = vc5_get_job_for_fbo(vc5);
+        }
+
+        if (buffers & PIPE_CLEAR_COLOR0) {
+                struct vc5_resource *rsc =
+                        vc5_resource(vc5->framebuffer.cbufs[0]->texture);
+                uint32_t clear_color;
+
+#if 0
+                if (vc5_rt_format_is_565(vc5->framebuffer.cbufs[0]->format)) {
+                        /* In 565 mode, the hardware will be packing our color
+                         * for us.
+                         */
+                        clear_color = pack_rgba(PIPE_FORMAT_R8G8B8A8_UNORM,
+                                                color->f);
+                } else {
+                        /* Otherwise, we need to do this packing because we
+                         * support multiple swizzlings of RGBA8888.
+                         */
+                        clear_color =
+                                pack_rgba(vc5->framebuffer.cbufs[0]->format,
+                                          color->f);
+                }
+#endif
+                clear_color = pack_rgba(vc5->framebuffer.cbufs[0]->format,
+                                        color->f);
+
+                job->clear_color[0] = job->clear_color[1] = clear_color;
+                rsc->initialized_buffers |= (buffers & PIPE_CLEAR_COLOR0);
+        }
+
+        if (buffers & PIPE_CLEAR_DEPTHSTENCIL) {
+                struct vc5_resource *rsc =
+                        vc5_resource(vc5->framebuffer.zsbuf->texture);
+                unsigned zsclear = buffers & PIPE_CLEAR_DEPTHSTENCIL;
+                const enum pipe_format zs_format =
+                        PIPE_FORMAT_S8_UINT_Z24_UNORM;
+
+                /* The job may already hold a value from an earlier clear
+                 * (when no draw was queued in between), so drop the old
+                 * depth or stencil field before ORing in the new one.  The
+                 * other field is left alone so separate depth-only and
+                 * stencil-only clears still combine.
+                 *
+                 * The field masks are derived by packing the maximum value
+                 * of each component -- assumes util_pack_z_stencil() sets
+                 * every bit of the field for 1.0 / 0xff; TODO confirm.
+                 */
+                if (buffers & PIPE_CLEAR_DEPTH) {
+                        job->clear_zs &=
+                                ~util_pack_z_stencil(zs_format, 1.0, 0);
+                        job->clear_zs |=
+                                util_pack_z_stencil(zs_format, depth, 0);
+                }
+                if (buffers & PIPE_CLEAR_STENCIL) {
+                        job->clear_zs &=
+                                ~util_pack_z_stencil(zs_format, 0, 0xff);
+                        job->clear_zs |=
+                                util_pack_z_stencil(zs_format, 0, stencil);
+                }
+
+                rsc->initialized_buffers |= zsclear;
+        }
+
+        /* A clear covers the whole framebuffer. */
+        job->draw_min_x = 0;
+        job->draw_min_y = 0;
+        job->draw_max_x = vc5->framebuffer.width;
+        job->draw_max_y = vc5->framebuffer.height;
+        job->cleared |= buffers;
+        job->resolve |= buffers;
+
+        vc5_start_draw(vc5);
+}
+
+/**
+ * Gallium clear_render_target hook.  Not implemented: it only reports that
+ * on stderr and ignores all of its arguments.
+ */
+static void
+vc5_clear_render_target(struct pipe_context *pctx, struct pipe_surface *ps,
+                        const union pipe_color_union *color,
+                        unsigned x, unsigned y, unsigned w, unsigned h,
+                        bool render_condition_enabled)
+{
+        fputs("unimpl: clear RT\n", stderr);
+}
+
+/**
+ * Gallium clear_depth_stencil hook.  Not implemented: it only reports that
+ * on stderr and ignores all of its arguments.
+ */
+static void
+vc5_clear_depth_stencil(struct pipe_context *pctx, struct pipe_surface *ps,
+                        unsigned buffers, double depth, unsigned stencil,
+                        unsigned x, unsigned y, unsigned w, unsigned h,
+                        bool render_condition_enabled)
+{
+        fputs("unimpl: clear DS\n", stderr);
+}
+
+/**
+ * Installs this file's draw and clear entry points into the context's
+ * function table.
+ */
+void
+vc5_draw_init(struct pipe_context *pctx)
+{
+        /* Clears */
+        pctx->clear = vc5_clear;
+        pctx->clear_depth_stencil = vc5_clear_depth_stencil;
+        pctx->clear_render_target = vc5_clear_render_target;
+
+        /* Draws */
+        pctx->draw_vbo = vc5_draw_vbo;
+}
diff --git a/src/gallium/drivers/vc5/vc5_drm.h b/src/gallium/drivers/vc5/vc5_drm.h
new file mode 100644 (file)
index 0000000..e70cf9d
--- /dev/null
@@ -0,0 +1,191 @@
+/*
+ * Copyright © 2014-2017 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef _VC5_DRM_H_
+#define _VC5_DRM_H_
+
+#include "drm.h"
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/* Driver-private ioctl numbers, used below as offsets from DRM_COMMAND_BASE. */
+#define DRM_VC5_SUBMIT_CL                         0x00
+#define DRM_VC5_WAIT_SEQNO                        0x01
+#define DRM_VC5_WAIT_BO                           0x02
+#define DRM_VC5_CREATE_BO                         0x03
+#define DRM_VC5_MMAP_BO                           0x04
+#define DRM_VC5_GET_PARAM                         0x05
+#define DRM_VC5_GET_BO_OFFSET                     0x06
+
+/* Full ioctl request codes.  Every request is declared with DRM_IOWR and
+ * takes the argument struct named in its definition.
+ */
+#define DRM_IOCTL_VC5_SUBMIT_CL           DRM_IOWR(DRM_COMMAND_BASE + DRM_VC5_SUBMIT_CL, struct drm_vc5_submit_cl)
+#define DRM_IOCTL_VC5_WAIT_SEQNO          DRM_IOWR(DRM_COMMAND_BASE + DRM_VC5_WAIT_SEQNO, struct drm_vc5_wait_seqno)
+#define DRM_IOCTL_VC5_WAIT_BO             DRM_IOWR(DRM_COMMAND_BASE + DRM_VC5_WAIT_BO, struct drm_vc5_wait_bo)
+#define DRM_IOCTL_VC5_CREATE_BO           DRM_IOWR(DRM_COMMAND_BASE + DRM_VC5_CREATE_BO, struct drm_vc5_create_bo)
+#define DRM_IOCTL_VC5_MMAP_BO             DRM_IOWR(DRM_COMMAND_BASE + DRM_VC5_MMAP_BO, struct drm_vc5_mmap_bo)
+#define DRM_IOCTL_VC5_GET_PARAM           DRM_IOWR(DRM_COMMAND_BASE + DRM_VC5_GET_PARAM, struct drm_vc5_get_param)
+#define DRM_IOCTL_VC5_GET_BO_OFFSET       DRM_IOWR(DRM_COMMAND_BASE + DRM_VC5_GET_BO_OFFSET, struct drm_vc5_get_bo_offset)
+
+/**
+ * struct drm_vc5_submit_cl - ioctl argument for submitting commands to the 3D
+ * engine.
+ *
+ * This asks the kernel to have the GPU execute an optional binner
+ * command list, and a render command list.
+ */
+struct drm_vc5_submit_cl {
+       /* Pointer to the binner command list.
+        *
+        * This is the first set of commands executed, which runs the
+        * coordinate shader to determine where primitives land on the screen,
+        * then writes out the state updates and draw calls necessary per tile
+        * to the tile allocation BO.
+        */
+       __u32 bcl_start;
+
+        /** End address of the BCL (first byte after the BCL) */
+       __u32 bcl_end;
+
+       /* Offset of the render command list.
+        *
+        * This is the second set of commands executed, which will either
+        * execute the tiles that have been set up by the BCL, or a fixed set
+        * of tiles (in the case of RCL-only blits).
+        */
+       __u32 rcl_start;
+
+        /** End address of the RCL (first byte after the RCL) */
+       __u32 rcl_end;
+
+       /* Pointer to a u32 array of the BOs that are referenced by the job.
+        */
+       __u64 bo_handles;
+
+       /* Pointer to an array of chunks of extra submit CL information. (the
+        * chunk struct is not yet defined)
+        */
+       __u64 chunks;
+
+       /* Number of BO handles passed in (size is that times 4). */
+       __u32 bo_handle_count;
+
+       /* Presumably the number of entries in the chunks array -- TODO
+        * confirm once the chunk struct is defined.
+        */
+       __u32 chunk_count;
+
+       /* NOTE(review): no flag values are defined in this header. */
+       __u64 flags;
+};
+
+/**
+ * struct drm_vc5_wait_seqno - ioctl argument for waiting for
+ * DRM_VC5_SUBMIT_CL completion using its returned seqno.
+ *
+ * timeout_ns is the timeout in nanoseconds, where "0" means "don't
+ * block, just return the status."
+ *
+ * NOTE(review): struct drm_vc5_submit_cl as declared in this header has no
+ * seqno field, so it is unclear where the "returned seqno" comes from --
+ * confirm against the kernel side.
+ */
+struct drm_vc5_wait_seqno {
+       /* Sequence number to wait for. */
+       __u64 seqno;
+       /* Timeout in nanoseconds; 0 means do not block. */
+       __u64 timeout_ns;
+};
+
+/**
+ * struct drm_vc5_wait_bo - ioctl argument for waiting for
+ * completion of the last DRM_VC5_SUBMIT_CL on a BO.
+ *
+ * This is useful for cases where multiple processes might be
+ * rendering to a BO and you want to wait for all rendering to be
+ * completed.
+ */
+struct drm_vc5_wait_bo {
+       /* Handle of the BO to wait on (presumably a GEM handle, as in
+        * struct drm_vc5_create_bo -- TODO confirm).
+        */
+       __u32 handle;
+       /* Padding; keeps timeout_ns at an 8-byte offset. */
+       __u32 pad;
+       /* Presumably a timeout in nanoseconds with the same meaning as in
+        * struct drm_vc5_wait_seqno -- TODO confirm.
+        */
+       __u64 timeout_ns;
+};
+
+/**
+ * struct drm_vc5_create_bo - ioctl argument for creating VC5 BOs.
+ *
+ * There are currently no values for the flags argument, but it may be
+ * used in a future extension.
+ */
+struct drm_vc5_create_bo {
+       /** Requested size of the BO (presumably in bytes -- TODO confirm). */
+       __u32 size;
+       /** No flag values are currently defined. */
+       __u32 flags;
+       /** Returned GEM handle for the BO. */
+       __u32 handle;
+       /**
+        * Returned offset for the BO in the V3D address space.  This offset
+        * is private to the DRM fd and is valid for the lifetime of the GEM
+        * handle.
+        */
+       __u32 offset;
+};
+
+/**
+ * struct drm_vc5_mmap_bo - ioctl argument for mapping VC5 BOs.
+ *
+ * This doesn't actually perform an mmap.  Instead, it returns the
+ * offset you need to use in an mmap on the DRM device node.  This
+ * means that tools like valgrind end up knowing about the mapped
+ * memory.
+ *
+ * There are currently no values for the flags argument, but it may be
+ * used in a future extension.
+ */
+struct drm_vc5_mmap_bo {
+       /** Handle for the object being mapped. */
+       __u32 handle;
+       /** No flag values are currently defined. */
+       __u32 flags;
+       /** Offset into the DRM node to use for the subsequent mmap call. */
+       __u64 offset;
+};
+
+/**
+ * Parameter identifiers, presumably the values accepted in
+ * struct drm_vc5_get_param's param field -- TODO confirm.
+ *
+ * NOTE(review): the names suggest each one reads back a V3D identification
+ * or configuration register; verify against the kernel driver.
+ */
+enum drm_vc5_param {
+        DRM_VC5_PARAM_V3D_UIFCFG,
+        DRM_VC5_PARAM_V3D_HUB_IDENT1,
+        DRM_VC5_PARAM_V3D_HUB_IDENT2,
+        DRM_VC5_PARAM_V3D_HUB_IDENT3,
+        DRM_VC5_PARAM_V3D_CORE0_IDENT0,
+        DRM_VC5_PARAM_V3D_CORE0_IDENT1,
+        DRM_VC5_PARAM_V3D_CORE0_IDENT2,
+};
+
+/**
+ * struct drm_vc5_get_param - ioctl argument for DRM_IOCTL_VC5_GET_PARAM.
+ */
+struct drm_vc5_get_param {
+       /* Which parameter to query (presumably an enum drm_vc5_param value
+        * -- TODO confirm).
+        */
+       __u32 param;
+       /* Padding; keeps value at an 8-byte offset. */
+       __u32 pad;
+       /* Presumably filled in with the parameter's value on return -- TODO
+        * confirm.
+        */
+       __u64 value;
+};
+
+/**
+ * struct drm_vc5_get_bo_offset - ioctl argument for
+ * DRM_IOCTL_VC5_GET_BO_OFFSET.
+ *
+ * Returns the offset for the BO in the V3D address space for this DRM fd.
+ * This is the same value returned by drm_vc5_create_bo, if that was called
+ * from this DRM fd.
+ */
+struct drm_vc5_get_bo_offset {
+       /* Handle of the BO being queried. */
+       __u32 handle;
+       /* Returned offset of the BO in the V3D address space. */
+       __u32 offset;
+};
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif /* _VC5_DRM_H_ */
diff --git a/src/gallium/drivers/vc5/vc5_emit.c b/src/gallium/drivers/vc5/vc5_emit.c
new file mode 100644 (file)
index 0000000..29ccfcd
--- /dev/null
@@ -0,0 +1,449 @@
+/*
+ * Copyright © 2014-2017 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "util/u_format.h"
+#include "util/u_half.h"
+#include "vc5_context.h"
+#include "broadcom/cle/v3d_packet_v33_pack.h"
+#include "broadcom/compiler/v3d_compiler.h"
+
+/**
+ * Translates a gallium PIPE_BLENDFACTOR_* value into the corresponding
+ * V3D_BLEND_FACTOR_* value.
+ *
+ * A raw value of 0 (which may be seen when blending is disabled) is mapped to
+ * V3D_BLEND_FACTOR_ZERO.  Any factor not listed below hits unreachable().
+ */
+static uint8_t
+vc5_factor(enum pipe_blendfactor factor)
+{
+        /* We may get a bad blendfactor when blending is disabled. */
+        if (factor == 0)
+                return V3D_BLEND_FACTOR_ZERO;
+
+        switch (factor) {
+        case PIPE_BLENDFACTOR_ZERO:
+                /* Must not share the ONE case: a zero factor is zero. */
+                return V3D_BLEND_FACTOR_ZERO;
+        case PIPE_BLENDFACTOR_ONE:
+                return V3D_BLEND_FACTOR_ONE;
+        case PIPE_BLENDFACTOR_SRC_COLOR:
+                return V3D_BLEND_FACTOR_SRC_COLOR;
+        case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+                return V3D_BLEND_FACTOR_INV_SRC_COLOR;
+        case PIPE_BLENDFACTOR_DST_COLOR:
+                return V3D_BLEND_FACTOR_DST_COLOR;
+        case PIPE_BLENDFACTOR_INV_DST_COLOR:
+                return V3D_BLEND_FACTOR_INV_DST_COLOR;
+        case PIPE_BLENDFACTOR_SRC_ALPHA:
+                return V3D_BLEND_FACTOR_SRC_ALPHA;
+        case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+                return V3D_BLEND_FACTOR_INV_SRC_ALPHA;
+        case PIPE_BLENDFACTOR_DST_ALPHA:
+                return V3D_BLEND_FACTOR_DST_ALPHA;
+        case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+                return V3D_BLEND_FACTOR_INV_DST_ALPHA;
+        case PIPE_BLENDFACTOR_CONST_COLOR:
+                return V3D_BLEND_FACTOR_CONST_COLOR;
+        case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+                return V3D_BLEND_FACTOR_INV_CONST_COLOR;
+        case PIPE_BLENDFACTOR_CONST_ALPHA:
+                return V3D_BLEND_FACTOR_CONST_ALPHA;
+        case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+                return V3D_BLEND_FACTOR_INV_CONST_ALPHA;
+        case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
+                return V3D_BLEND_FACTOR_SRC_ALPHA_SATURATE;
+        default:
+                unreachable("Bad blend factor");
+        }
+}
+
+/**
+ * Returns one channel of the sampler's border color as a half-float,
+ * choosing the source channel according to the texture format.
+ *
+ * When the view's texture return size is not 32, the requested channel is
+ * first routed through the format description's swizzle; otherwise chan is
+ * used directly.  PIPE_SWIZZLE_0 and PIPE_SWIZZLE_1 produce constant 0.0 and
+ * 1.0.
+ */
+static inline uint16_t
+swizzled_border_color(struct pipe_sampler_state *sampler,
+                      struct vc5_sampler_view *sview,
+                      int chan)
+{
+        const struct util_format_description *fmt_desc =
+                util_format_description(sview->base.format);
+        uint8_t source = chan;
+
+        /* With swizzling done in the sampler, the border color only needs
+         * rearranging for the mismatch between the VC5 texture format and
+         * the PIPE_FORMAT; GL_ARB_texture_swizzle is handled by the
+         * sampler's own swizzle.  With swizzling done in the shader, the
+         * border color is not pre-swizzled at all.
+         */
+        if (vc5_get_tex_return_size(sview->base.format) != 32)
+                source = fmt_desc->swizzle[source];
+
+        if (source == PIPE_SWIZZLE_0)
+                return util_float_to_half(0.0);
+
+        if (source == PIPE_SWIZZLE_1)
+                return util_float_to_half(1.0);
+
+        return util_float_to_half(sampler->border_color.f[source]);
+}
+
+/**
+ * Emits the TEXTURE_SHADER_STATE record for texture unit i of a shader stage
+ * into the job's indirect command list.
+ *
+ * The BO and offset of the record are stored in stage_tex->texture_state[i].
+ * The record packed here (border color, max LOD, base pointer, filters) is
+ * ORed byte-by-byte with the prepacked state held by the sampler view and
+ * the sampler state object before it is emitted.
+ */
+static void
+emit_one_texture(struct vc5_context *vc5, struct vc5_texture_stateobj *stage_tex,
+                 int i)
+{
+        struct vc5_job *job = vc5->job;
+        struct pipe_sampler_state *psampler = stage_tex->samplers[i];
+        struct vc5_sampler_state *sampler = vc5_sampler_state(psampler);
+        struct pipe_sampler_view *psview = stage_tex->textures[i];
+        struct vc5_sampler_view *sview = vc5_sampler_view(psview);
+        struct pipe_resource *prsc = psview->texture;
+        struct vc5_resource *rsc = vc5_resource(prsc);
+
+        /* Reserve 32-byte-aligned space for the record and remember where
+         * it lives.
+         */
+        stage_tex->texture_state[i].offset =
+                vc5_cl_ensure_space(&job->indirect,
+                                    cl_packet_length(TEXTURE_SHADER_STATE),
+                                    32);
+        vc5_bo_set_reference(&stage_tex->texture_state[i].bo,
+                             job->indirect.bo);
+
+        struct V3D33_TEXTURE_SHADER_STATE unpacked = {
+                /* XXX */
+                .border_color_red = swizzled_border_color(psampler, sview, 0),
+                .border_color_green = swizzled_border_color(psampler, sview, 1),
+                .border_color_blue = swizzled_border_color(psampler, sview, 2),
+                .border_color_alpha = swizzled_border_color(psampler, sview, 3),
+
+                /* XXX: Disable min/maxlod for txf */
+                .max_level_of_detail = MIN2(MIN2(psampler->max_lod,
+                                                 VC5_MAX_MIP_LEVELS),
+                                            psview->u.tex.last_level),
+
+                .texture_base_pointer = cl_address(rsc->bo,
+                                                   rsc->slices[0].offset),
+        };
+
+        int min_img_filter = psampler->min_img_filter;
+        int min_mip_filter = psampler->min_mip_filter;
+        int mag_img_filter = psampler->mag_img_filter;
+
+        /* Textures with a 32-bit return size get nearest filtering forced on
+         * all three filters (mip, minification and magnification).
+         */
+        if (vc5_get_tex_return_size(psview->format) == 32) {
+                min_mip_filter = PIPE_TEX_MIPFILTER_NEAREST;
+                min_img_filter = PIPE_TEX_FILTER_NEAREST;
+                mag_img_filter = PIPE_TEX_FILTER_NEAREST;
+        }
+
+        bool min_nearest = (min_img_filter == PIPE_TEX_FILTER_NEAREST);
+        switch (min_mip_filter) {
+        case PIPE_TEX_MIPFILTER_NONE:
+                unpacked.minification_filter = 0 + min_nearest;
+                break;
+        case PIPE_TEX_MIPFILTER_NEAREST:
+                unpacked.minification_filter = 2 + !min_nearest;
+                break;
+        case PIPE_TEX_MIPFILTER_LINEAR:
+                unpacked.minification_filter = 4 + !min_nearest;
+                break;
+        }
+        unpacked.magnification_filter = (mag_img_filter ==
+                                         PIPE_TEX_FILTER_NEAREST);
+
+        uint8_t packed[cl_packet_length(TEXTURE_SHADER_STATE)];
+        cl_packet_pack(TEXTURE_SHADER_STATE)(&job->indirect, packed, &unpacked);
+
+        /* Merge in the bits prepacked by the sampler view and sampler CSOs.
+         * The index is named differently from the texture unit parameter so
+         * that it does not shadow it.
+         */
+        for (int byte = 0; byte < ARRAY_SIZE(packed); byte++)
+                packed[byte] |= sview->texture_shader_state[byte] | sampler->texture_shader_state[byte];
+
+        cl_emit_prepacked(&job->indirect, &packed);
+}
+
+/**
+ * Emits texture shader state for every texture unit of one shader stage, in
+ * ascending unit order.
+ */
+static void
+emit_textures(struct vc5_context *vc5, struct vc5_texture_stateobj *stage_tex)
+{
+        int unit = 0;
+
+        while (unit < stage_tex->num_textures) {
+                emit_one_texture(vc5, stage_tex, unit);
+                unit++;
+        }
+}
+
+void
+vc5_emit_state(struct pipe_context *pctx)
+{
+        struct vc5_context *vc5 = vc5_context(pctx);
+        struct vc5_job *job = vc5->job;
+
+        if (vc5->dirty & (VC5_DIRTY_SCISSOR | VC5_DIRTY_VIEWPORT |
+                          VC5_DIRTY_RASTERIZER)) {
+                float *vpscale = vc5->viewport.scale;
+                float *vptranslate = vc5->viewport.translate;
+                float vp_minx = -fabsf(vpscale[0]) + vptranslate[0];
+                float vp_maxx = fabsf(vpscale[0]) + vptranslate[0];
+                float vp_miny = -fabsf(vpscale[1]) + vptranslate[1];
+                float vp_maxy = fabsf(vpscale[1]) + vptranslate[1];
+
+                /* Clip to the scissor if it's enabled, but still clip to the
+                 * drawable regardless since that controls where the binner
+                 * tries to put things.
+                 *
+                 * Additionally, always clip the rendering to the viewport,
+                 * since the hardware does guardband clipping, meaning
+                 * primitives would rasterize outside of the view volume.
+                 */
+                uint32_t minx, miny, maxx, maxy;
+                if (!vc5->rasterizer->base.scissor) {
+                        minx = MAX2(vp_minx, 0);
+                        miny = MAX2(vp_miny, 0);
+                        maxx = MIN2(vp_maxx, job->draw_width);
+                        maxy = MIN2(vp_maxy, job->draw_height);
+                } else {
+                        minx = MAX2(vp_minx, vc5->scissor.minx);
+                        miny = MAX2(vp_miny, vc5->scissor.miny);
+                        maxx = MIN2(vp_maxx, vc5->scissor.maxx);
+                        maxy = MIN2(vp_maxy, vc5->scissor.maxy);
+                }
+
+                cl_emit(&job->bcl, CLIP_WINDOW, clip) {
+                        clip.clip_window_left_pixel_coordinate = minx;
+                        clip.clip_window_bottom_pixel_coordinate = miny;
+                        clip.clip_window_height_in_pixels = maxy - miny;
+                        clip.clip_window_width_in_pixels = maxx - minx;
+                        clip.clip_window_height_in_pixels = maxy - miny;
+                }
+
+                job->draw_min_x = MIN2(job->draw_min_x, minx);
+                job->draw_min_y = MIN2(job->draw_min_y, miny);
+                job->draw_max_x = MAX2(job->draw_max_x, maxx);
+                job->draw_max_y = MAX2(job->draw_max_y, maxy);
+        }
+
+        if (vc5->dirty & (VC5_DIRTY_RASTERIZER |
+                          VC5_DIRTY_ZSA |
+                          VC5_DIRTY_BLEND |
+                          VC5_DIRTY_COMPILED_FS)) {
+                cl_emit(&job->bcl, CONFIGURATION_BITS, config) {
+                        config.enable_forward_facing_primitive =
+                                !(vc5->rasterizer->base.cull_face &
+                                  PIPE_FACE_FRONT);
+                        config.enable_reverse_facing_primitive =
+                                !(vc5->rasterizer->base.cull_face &
+                                  PIPE_FACE_BACK);
+                        /* This seems backwards, but it's what gets the
+                         * clipflat test to pass.
+                         */
+                        config.clockwise_primitives =
+                                vc5->rasterizer->base.front_ccw;
+
+                        config.enable_depth_offset =
+                                vc5->rasterizer->base.offset_tri;
+
+                        config.rasterizer_oversample_mode =
+                                vc5->rasterizer->base.multisample;
+
+                        config.blend_enable = vc5->blend->rt[0].blend_enable;
+
+                        config.early_z_updates_enable = true;
+                        if (vc5->zsa->base.depth.enabled) {
+                                config.z_updates_enable =
+                                        vc5->zsa->base.depth.writemask;
+                                config.early_z_enable =
+                                        vc5->zsa->early_z_enable;
+                                config.depth_test_function =
+                                        vc5->zsa->base.depth.func;
+                        } else {
+                                config.depth_test_function = PIPE_FUNC_ALWAYS;
+                        }
+                }
+
+        }
+
+        if (vc5->dirty & VC5_DIRTY_RASTERIZER) {
+                cl_emit(&job->bcl, DEPTH_OFFSET, depth) {
+                        depth.depth_offset_factor =
+                                vc5->rasterizer->offset_factor;
+                        depth.depth_offset_units =
+                                vc5->rasterizer->offset_units;
+                }
+
+                cl_emit(&job->bcl, POINT_SIZE, point_size) {
+                        point_size.point_size = vc5->rasterizer->point_size;
+                }
+
+                cl_emit(&job->bcl, LINE_WIDTH, line_width) {
+                        line_width.line_width = vc5->rasterizer->base.line_width;
+                }
+        }
+
+        if (vc5->dirty & VC5_DIRTY_VIEWPORT) {
+                cl_emit(&job->bcl, CLIPPER_XY_SCALING, clip) {
+                        clip.viewport_half_width_in_1_256th_of_pixel =
+                                vc5->viewport.scale[0] * 256.0f;
+                        clip.viewport_half_height_in_1_256th_of_pixel =
+                                vc5->viewport.scale[1] * 256.0f;
+                }
+
+                cl_emit(&job->bcl, CLIPPER_Z_SCALE_AND_OFFSET, clip) {
+                        clip.viewport_z_offset_zc_to_zs =
+                                vc5->viewport.translate[2];
+                        clip.viewport_z_scale_zc_to_zs =
+                                vc5->viewport.scale[2];
+                }
+                if (0 /* XXX */) {
+                cl_emit(&job->bcl, CLIPPER_Z_MIN_MAX_CLIPPING_PLANES, clip) {
+                        clip.minimum_zw = (vc5->viewport.translate[2] -
+                                           vc5->viewport.scale[2]);
+                        clip.maximum_zw = (vc5->viewport.translate[2] +
+                                           vc5->viewport.scale[2]);
+                }
+                }
+
+                cl_emit(&job->bcl, VIEWPORT_OFFSET, vp) {
+                        vp.viewport_centre_x_coordinate =
+                                vc5->viewport.translate[0];
+                        vp.viewport_centre_y_coordinate =
+                                vc5->viewport.translate[1];
+                }
+        }
+
+        if (vc5->dirty & VC5_DIRTY_BLEND) {
+                struct pipe_blend_state *blend = vc5->blend;
+
+                cl_emit(&job->bcl, BLEND_CONFIG, config) {
+                        struct pipe_rt_blend_state *rtblend = &blend->rt[0];
+
+                        config.colour_blend_mode = rtblend->rgb_func;
+                        config.colour_blend_dst_factor =
+                                vc5_factor(rtblend->rgb_dst_factor);
+                        config.colour_blend_src_factor =
+                                vc5_factor(rtblend->rgb_src_factor);
+
+                        config.alpha_blend_mode = rtblend->alpha_func;
+                        config.alpha_blend_dst_factor =
+                                vc5_factor(rtblend->alpha_dst_factor);
+                        config.alpha_blend_src_factor =
+                                vc5_factor(rtblend->alpha_src_factor);
+                }
+
+                cl_emit(&job->bcl, COLOUR_WRITE_MASKS, mask) {
+                        mask.render_target_0_per_colour_component_write_masks =
+                                (~blend->rt[0].colormask) & 0xf;
+                        mask.render_target_1_per_colour_component_write_masks =
+                                (~blend->rt[1].colormask) & 0xf;
+                        mask.render_target_2_per_colour_component_write_masks =
+                                (~blend->rt[2].colormask) & 0xf;
+                        mask.render_target_3_per_colour_component_write_masks =
+                                (~blend->rt[3].colormask) & 0xf;
+                }
+        }
+
+        if (vc5->dirty & VC5_DIRTY_BLEND_COLOR) {
+                cl_emit(&job->bcl, BLEND_CONSTANT_COLOUR, colour) {
+                        /* XXX: format-dependent swizzling */
+                        colour.red_f16 = vc5->blend_color.hf[2];
+                        colour.green_f16 = vc5->blend_color.hf[1];
+                        colour.blue_f16 = vc5->blend_color.hf[0];
+                        colour.alpha_f16 = vc5->blend_color.hf[3];
+                }
+        }
+
+        if (vc5->dirty & (VC5_DIRTY_ZSA | VC5_DIRTY_STENCIL_REF)) {
+                struct pipe_stencil_state *front = &vc5->zsa->base.stencil[0];
+                struct pipe_stencil_state *back = &vc5->zsa->base.stencil[1];
+
+                cl_emit(&job->bcl, STENCIL_CONFIG, config) {
+                        config.front_config = true;
+                        config.back_config = !back->enabled;
+
+                        config.stencil_write_mask = front->writemask;
+                        config.stencil_test_mask = front->valuemask;
+
+                        config.stencil_test_function = front->func;
+                        config.stencil_pass_op = front->zpass_op;
+                        config.depth_test_fail_op = front->zfail_op;
+                        config.stencil_test_fail_op = front->fail_op;
+
+                        config.stencil_ref_value = vc5->stencil_ref.ref_value[0];
+                }
+
+                if (back->enabled) {
+                        cl_emit(&job->bcl, STENCIL_CONFIG, config) {
+                                config.front_config = false;
+                                config.back_config = true;
+
+                                config.stencil_write_mask = back->writemask;
+                                config.stencil_test_mask = back->valuemask;
+
+                                config.stencil_test_function = back->func;
+                                config.stencil_pass_op = back->zpass_op;
+                                config.depth_test_fail_op = back->zfail_op;
+                                config.stencil_test_fail_op = back->fail_op;
+
+                                config.stencil_ref_value =
+                                        vc5->stencil_ref.ref_value[1];
+                        }
+                }
+        }
+
+        if (vc5->dirty & VC5_DIRTY_FRAGTEX)
+                emit_textures(vc5, &vc5->fragtex);
+
+        if (vc5->dirty & VC5_DIRTY_VERTTEX)
+                emit_textures(vc5, &vc5->fragtex);
+
+        if (vc5->dirty & VC5_DIRTY_FLAT_SHADE_FLAGS) {
+                /* XXX: Need to handle more than 24 entries. */
+                cl_emit(&job->bcl, FLAT_SHADE_FLAGS, flags) {
+                        flags.varying_offset_v0 = 0;
+
+                        flags.flat_shade_flags_for_varyings_v024 =
+                                vc5->prog.fs->prog_data.fs->flat_shade_flags[0] & 0xfffff;
+
+                        if (vc5->rasterizer->base.flatshade) {
+                                flags.flat_shade_flags_for_varyings_v024 |=
+                                        vc5->prog.fs->prog_data.fs->color_inputs[0] & 0xfffff;
+                        }
+                }
+        }
+
+        if (vc5->dirty & VC5_DIRTY_STREAMOUT) {
+                struct vc5_streamout_stateobj *so = &vc5->streamout;
+
+                if (so->num_targets) {
+                        cl_emit(&job->bcl, TRANSFORM_FEEDBACK_ENABLE, tfe) {
+                                tfe.number_of_32_bit_output_buffer_address_following =
+                                        so->num_targets;
+                                tfe.number_of_16_bit_output_data_specs_following =
+                                        vc5->prog.bind_vs->num_tf_specs;
+                        };
+
+                        for (int i = 0; i < vc5->prog.bind_vs->num_tf_specs; i++) {
+                                cl_emit_prepacked(&job->bcl,
+                                                  &vc5->prog.bind_vs->tf_specs[i]);
+                        }
+
+                        for (int i = 0; i < so->num_targets; i++) {
+                                const struct pipe_stream_output_target *target =
+                                        so->targets[i];
+                                struct vc5_resource *rsc =
+                                        vc5_resource(target->buffer);
+
+                                cl_emit(&job->bcl, TRANSFORM_FEEDBACK_OUTPUT_ADDRESS, output) {
+                                        output.address =
+                                                cl_address(rsc->bo,
+                                                           target->buffer_offset);
+                                };
+                                /* XXX: buffer_size? */
+                        }
+                } else {
+                        /* XXX? */
+                }
+        }
+}
diff --git a/src/gallium/drivers/vc5/vc5_fence.c b/src/gallium/drivers/vc5/vc5_fence.c
new file mode 100644 (file)
index 0000000..08de9bc
--- /dev/null
@@ -0,0 +1,93 @@
+/*
+ * Copyright © 2014 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/** @file vc5_fence.c
+ *
+ * Seqno-based fence management.
+ *
+ * We have two mechanisms for waiting in our kernel API: You can wait on a BO
+ * for all rendering to it from any process to be completed, or wait on a
+ * seqno for that particular seqno to be passed.  The fence API we're
+ * implementing is based on waiting for all rendering in the context to have
+ * completed (with no reference to what other processes might be doing with
+ * the same BOs), so we can just use the seqno of the last rendering we'd
+ * fired off as our fence marker.
+ */
+
+#include "util/u_inlines.h"
+
+#include "vc5_screen.h"
+#include "vc5_bufmgr.h"
+
+/* Driver-side fence object.  The fence hooks in this file cast the opaque
+ * pipe_fence_handle pointers they receive to this type.
+ */
+struct vc5_fence {
+        /* Reference count, manipulated through pipe_reference(). */
+        struct pipe_reference reference;
+        /* Sequence number passed to vc5_wait_seqno() when waiting. */
+        uint64_t seqno;
+};
+
+/**
+ * pipe_screen::fence_reference hook: makes *pp refer to pf, releasing
+ * whatever fence *pp referred to before.
+ *
+ * The previous fence is freed when pipe_reference() returns true.
+ * NOTE(review): either fence may presumably be NULL, as with other gallium
+ * reference helpers -- confirm against pipe_reference().
+ */
+static void
+vc5_fence_reference(struct pipe_screen *pscreen,
+                    struct pipe_fence_handle **pp,
+                    struct pipe_fence_handle *pf)
+{
+        struct vc5_fence **slot = (struct vc5_fence **)pp;
+        struct vc5_fence *incoming = (struct vc5_fence *)pf;
+        struct vc5_fence *outgoing = *slot;
+
+        if (pipe_reference(&outgoing->reference, &incoming->reference))
+                free(outgoing);
+
+        *slot = incoming;
+}
+
+/**
+ * pipe_screen::fence_finish hook: waits on the fence's seqno through
+ * vc5_wait_seqno(), forwarding timeout_ns, and returns that call's result.
+ * The ctx argument is not used.
+ */
+static boolean
+vc5_fence_finish(struct pipe_screen *pscreen,
+                 struct pipe_context *ctx,
+                 struct pipe_fence_handle *pf,
+                 uint64_t timeout_ns)
+{
+        struct vc5_fence *fence = (struct vc5_fence *)pf;
+
+        return vc5_wait_seqno(vc5_screen(pscreen), fence->seqno, timeout_ns,
+                              "fence wait");
+}
+
+/**
+ * Allocates a fence holding a single reference and the given seqno.
+ *
+ * Returns NULL if the allocation fails.  The screen argument is not used.
+ */
+struct vc5_fence *
+vc5_fence_create(struct vc5_screen *screen, uint64_t seqno)
+{
+        struct vc5_fence *fence = calloc(1, sizeof(*fence));
+
+        if (fence) {
+                pipe_reference_init(&fence->reference, 1);
+                fence->seqno = seqno;
+        }
+
+        return fence;
+}
+
+/** Installs this file's fence hooks into the screen's pipe_screen vtable. */
+void
+vc5_fence_init(struct vc5_screen *screen)
+{
+        struct pipe_screen *pscreen = &screen->base;
+
+        pscreen->fence_finish = vc5_fence_finish;
+        pscreen->fence_reference = vc5_fence_reference;
+}
diff --git a/src/gallium/drivers/vc5/vc5_formats.c b/src/gallium/drivers/vc5/vc5_formats.c
new file mode 100644 (file)
index 0000000..fe26002
--- /dev/null
@@ -0,0 +1,415 @@
+/*
+ * Copyright © 2014-2017 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/**
+ * @file vc5_formats.c
+ *
+ * Contains the table and accessors for VC5 texture and render target format
+ * support.
+ *
+ * The hardware has limited support for texture formats, and extremely limited
+ * support for render target formats.  As a result, we emulate other formats
+ * in our shader code, and this stores the table for doing so.
+ */
+
+#include "util/u_format.h"
+#include "util/macros.h"
+
+#include "vc5_context.h"
+#include "broadcom/cle/v3d_packet_v33_pack.h"
+
+#define OUTPUT_IMAGE_FORMAT_NO 255
+
+/* One entry of vc5_format_table, describing how a pipe_format maps onto the
+ * hardware.
+ *
+ * The FORMAT() macro fills these fields positionally, so the member order
+ * here must stay in sync with that macro.
+ */
+struct vc5_format {
+        /** Set if the pipe format is defined in the table. */
+        bool present;
+
+        /** One of V3D33_OUTPUT_IMAGE_FORMAT_*, or OUTPUT_IMAGE_FORMAT_NO */
+        uint8_t rt_type;
+
+        /** One of V3D33_TEXTURE_DATA_FORMAT_*. */
+        uint8_t tex_type;
+
+        /**
+         * Swizzle to apply to the RGBA shader output for storing to the tile
+         * buffer, to the RGBA tile buffer to produce shader input (for
+         * blending), and for turning the rgba8888 texture sampler return
+         * value into shader rgba values.
+         */
+        uint8_t swizzle[4];
+
+        /* Whether the return value is 16F/I/UI or 32F/I/UI. */
+        uint8_t return_size;
+
+        /* If return_size == 32, how many channels are returned by texturing.
+         * 16 always returns 2 pairs of 16 bit values.
+         */
+        uint8_t return_channels;
+};
+
+/* Expands to a four-element brace initializer of PIPE_SWIZZLE_* values, built
+ * by token-pasting each argument (X, Y, Z, W, 0 or 1) onto PIPE_SWIZZLE_.
+ */
+#define SWIZ(x,y,z,w) {          \
+        PIPE_SWIZZLE_##x, \
+        PIPE_SWIZZLE_##y, \
+        PIPE_SWIZZLE_##z, \
+        PIPE_SWIZZLE_##w  \
+}
+
+/* Expands to a designated array initializer for slot PIPE_FORMAT_<pipe>.
+ *
+ * The struct members are given positionally in struct vc5_format order:
+ * present (always true), rt_type (OUTPUT_IMAGE_FORMAT_<rt>), tex_type
+ * (TEXTURE_DATA_FORMAT_<tex>), swizzle, return_size, return_channels.
+ * Passing NO as rt selects OUTPUT_IMAGE_FORMAT_NO.
+ */
+#define FORMAT(pipe, rt, tex, swiz, return_size, return_channels)       \
+        [PIPE_FORMAT_##pipe] = {                                        \
+                true,                                                   \
+                OUTPUT_IMAGE_FORMAT_##rt,                               \
+                TEXTURE_DATA_FORMAT_##tex,                              \
+                swiz,                                                   \
+                return_size,                                            \
+                return_channels,                                        \
+        }
+
+/* Named shorthands for the swizzles used by the format table. */
+#define SWIZ_X001      SWIZ(X, 0, 0, 1)
+#define SWIZ_XY01      SWIZ(X, Y, 0, 1)
+#define SWIZ_XYZ1      SWIZ(X, Y, Z, 1)
+#define SWIZ_XYZW      SWIZ(X, Y, Z, W)
+#define SWIZ_YZWX      SWIZ(Y, Z, W, X)
+#define SWIZ_YZW1      SWIZ(Y, Z, W, 1)
+#define SWIZ_ZYXW      SWIZ(Z, Y, X, W)
+#define SWIZ_ZYX1      SWIZ(Z, Y, X, 1)
+#define SWIZ_XXXY      SWIZ(X, X, X, Y)
+#define SWIZ_XXX1      SWIZ(X, X, X, 1)
+#define SWIZ_XXXX      SWIZ(X, X, X, X)
+#define SWIZ_000X      SWIZ(0, 0, 0, X)
+
+/* Format description table, indexed by enum pipe_format.
+ *
+ * Entries are placed with designated initializers, so any pipe_format not
+ * listed here is zero-initialized and therefore has present == false.  The
+ * array only extends as far as the highest listed format, which is why
+ * get_format() bounds-checks its index before looking at an entry.
+ *
+ * Columns: pipe format, render target format (NO when the format cannot be
+ * rendered to), texture data format, swizzle, return size, return channels.
+ */
+static const struct vc5_format vc5_format_table[] = {
+        FORMAT(B8G8R8A8_UNORM,    RGBA8,        RGBA8,       SWIZ_ZYXW, 16, 0),
+        FORMAT(B8G8R8X8_UNORM,    RGBX8,        RGBA8,       SWIZ_ZYX1, 16, 0),
+        FORMAT(B8G8R8A8_SRGB,     SRGB8_ALPHA8, RGBA8,       SWIZ_ZYXW, 16, 0),
+        FORMAT(B8G8R8X8_SRGB,     SRGBX8,       RGBA8,       SWIZ_ZYX1, 16, 0),
+        FORMAT(R8G8B8A8_UNORM,    RGBA8,        RGBA8,       SWIZ_XYZW, 16, 0),
+        FORMAT(R8G8B8X8_UNORM,    RGBX8,        RGBA8,       SWIZ_XYZ1, 16, 0),
+        FORMAT(R8G8B8A8_SNORM,    NO,           RGBA8_SNORM, SWIZ_XYZW, 16, 0),
+        FORMAT(R8G8B8X8_SNORM,    NO,           RGBA8_SNORM, SWIZ_XYZ1, 16, 0),
+        FORMAT(B10G10R10A2_UNORM, RGB10_A2,     RGB10_A2,    SWIZ_ZYXW, 16, 0),
+
+        FORMAT(B4G4R4A4_UNORM,    ABGR4444,     RGBA4,       SWIZ_YZWX, 16, 0),
+        FORMAT(B4G4R4X4_UNORM,    ABGR4444,     RGBA4,       SWIZ_YZW1, 16, 0),
+
+        FORMAT(B5G5R5A1_UNORM,    NO,           RGB5_A1,     SWIZ_YZWX, 16, 0),
+        FORMAT(B5G5R5X1_UNORM,    NO,           RGB5_A1,     SWIZ_YZW1, 16, 0),
+        FORMAT(B5G6R5_UNORM,      BGR565,       RGB565,      SWIZ_XYZ1, 16, 0),
+
+        FORMAT(R8_UNORM,          R8,           R8,          SWIZ_X001, 16, 0),
+        FORMAT(R8_SNORM,          NO,           R8_SNORM,    SWIZ_X001, 16, 0),
+        FORMAT(R8G8_UNORM,        RG8,          RG8,         SWIZ_XY01, 16, 0),
+        FORMAT(R8G8_SNORM,        NO,           RG8_SNORM,   SWIZ_XY01, 16, 0),
+
+        FORMAT(R16_UNORM,         NO,           R16,         SWIZ_X001, 32, 1),
+        FORMAT(R16_SNORM,         NO,           R16_SNORM,   SWIZ_X001, 32, 1),
+        FORMAT(R16_FLOAT,         R16F,         R16F,        SWIZ_X001, 16, 0),
+        FORMAT(R32_FLOAT,         R32F,         R32F,        SWIZ_X001, 32, 1),
+
+        FORMAT(R16G16_UNORM,      NO,           RG16,        SWIZ_XY01, 32, 2),
+        FORMAT(R16G16_SNORM,      NO,           RG16_SNORM,  SWIZ_XY01, 32, 2),
+        FORMAT(R16G16_FLOAT,      RG16F,        RG16F,       SWIZ_XY01, 16, 0),
+        FORMAT(R32G32_FLOAT,      RG32F,        RG32F,       SWIZ_XY01, 32, 2),
+
+        FORMAT(R16G16B16A16_UNORM, NO,          RGBA16,      SWIZ_XYZW, 32, 4),
+        FORMAT(R16G16B16A16_SNORM, NO,          RGBA16_SNORM, SWIZ_XYZW, 32, 4),
+        FORMAT(R16G16B16A16_FLOAT, RGBA16F,     RGBA16F,     SWIZ_XYZW, 16, 0),
+        FORMAT(R32G32B32A32_FLOAT, RGBA32F,     RGBA32F,     SWIZ_XYZW, 32, 4),
+
+        /* If we don't have L/A/LA16, mesa/st will fall back to RGBA16. */
+        FORMAT(L16_UNORM,         NO,           R16,         SWIZ_XXX1, 32, 1),
+        FORMAT(L16_SNORM,         NO,           R16_SNORM,   SWIZ_XXX1, 32, 1),
+        FORMAT(I16_UNORM,         NO,           R16,         SWIZ_XXXX, 32, 1),
+        FORMAT(I16_SNORM,         NO,           R16_SNORM,   SWIZ_XXXX, 32, 1),
+        FORMAT(A16_UNORM,         NO,           R16,         SWIZ_000X, 32, 1),
+        FORMAT(A16_SNORM,         NO,           R16_SNORM,   SWIZ_000X, 32, 1),
+        FORMAT(L16A16_UNORM,      NO,           RG16,        SWIZ_XXXY, 32, 2),
+        FORMAT(L16A16_SNORM,      NO,           RG16_SNORM,  SWIZ_XXXY, 32, 2),
+
+        FORMAT(A8_UNORM,          NO,           R8,          SWIZ_000X, 16, 0),
+        FORMAT(L8_UNORM,          NO,           R8,          SWIZ_XXX1, 16, 0),
+        FORMAT(I8_UNORM,          NO,           R8,          SWIZ_XXXX, 16, 0),
+        FORMAT(L8A8_UNORM,        NO,           RG8,         SWIZ_XXXY, 16, 0),
+
+        FORMAT(R8_SINT,           R8I,          S8,          SWIZ_X001, 16, 0),
+        FORMAT(R8_UINT,           R8UI,         S8,          SWIZ_X001, 16, 0),
+        FORMAT(R8G8_SINT,         RG8I,         S16,         SWIZ_XY01, 16, 0),
+        FORMAT(R8G8_UINT,         RG8UI,        S16,         SWIZ_XY01, 16, 0),
+        FORMAT(R8G8B8A8_SINT,     RGBA8I,       R32F,        SWIZ_XYZW, 16, 0),
+        FORMAT(R8G8B8A8_UINT,     RGBA8UI,      R32F,        SWIZ_XYZW, 16, 0),
+
+        FORMAT(R16_SINT,          R16I,         S16,         SWIZ_X001, 16, 0),
+        FORMAT(R16_UINT,          R16UI,        S16,         SWIZ_X001, 16, 0),
+        FORMAT(R16G16_SINT,       RG16I,        R32F,        SWIZ_XY01, 16, 0),
+        FORMAT(R16G16_UINT,       RG16UI,       R32F,        SWIZ_XY01, 16, 0),
+        FORMAT(R16G16B16A16_SINT, RGBA16I,      RG32F,       SWIZ_XYZW, 16, 0),
+        FORMAT(R16G16B16A16_UINT, RGBA16UI,     RG32F,       SWIZ_XYZW, 16, 0),
+
+        FORMAT(R32_SINT,          R32I,         R32F,        SWIZ_X001, 16, 0),
+        FORMAT(R32_UINT,          R32UI,        R32F,        SWIZ_X001, 16, 0),
+        FORMAT(R32G32_SINT,       RG32I,        RG32F,       SWIZ_XY01, 16, 0),
+        FORMAT(R32G32_UINT,       RG32UI,       RG32F,       SWIZ_XY01, 16, 0),
+        FORMAT(R32G32B32A32_SINT, RGBA32I,      RGBA32F,     SWIZ_XYZW, 16, 0),
+        FORMAT(R32G32B32A32_UINT, RGBA32UI,     RGBA32F,     SWIZ_XYZW, 16, 0),
+
+        FORMAT(A8_SINT,           R8I,          S8,          SWIZ_000X, 16, 0),
+        FORMAT(A8_UINT,           R8UI,         S8,          SWIZ_000X, 16, 0),
+        FORMAT(A16_SINT,          R16I,         S16,         SWIZ_000X, 16, 0),
+        FORMAT(A16_UINT,          R16UI,        S16,         SWIZ_000X, 16, 0),
+        FORMAT(A32_SINT,          R32I,         R32F,        SWIZ_000X, 16, 0),
+        FORMAT(A32_UINT,          R32UI,        R32F,        SWIZ_000X, 16, 0),
+
+        FORMAT(R11G11B10_FLOAT,   R11F_G11F_B10F, R11F_G11F_B10F, SWIZ_XYZW, 16, 0),
+        FORMAT(R9G9B9E5_FLOAT,    NO,           RGB9_E5,     SWIZ_XYZW, 16, 0),
+
+        FORMAT(S8_UINT_Z24_UNORM, DEPTH24_STENCIL8, DEPTH24_X8, SWIZ_X001, 32, 1),
+        FORMAT(X8Z24_UNORM,       DEPTH_COMPONENT24, DEPTH24_X8, SWIZ_X001, 32, 1),
+        FORMAT(S8X24_UINT,        NO,           R32F,        SWIZ_X001, 32, 1),
+        FORMAT(Z32_FLOAT,         DEPTH_COMPONENT32F, R32F, SWIZ_X001, 32, 1),
+
+        /* Pretend we support this, but it'll be separate Z32F depth and S8. */
+        FORMAT(Z32_FLOAT_S8X24_UINT, DEPTH_COMPONENT32F, R32F, SWIZ_X001, 32, 1),
+
+        FORMAT(ETC2_RGB8,         NO,           RGB8_ETC2,   SWIZ_XYZ1, 16, 0),
+        FORMAT(ETC2_SRGB8,        NO,           RGB8_ETC2,   SWIZ_XYZ1, 16, 0),
+        FORMAT(ETC2_RGB8A1,       NO,           RGB8_PUNCHTHROUGH_ALPHA1, SWIZ_XYZW, 16, 0),
+        FORMAT(ETC2_SRGB8A1,      NO,           RGB8_PUNCHTHROUGH_ALPHA1, SWIZ_XYZW, 16, 0),
+        FORMAT(ETC2_RGBA8,        NO,           RGBA8_ETC2_EAC, SWIZ_XYZW, 16, 0),
+        FORMAT(ETC2_R11_UNORM,    NO,           R11_EAC,     SWIZ_X001, 16, 0),
+        FORMAT(ETC2_R11_SNORM,    NO,           SIGNED_R11_EAC, SWIZ_X001, 16, 0),
+        FORMAT(ETC2_RG11_UNORM,   NO,           RG11_EAC,    SWIZ_XY01, 16, 0),
+        FORMAT(ETC2_RG11_SNORM,   NO,           SIGNED_RG11_EAC, SWIZ_XY01, 16, 0),
+
+        /* NOTE(review): DXT3_RGBA and DXT5_RGBA carry an alpha channel, yet
+         * they use SWIZ_XYZ1 here, which forces alpha to 1.  SWIZ_XYZW looks
+         * like the intended swizzle -- confirm before relying on BC2/BC3
+         * alpha.
+         */
+        FORMAT(DXT1_RGB,          NO,           BC1,         SWIZ_XYZ1, 16, 0),
+        FORMAT(DXT3_RGBA,         NO,           BC2,         SWIZ_XYZ1, 16, 0),
+        FORMAT(DXT5_RGBA,         NO,           BC3,         SWIZ_XYZ1, 16, 0),
+};
+
+/**
+ * Looks up the table entry for a pipe format.
+ *
+ * Returns NULL when the format lies beyond the end of the table or its slot
+ * was never filled in (present is false).
+ */
+static const struct vc5_format *
+get_format(enum pipe_format f)
+{
+        if (f < ARRAY_SIZE(vc5_format_table) && vc5_format_table[f].present)
+                return &vc5_format_table[f];
+
+        return NULL;
+}
+
+/**
+ * Returns true when the format is in the table and has a render target
+ * format other than OUTPUT_IMAGE_FORMAT_NO.
+ */
+bool
+vc5_rt_format_supported(enum pipe_format f)
+{
+        const struct vc5_format *desc = get_format(f);
+
+        return desc && desc->rt_type != OUTPUT_IMAGE_FORMAT_NO;
+}
+
+/**
+ * Returns the table's render target format for a pipe format, or 0 when the
+ * format is not in the table.
+ */
+uint8_t
+vc5_get_rt_format(enum pipe_format f)
+{
+        const struct vc5_format *desc = get_format(f);
+
+        return desc ? desc->rt_type : 0;
+}
+
+/** Returns true when the pipe format has an entry in the format table. */
+bool
+vc5_tex_format_supported(enum pipe_format f)
+{
+        return get_format(f) != NULL;
+}
+
+/**
+ * Returns the table's texture data format for a pipe format, or 0 when the
+ * format is not in the table.
+ */
+uint8_t
+vc5_get_tex_format(enum pipe_format f)
+{
+        const struct vc5_format *desc = get_format(f);
+
+        return desc ? desc->tex_type : 0;
+}
+
+/**
+ * Returns the table's texture return size for a pipe format, or 0 when the
+ * format is not in the table.
+ */
+uint8_t
+vc5_get_tex_return_size(enum pipe_format f)
+{
+        const struct vc5_format *desc = get_format(f);
+
+        return desc ? desc->return_size : 0;
+}
+
+/**
+ * Returns the table's texture return channel count for a pipe format, or 0
+ * when the format is not in the table.
+ */
+uint8_t
+vc5_get_tex_return_channels(enum pipe_format f)
+{
+        const struct vc5_format *desc = get_format(f);
+
+        return desc ? desc->return_channels : 0;
+}
+
+/**
+ * Returns a pointer to the four-entry swizzle for a pipe format.
+ *
+ * Formats that are not in the table get a static {0, 1, 2, 3} swizzle
+ * instead, so the result is never NULL.
+ */
+const uint8_t *
+vc5_get_format_swizzle(enum pipe_format f)
+{
+        static const uint8_t fallback[] = {0, 1, 2, 3};
+        const struct vc5_format *desc = get_format(f);
+
+        return desc ? desc->swizzle : fallback;
+}
+
+/**
+ * Maps an OUTPUT_IMAGE_FORMAT_* value to the INTERNAL_TYPE_* and
+ * INTERNAL_BPP_* values written through the type and bpp out-parameters.
+ *
+ * Both outputs are always written: formats that are not listed fall through
+ * to the default case and get INTERNAL_TYPE_8 / INTERNAL_BPP_32.
+ */
+void
+vc5_get_internal_type_bpp_for_output_format(uint32_t format,
+                                            uint32_t *type,
+                                            uint32_t *bpp)
+{
+        switch (format) {
+        case OUTPUT_IMAGE_FORMAT_RGBA8:
+        case OUTPUT_IMAGE_FORMAT_RGBX8:
+        case OUTPUT_IMAGE_FORMAT_RGB8:
+        case OUTPUT_IMAGE_FORMAT_RG8:
+        case OUTPUT_IMAGE_FORMAT_R8:
+        case OUTPUT_IMAGE_FORMAT_ABGR4444:
+        case OUTPUT_IMAGE_FORMAT_BGR565:
+        case OUTPUT_IMAGE_FORMAT_ABGR1555:
+                *type = INTERNAL_TYPE_8;
+                *bpp = INTERNAL_BPP_32;
+                break;
+
+        case OUTPUT_IMAGE_FORMAT_RGBA8I:
+        case OUTPUT_IMAGE_FORMAT_RG8I:
+        case OUTPUT_IMAGE_FORMAT_R8I:
+                *type = INTERNAL_TYPE_8I;
+                *bpp = INTERNAL_BPP_32;
+                break;
+
+        case OUTPUT_IMAGE_FORMAT_RGBA8UI:
+        case OUTPUT_IMAGE_FORMAT_RG8UI:
+        case OUTPUT_IMAGE_FORMAT_R8UI:
+                *type = INTERNAL_TYPE_8UI;
+                *bpp = INTERNAL_BPP_32;
+                break;
+
+        case OUTPUT_IMAGE_FORMAT_SRGB8_ALPHA8:
+        case OUTPUT_IMAGE_FORMAT_SRGB:
+        case OUTPUT_IMAGE_FORMAT_RGB10_A2:
+        case OUTPUT_IMAGE_FORMAT_R11F_G11F_B10F:
+        case OUTPUT_IMAGE_FORMAT_SRGBX8:
+        case OUTPUT_IMAGE_FORMAT_RGBA16F:
+                /* Note that sRGB RTs are stored in the tile buffer at 16F,
+                 * and the conversion to sRGB happens at tilebuffer
+                 * load/store.
+                 */
+                *type = INTERNAL_TYPE_16F;
+                *bpp = INTERNAL_BPP_64;
+                break;
+
+        case OUTPUT_IMAGE_FORMAT_RG16F:
+        case OUTPUT_IMAGE_FORMAT_R16F:
+                *type = INTERNAL_TYPE_16F;
+                /* Use 64bpp to make sure the TLB doesn't throw away the alpha
+                 * channel before alpha test happens.
+                 */
+                *bpp = INTERNAL_BPP_64;
+                break;
+
+        case OUTPUT_IMAGE_FORMAT_RGBA16I:
+                *type = INTERNAL_TYPE_16I;
+                *bpp = INTERNAL_BPP_64;
+                break;
+        case OUTPUT_IMAGE_FORMAT_RG16I:
+        case OUTPUT_IMAGE_FORMAT_R16I:
+                *type = INTERNAL_TYPE_16I;
+                *bpp = INTERNAL_BPP_32;
+                break;
+
+        case OUTPUT_IMAGE_FORMAT_RGBA16UI:
+                *type = INTERNAL_TYPE_16UI;
+                *bpp = INTERNAL_BPP_64;
+                break;
+        case OUTPUT_IMAGE_FORMAT_RG16UI:
+        case OUTPUT_IMAGE_FORMAT_R16UI:
+                *type = INTERNAL_TYPE_16UI;
+                *bpp = INTERNAL_BPP_32;
+                break;
+
+        case OUTPUT_IMAGE_FORMAT_RGBA32I:
+                *type = INTERNAL_TYPE_32I;
+                *bpp = INTERNAL_BPP_128;
+                break;
+        case OUTPUT_IMAGE_FORMAT_RG32I:
+                *type = INTERNAL_TYPE_32I;
+                *bpp = INTERNAL_BPP_64;
+                break;
+        case OUTPUT_IMAGE_FORMAT_R32I:
+                *type = INTERNAL_TYPE_32I;
+                *bpp = INTERNAL_BPP_32;
+                break;
+
+        case OUTPUT_IMAGE_FORMAT_RGBA32UI:
+                *type = INTERNAL_TYPE_32UI;
+                *bpp = INTERNAL_BPP_128;
+                break;
+        case OUTPUT_IMAGE_FORMAT_RG32UI:
+                *type = INTERNAL_TYPE_32UI;
+                *bpp = INTERNAL_BPP_64;
+                break;
+        case OUTPUT_IMAGE_FORMAT_R32UI:
+                *type = INTERNAL_TYPE_32UI;
+                *bpp = INTERNAL_BPP_32;
+                break;
+
+        case OUTPUT_IMAGE_FORMAT_RGBA32F:
+                *type = INTERNAL_TYPE_32F;
+                *bpp = INTERNAL_BPP_128;
+                break;
+        case OUTPUT_IMAGE_FORMAT_RG32F:
+                *type = INTERNAL_TYPE_32F;
+                *bpp = INTERNAL_BPP_64;
+                break;
+        case OUTPUT_IMAGE_FORMAT_R32F:
+                *type = INTERNAL_TYPE_32F;
+                *bpp = INTERNAL_BPP_32;
+                break;
+
+        default:
+                /* Provide some default values, as we'll be called at RB
+                 * creation time, even if an RB with this format isn't
+                 * supported.
+                 */
+                *type = INTERNAL_TYPE_8;
+                *bpp = INTERNAL_BPP_32;
+                break;
+        }
+}
diff --git a/src/gallium/drivers/vc5/vc5_job.c b/src/gallium/drivers/vc5/vc5_job.c
new file mode 100644 (file)
index 0000000..57cf967
--- /dev/null
@@ -0,0 +1,429 @@
+/*
+ * Copyright © 2014-2017 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/** @file vc5_job.c
+ *
+ * Functions for submitting VC5 render jobs to the kernel.
+ */
+
+#include <xf86drm.h>
+#include "vc5_context.h"
+#include "util/hash_table.h"
+#include "util/ralloc.h"
+#include "util/set.h"
+#include "broadcom/clif/clif_dump.h"
+#include "broadcom/cle/v3d_packet_v33_pack.h"
+
+/** Searches ht for key and hands the resulting entry to the table's remove. */
+static void
+remove_from_ht(struct hash_table *ht, void *key)
+{
+        _mesa_hash_table_remove(ht, _mesa_hash_table_search(ht, key));
+}
+
+/**
+ * Tears down a job: unreferences every BO in its set, removes it from the
+ * context's job tables, drops its surface references, destroys its command
+ * lists and tile allocation, and finally frees the job itself.
+ */
+static void
+vc5_job_free(struct vc5_context *vc5, struct vc5_job *job)
+{
+        struct set_entry *bo_entry;
+
+        /* Release the reference held for each BO tracked by the job. */
+        set_foreach(job->bos, bo_entry) {
+                struct vc5_bo *held = (struct vc5_bo *)bo_entry->key;
+                vc5_bo_unreference(&held);
+        }
+
+        remove_from_ht(vc5->jobs, &job->key);
+
+        for (int rt = 0; rt < VC5_MAX_DRAW_BUFFERS; rt++) {
+                if (!job->cbufs[rt])
+                        continue;
+
+                /* Look the texture up before the surface reference goes. */
+                remove_from_ht(vc5->write_jobs, job->cbufs[rt]->texture);
+                pipe_surface_reference(&job->cbufs[rt], NULL);
+        }
+
+        if (job->zsbuf) {
+                remove_from_ht(vc5->write_jobs, job->zsbuf->texture);
+                pipe_surface_reference(&job->zsbuf, NULL);
+        }
+
+        /* Don't leave the context pointing at a job that is going away. */
+        if (vc5->job == job)
+                vc5->job = NULL;
+
+        vc5_destroy_cl(&job->bcl);
+        vc5_destroy_cl(&job->rcl);
+        vc5_destroy_cl(&job->indirect);
+        vc5_bo_unreference(&job->tile_alloc);
+
+        ralloc_free(job);
+}
+
+/**
+ * Allocates a zeroed job as a ralloc child of the context and initializes
+ * its command lists, draw bounds and BO set.
+ */
+static struct vc5_job *
+vc5_job_create(struct vc5_context *vc5)
+{
+        struct vc5_job *new_job = rzalloc(vc5, struct vc5_job);
+
+        new_job->vc5 = vc5;
+
+        vc5_init_cl(new_job, &new_job->bcl);
+        vc5_init_cl(new_job, &new_job->rcl);
+        vc5_init_cl(new_job, &new_job->indirect);
+
+        /* Start the draw bounds inverted: max at zero, min at all-ones. */
+        new_job->draw_max_x = 0;
+        new_job->draw_max_y = 0;
+        new_job->draw_min_x = ~0;
+        new_job->draw_min_y = ~0;
+
+        /* The BO set is keyed by pointer and allocated under the job. */
+        new_job->bos = _mesa_set_create(new_job,
+                                        _mesa_hash_pointer,
+                                        _mesa_key_pointer_equal);
+
+        return new_job;
+}
+
+/**
+ * Adds a BO to the job's BO set and appends its handle to the handle array
+ * kept in job->submit.  A NULL BO, or one already in the set, is ignored.
+ */
+void
+vc5_job_add_bo(struct vc5_job *job, struct vc5_bo *bo)
+{
+        if (!bo || _mesa_set_search(job->bos, bo))
+                return;
+
+        /* The job takes its own reference on every BO it tracks. */
+        vc5_bo_reference(bo);
+        _mesa_set_add(job->bos, bo);
+
+        /* submit.bo_handles stores the array pointer as an integer. */
+        uint32_t *handles = (void *)(uintptr_t)job->submit.bo_handles;
+
+        if (job->submit.bo_handle_count >= job->bo_handles_size) {
+                /* Double the capacity, with a minimum of four entries. */
+                job->bo_handles_size = MAX2(4, job->bo_handles_size * 2);
+                handles = reralloc(job, handles,
+                                   uint32_t, job->bo_handles_size);
+                job->submit.bo_handles = (uintptr_t)(void *)handles;
+        }
+
+        handles[job->submit.bo_handle_count] = bo->handle;
+        job->submit.bo_handle_count++;
+}
+
+/**
+ * Submits the job registered in vc5->write_jobs for this resource, if there
+ * is one.
+ */
+void
+vc5_flush_jobs_writing_resource(struct vc5_context *vc5,
+                                struct pipe_resource *prsc)
+{
+        struct hash_entry *writer =
+                _mesa_hash_table_search(vc5->write_jobs, prsc);
+
+        if (!writer)
+                return;
+
+        struct vc5_job *job = writer->data;
+        vc5_job_submit(vc5, job);
+}
+
+/**
+ * Submits every job that touches the resource: first the job writing it (if
+ * any), then each job in vc5->jobs whose BO set contains the resource's BO.
+ */
+void
+vc5_flush_jobs_reading_resource(struct vc5_context *vc5,
+                                struct pipe_resource *prsc)
+{
+        struct vc5_resource *rsc = vc5_resource(prsc);
+
+        vc5_flush_jobs_writing_resource(vc5, prsc);
+
+        struct hash_entry *entry;
+        hash_table_foreach(vc5->jobs, entry) {
+                struct vc5_job *job = entry->data;
+
+                if (!_mesa_set_search(job->bos, rsc->bo))
+                        continue;
+
+                /* Reminder: vc5->jobs is safe to keep iterating even
+                 * after deletion of an entry.
+                 */
+                vc5_job_submit(vc5, job);
+        }
+}
+
+/**
+ * Chooses the job's tile dimensions and records the largest internal bpp of
+ * its bound color buffers in job->internal_bpp.
+ *
+ * tile_sizes[] is a flat list of (width, height) pairs.  The pair index
+ * starts at 0 and is advanced by 2 for MSAA, by 2 when cbufs[3] is bound
+ * (or by 1 when only cbufs[2] is), and by the maximum internal bpp value.
+ * Results are stored in job->tile_width and job->tile_height.
+ */
+static void
+vc5_job_set_tile_buffer_size(struct vc5_job *job)
+{
+        static const uint8_t tile_sizes[] = {
+                64, 64,
+                64, 32,
+                32, 32,
+                32, 16,
+                16, 16,
+        };
+        int tile_size_index = 0;
+        if (job->msaa)
+                tile_size_index += 2;
+
+        if (job->cbufs[3])
+                tile_size_index += 2;
+        else if (job->cbufs[2])
+                tile_size_index++;
+
+        int max_bpp = RENDER_TARGET_MAXIMUM_32BPP;
+        for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) {
+                if (job->cbufs[i]) {
+                        struct vc5_surface *surf = vc5_surface(job->cbufs[i]);
+                        max_bpp = MAX2(max_bpp, surf->internal_bpp);
+                }
+        }
+        job->internal_bpp = max_bpp;
+        /* Adding max_bpp straight into the index relies on 32bpp being 0. */
+        STATIC_ASSERT(RENDER_TARGET_MAXIMUM_32BPP == 0);
+        tile_size_index += max_bpp;
+
+        /* tile_size_index counts (width, height) pairs, so it must be
+         * checked against the number of pairs, not the number of array
+         * elements; otherwise the lookups below can read past the table.
+         *
+         * NOTE(review): MSAA plus four color buffers plus the largest bpp
+         * appears able to push the index past the five pairs available --
+         * confirm that such configurations are rejected before this point.
+         */
+        assert(tile_size_index < ARRAY_SIZE(tile_sizes) / 2);
+        job->tile_width = tile_sizes[tile_size_index * 2 + 0];
+        job->tile_height = tile_sizes[tile_size_index * 2 + 1];
+}
+
+/**
+ * Returns a vc5_job structure for tracking V3D rendering to a particular FBO.
+ *
+ * If rendering to this set of surfaces has already started, the existing
+ * job is returned.  Otherwise a new job is created, after flushing any
+ * previous jobs that read or wrote the resources behind the color and Z
+ * surfaces.
+ */
+struct vc5_job *
+vc5_get_job(struct vc5_context *vc5,
+            struct pipe_surface **cbufs, struct pipe_surface *zsbuf)
+{
+        /* Jobs are looked up by the exact set of surfaces they render to. */
+        struct vc5_job_key lookup_key = {
+                .cbufs = { cbufs[0], cbufs[1], cbufs[2], cbufs[3] },
+                .zsbuf = zsbuf,
+        };
+        struct hash_entry *existing =
+                _mesa_hash_table_search(vc5->jobs, &lookup_key);
+        if (existing)
+                return existing->data;
+
+        struct vc5_job *job = vc5_job_create(vc5);
+
+        /* Flush earlier users of each buffer, then take a reference on the
+         * surface for the new job.
+         */
+        for (int rt = 0; rt < VC5_MAX_DRAW_BUFFERS; rt++) {
+                if (!cbufs[rt])
+                        continue;
+
+                vc5_flush_jobs_reading_resource(vc5, cbufs[rt]->texture);
+                pipe_surface_reference(&job->cbufs[rt], cbufs[rt]);
+
+                if (cbufs[rt]->texture->nr_samples > 1)
+                        job->msaa = true;
+        }
+        if (zsbuf) {
+                vc5_flush_jobs_reading_resource(vc5, zsbuf->texture);
+                pipe_surface_reference(&job->zsbuf, zsbuf);
+                if (zsbuf->texture->nr_samples > 1)
+                        job->msaa = true;
+        }
+
+        vc5_job_set_tile_buffer_size(job);
+
+        /* Only now register the job as the writer of its buffers; doing it
+         * earlier would let the flushes above find the new job.
+         */
+        for (int rt = 0; rt < VC5_MAX_DRAW_BUFFERS; rt++) {
+                if (!cbufs[rt])
+                        continue;
+
+                _mesa_hash_table_insert(vc5->write_jobs,
+                                        cbufs[rt]->texture, job);
+        }
+        if (zsbuf)
+                _mesa_hash_table_insert(vc5->write_jobs, zsbuf->texture, job);
+
+        /* The table key has to live as long as the job, so store a copy
+         * inside the job itself.
+         */
+        memcpy(&job->key, &lookup_key, sizeof(lookup_key));
+        _mesa_hash_table_insert(vc5->jobs, &job->key, job);
+
+        return job;
+}
+
+/**
+ * Returns the job for the currently bound framebuffer, looking one up (or
+ * creating it) and binding it to vc5->job if no job is bound yet.
+ */
+struct vc5_job *
+vc5_get_job_for_fbo(struct vc5_context *vc5)
+{
+        if (vc5->job)
+                return vc5->job;
+
+        struct pipe_surface **cbufs = vc5->framebuffer.cbufs;
+        struct pipe_surface *zsbuf = vc5->framebuffer.zsbuf;
+        struct vc5_job *job = vc5_get_job(vc5, cbufs, zsbuf);
+
+        /* Dirty flags only track updates made while vc5->job was bound, so
+         * mark everything dirty when switching jobs.  All state also has to
+         * be reset at the start of rendering.
+         */
+        vc5->dirty = ~0;
+
+        /* Buffers that have never been written don't need their contents
+         * loaded before drawing, so flag them as cleared.
+         */
+        for (int rt = 0; rt < 4; rt++) {
+                if (!cbufs[rt])
+                        continue;
+
+                if (!vc5_resource(cbufs[rt]->texture)->writes)
+                        job->cleared |= PIPE_CLEAR_COLOR0 << rt;
+        }
+
+        if (zsbuf && !vc5_resource(zsbuf->texture)->writes)
+                job->cleared |= PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL;
+
+        /* Number of tiles needed to cover the framebuffer in each axis. */
+        job->draw_tiles_x = DIV_ROUND_UP(vc5->framebuffer.width,
+                                         job->tile_width);
+        job->draw_tiles_y = DIV_ROUND_UP(vc5->framebuffer.height,
+                                         job->tile_height);
+
+        vc5->job = job;
+        return job;
+}
+
+/**
+ * Address lookup callback passed to clif_dump_init().
+ *
+ * Searches the job's BO set for a BO whose [offset, offset + size) range
+ * contains addr.  On a hit the BO is mapped, *vaddr is set to the matching
+ * location inside the mapping and true is returned; otherwise false.
+ */
+static bool
+vc5_clif_dump_lookup(void *data, uint32_t addr, void **vaddr)
+{
+        struct vc5_job *job = data;
+        struct set_entry *bo_entry;
+
+        set_foreach(job->bos, bo_entry) {
+                struct vc5_bo *bo = (void *)bo_entry->key;
+
+                if (addr < bo->offset || addr >= bo->offset + bo->size)
+                        continue;
+
+                vc5_bo_map(bo);
+                *vaddr = bo->map + addr - bo->offset;
+                return true;
+        }
+
+        return false;
+}
+
+/**
+ * When V3D_DEBUG_CL is set, prints the address ranges of the job's BCL and
+ * RCL to stderr and hands the start address of each to a CLIF dumper.
+ *
+ * NOTE(review): the dumper returned by clif_dump_init() is not released
+ * here -- confirm whether it needs an explicit destroy call.
+ */
+static void
+vc5_clif_dump(struct vc5_context *vc5, struct vc5_job *job)
+{
+        if ((V3D_DEBUG & V3D_DEBUG_CL) == 0)
+                return;
+
+        struct clif_dump *dumper =
+                clif_dump_init(&vc5->screen->devinfo, stderr,
+                               vc5_clif_dump_lookup, job);
+
+        fprintf(stderr, "BCL: 0x%08x..0x%08x\n",
+                job->submit.bcl_start, job->submit.bcl_end);
+        clif_dump_add_cl(dumper, job->submit.bcl_start);
+
+        fprintf(stderr, "RCL: 0x%08x..0x%08x\n",
+                job->submit.rcl_start, job->submit.rcl_end);
+        clif_dump_add_cl(dumper, job->submit.rcl_start);
+}
+
+/**
+ * Submits the job to the kernel (or to the simulator when built with
+ * USE_VC5_SIMULATOR) and then frees it with vc5_job_free().
+ *
+ * Jobs that don't need a flush, or whose draw bounds are empty, skip the
+ * submission and are only freed.
+ */
+void
+vc5_job_submit(struct vc5_context *vc5, struct vc5_job *job)
+{
+        if (!job->needs_flush)
+                goto done;
+
+        /* The RCL setup would choke if the draw bounds cause no drawing, so
+         * just drop the drawing if that's the case.
+         */
+        if (job->draw_max_x <= job->draw_min_x ||
+            job->draw_max_y <= job->draw_min_y) {
+                goto done;
+        }
+
+        vc5_emit_rcl(job);
+
+        /* Only terminate the BCL if something was emitted into it. */
+        if (cl_offset(&job->bcl) > 0) {
+                vc5_cl_ensure_space_with_branch(&job->bcl, 2);
+
+                /* Increment the semaphore indicating that binning is done and
+                 * unblocking the render thread.  Note that this doesn't act
+                 * until the FLUSH completes.
+                 */
+                cl_emit(&job->bcl, INCREMENT_SEMAPHORE, incr);
+
+                /* The FLUSH caps all of our bin lists with a
+                 * VC5_PACKET_RETURN.
+                 */
+                cl_emit(&job->bcl, FLUSH, flush);
+        }
+
+        /* End addresses are the BO's address plus the bytes written so far. */
+        job->submit.bcl_end = job->bcl.bo->offset + cl_offset(&job->bcl);
+        job->submit.rcl_end = job->rcl.bo->offset + cl_offset(&job->rcl);
+
+        vc5_clif_dump(vc5, job);
+
+        /* V3D_DEBUG_NORAST skips the actual submission. */
+        if (!(V3D_DEBUG & V3D_DEBUG_NORAST)) {
+                int ret;
+
+#ifndef USE_VC5_SIMULATOR
+                ret = drmIoctl(vc5->fd, DRM_IOCTL_VC5_SUBMIT_CL, &job->submit);
+#else
+                ret = vc5_simulator_flush(vc5, &job->submit, job);
+#endif
+                /* Report only the first failing submission. */
+                static bool warned = false;
+                if (ret && !warned) {
+                        fprintf(stderr, "Draw call returned %s.  "
+                                        "Expect corruption.\n", strerror(errno));
+                        warned = true;
+                }
+        }
+
+        /* Throttle: once more than 5 seqnos separate the last emitted job
+         * from the last finished one, wait on seqno last_emit_seqno - 5.
+         */
+        if (vc5->last_emit_seqno - vc5->screen->finished_seqno > 5) {
+                if (!vc5_wait_seqno(vc5->screen,
+                                    vc5->last_emit_seqno - 5,
+                                    PIPE_TIMEOUT_INFINITE,
+                                    "job throttling")) {
+                        fprintf(stderr, "Job throttling failed\n");
+                }
+        }
+
+done:
+        vc5_job_free(vc5, job);
+}
+
+/* Key equality callback for vc5->jobs: byte-wise comparison of two
+ * struct vc5_job_key values.
+ */
+static bool
+vc5_job_compare(const void *a, const void *b)
+{
+        const size_t key_bytes = sizeof(struct vc5_job_key);
+
+        return memcmp(a, b, key_bytes) == 0;
+}
+
+/* Hash callback for vc5->jobs: hashes the raw bytes of a
+ * struct vc5_job_key.
+ */
+static uint32_t
+vc5_job_hash(const void *key)
+{
+        const size_t key_bytes = sizeof(struct vc5_job_key);
+
+        return _mesa_hash_data(key, key_bytes);
+}
+
+/**
+ * Creates the context's two job tracking tables, both allocated with the
+ * context as their memory parent.
+ */
+void
+vc5_job_init(struct vc5_context *vc5)
+{
+        /* struct vc5_job_key contents -> job. */
+        vc5->jobs =
+                _mesa_hash_table_create(vc5, vc5_job_hash, vc5_job_compare);
+
+        /* Resource pointer -> job registered as writing it. */
+        vc5->write_jobs =
+                _mesa_hash_table_create(vc5, _mesa_hash_pointer,
+                                        _mesa_key_pointer_equal);
+}
+
diff --git a/src/gallium/drivers/vc5/vc5_program.c b/src/gallium/drivers/vc5/vc5_program.c
new file mode 100644 (file)
index 0000000..02625ed
--- /dev/null
@@ -0,0 +1,565 @@
+/*
+ * Copyright © 2014-2017 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <inttypes.h>
+#include "util/u_format.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "util/ralloc.h"
+#include "util/hash_table.h"
+#include "tgsi/tgsi_dump.h"
+#include "tgsi/tgsi_parse.h"
+#include "compiler/nir/nir.h"
+#include "compiler/nir/nir_builder.h"
+#include "nir/tgsi_to_nir.h"
+#include "compiler/v3d_compiler.h"
+#include "vc5_context.h"
+#include "broadcom/cle/v3d_packet_v33_pack.h"
+
+/**
+ * Translates the gallium stream output description into the list of
+ * varying slots to be output for transform feedback (so->tf_outputs) and
+ * the packed TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC records (so->tf_specs),
+ * one per buffer that has outputs.
+ *
+ * Does nothing when the shader has no stream outputs.
+ */
+static void
+vc5_set_transform_feedback_outputs(struct vc5_uncompiled_shader *so,
+                                   const struct pipe_stream_output_info *stream_output)
+{
+        if (!stream_output->num_outputs)
+                return;
+
+        struct v3d_varying_slot slots[PIPE_MAX_SO_OUTPUTS * 4];
+        int slot_count = 0;
+
+        for (int buffer = 0; buffer < PIPE_MAX_SO_BUFFERS; buffer++) {
+                /* Number of 32-bit values laid out in this buffer so far. */
+                uint32_t buffer_offset = 0;
+                uint32_t vpm_start = slot_count;
+
+                for (int i = 0; i < stream_output->num_outputs; i++) {
+                        const struct pipe_stream_output *output =
+                                &stream_output->output[i];
+
+                        if (output->output_buffer != buffer)
+                                continue;
+
+                        /* We assume that the SO outputs appear in increasing
+                         * order in the buffer.
+                         */
+                        assert(output->dst_offset >= buffer_offset);
+
+                        /* Pad any undefined slots in the output.  The offset
+                         * has to advance along with the slots, otherwise
+                         * every later output in this buffer would be padded
+                         * from the start of the buffer again.
+                         */
+                        while (buffer_offset < output->dst_offset) {
+                                slots[slot_count] =
+                                        v3d_slot_from_slot_and_component(VARYING_SLOT_POS, 0);
+                                slot_count++;
+                                buffer_offset++;
+                        }
+
+                        /* Set the coordinate shader up to output the
+                         * components of this varying.
+                         */
+                        for (int j = 0; j < output->num_components; j++) {
+                                slots[slot_count] =
+                                        v3d_slot_from_slot_and_component(VARYING_SLOT_VAR0 +
+                                                                         output->register_index,
+                                                                         output->start_component + j);
+                                slot_count++;
+                                buffer_offset++;
+                        }
+                }
+
+                /* Buffers without any outputs get no spec. */
+                uint32_t vpm_size = slot_count - vpm_start;
+                if (!vpm_size)
+                        continue;
+
+                struct V3D33_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC unpacked = {
+                        .first_shaded_vertex_value_to_output = vpm_start,
+                        .number_of_consecutive_vertex_values_to_output_as_32_bit_values = vpm_size,
+                        .output_buffer_to_write_to = buffer,
+                };
+                V3D33_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC_pack(NULL,
+                                                               (void *)&so->tf_specs[so->num_tf_specs++],
+                                                               &unpacked);
+        }
+
+        /* Keep a copy of the slot list, allocated as a child of the NIR
+         * shader.
+         */
+        so->num_tf_outputs = slot_count;
+        so->tf_outputs = ralloc_array(so->base.ir.nir, struct v3d_varying_slot,
+                                      slot_count);
+        memcpy(so->tf_outputs, slots, sizeof(*slots) * slot_count);
+}
+
+/* Type size callback handed to nir_lower_io: the attribute slot count
+ * glsl_count_attribute_slots() reports for the type, with its second
+ * argument false.
+ */
+static int
+type_size(const struct glsl_type *type)
+{
+        int slot_count = glsl_count_attribute_slots(type, false);
+
+        return slot_count;
+}
+
+/**
+ * Creates the driver's uncompiled shader CSO from a gallium shader state.
+ *
+ * The shader is taken as NIR directly or translated from TGSI, run through
+ * a fixed series of NIR passes, and stored in the CSO along with its
+ * transform feedback layout.  Returns NULL if the CSO can't be allocated.
+ */
+static void *
+vc5_shader_state_create(struct pipe_context *pctx,
+                        const struct pipe_shader_state *cso)
+{
+        struct vc5_context *vc5 = vc5_context(pctx);
+        struct vc5_uncompiled_shader *so = CALLOC_STRUCT(vc5_uncompiled_shader);
+        if (so == NULL)
+                return NULL;
+
+        so->program_id = vc5->next_uncompiled_program_id++;
+
+        nir_shader *shader_nir;
+
+        if (cso->type != PIPE_SHADER_IR_NIR) {
+                assert(cso->type == PIPE_SHADER_IR_TGSI);
+
+                if (V3D_DEBUG & V3D_DEBUG_TGSI) {
+                        fprintf(stderr, "prog %d TGSI:\n",
+                                so->program_id);
+                        tgsi_dump(cso->tokens, 0);
+                        fprintf(stderr, "\n");
+                }
+                shader_nir = tgsi_to_nir(cso->tokens, &v3d_nir_options);
+        } else {
+                /* The backend takes ownership of the NIR shader on state
+                 * creation.
+                 */
+                shader_nir = cso->ir.nir;
+
+                NIR_PASS_V(shader_nir, nir_lower_io, nir_var_all, type_size,
+                           (nir_lower_io_options)0);
+        }
+
+        NIR_PASS_V(shader_nir, nir_opt_global_to_local);
+        NIR_PASS_V(shader_nir, nir_lower_regs_to_ssa);
+        NIR_PASS_V(shader_nir, nir_normalize_cubemap_coords);
+
+        NIR_PASS_V(shader_nir, nir_lower_load_const_to_scalar);
+
+        v3d_optimize_nir(shader_nir);
+
+        NIR_PASS_V(shader_nir, nir_remove_dead_variables, nir_var_local);
+
+        /* Garbage collect dead instructions */
+        nir_sweep(shader_nir);
+
+        /* Whatever the input IR was, the CSO always stores NIR. */
+        so->base.type = PIPE_SHADER_IR_NIR;
+        so->base.ir.nir = shader_nir;
+
+        vc5_set_transform_feedback_outputs(so, &cso->stream_output);
+
+        if (V3D_DEBUG & (V3D_DEBUG_NIR |
+                         v3d_debug_flag_for_shader_stage(shader_nir->stage))) {
+                fprintf(stderr, "%s prog %d NIR:\n",
+                        gl_shader_stage_name(shader_nir->stage),
+                        so->program_id);
+                nir_print_shader(shader_nir, stderr);
+                fprintf(stderr, "\n");
+        }
+
+        return so;
+}
+
+/**
+ * Returns the compiled variant of key->shader_state matching the given key,
+ * compiling and caching it on first use.
+ *
+ * Fragment shaders are cached in vc5->fs_cache, all other stages in
+ * vc5->vs_cache.  The QPU code is uploaded to a freshly allocated BO, and a
+ * copy of the key is stored as a ralloc child of the returned shader so
+ * that it lives exactly as long as the cache entry's data.
+ */
+static struct vc5_compiled_shader *
+vc5_get_compiled_shader(struct vc5_context *vc5, struct v3d_key *key)
+{
+        struct vc5_uncompiled_shader *shader_state = key->shader_state;
+        nir_shader *s = shader_state->base.ir.nir;
+
+        /* The key is really a v3d_fs_key or v3d_vs_key; pick the cache and
+         * the number of key bytes to copy accordingly.
+         */
+        struct hash_table *ht;
+        uint32_t key_size;
+        if (s->stage == MESA_SHADER_FRAGMENT) {
+                ht = vc5->fs_cache;
+                key_size = sizeof(struct v3d_fs_key);
+        } else {
+                ht = vc5->vs_cache;
+                key_size = sizeof(struct v3d_vs_key);
+        }
+
+        struct hash_entry *entry = _mesa_hash_table_search(ht, key);
+        if (entry)
+                return entry->data;
+
+        struct vc5_compiled_shader *shader =
+                rzalloc(NULL, struct vc5_compiled_shader);
+
+        int program_id = shader_state->program_id;
+        int variant_id =
+                p_atomic_inc_return(&shader_state->compiled_variant_count);
+        uint64_t *qpu_insts;
+        uint32_t shader_size;
+
+        switch (s->stage) {
+        case MESA_SHADER_VERTEX:
+                shader->prog_data.vs = rzalloc(shader, struct v3d_vs_prog_data);
+
+                qpu_insts = v3d_compile_vs(vc5->screen->compiler,
+                                           (struct v3d_vs_key *)key,
+                                           shader->prog_data.vs, s,
+                                           program_id, variant_id,
+                                           &shader_size);
+                break;
+        case MESA_SHADER_FRAGMENT:
+                shader->prog_data.fs = rzalloc(shader, struct v3d_fs_prog_data);
+
+                qpu_insts = v3d_compile_fs(vc5->screen->compiler,
+                                           (struct v3d_fs_key *)key,
+                                           shader->prog_data.fs, s,
+                                           program_id, variant_id,
+                                           &shader_size);
+                break;
+        default:
+                unreachable("bad stage");
+        }
+
+        vc5_set_shader_uniform_dirty_flags(shader);
+
+        /* Upload the instructions; the compiler's buffer is released with
+         * free() once it has been copied.
+         */
+        shader->bo = vc5_bo_alloc(vc5->screen, shader_size, "shader");
+        vc5_bo_map(shader->bo);
+        memcpy(shader->bo->map, qpu_insts, shader_size);
+
+        free(qpu_insts);
+
+        /* The caller's key is typically on its stack, so the table gets its
+         * own copy, owned by the shader.
+         */
+        struct v3d_key *dup_key = ralloc_size(shader, key_size);
+        memcpy(dup_key, key, key_size);
+        _mesa_hash_table_insert(ht, dup_key, shader);
+
+        return shader;
+}
+
+/**
+ * Fills in the part of a shader key shared between stages: per-texture
+ * return size/channels, swizzle, MSAA dimensions or sampler parameters,
+ * plus the enabled user clip planes.
+ *
+ * Texture units without a sampler view are skipped, and non-MSAA units
+ * without a bound sampler state keep their sampler-derived key fields
+ * as they were on entry.
+ */
+static void
+vc5_setup_shared_key(struct vc5_context *vc5, struct v3d_key *key,
+                     struct vc5_texture_stateobj *texstate)
+{
+        for (int i = 0; i < texstate->num_textures; i++) {
+                struct pipe_sampler_view *sampler = texstate->textures[i];
+                struct pipe_sampler_state *sampler_state =
+                        texstate->samplers[i];
+
+                if (!sampler)
+                        continue;
+
+                struct vc5_sampler_view *vc5_sampler = vc5_sampler_view(sampler);
+
+                key->tex[i].return_size =
+                        vc5_get_tex_return_size(sampler->format);
+
+                /* For 16-bit, we set up the sampler to always return 2
+                 * channels (meaning no recompiles for most statechanges),
+                 * while for 32 we actually scale the returns with channels.
+                 */
+                if (key->tex[i].return_size == 16) {
+                        key->tex[i].return_channels = 2;
+                } else {
+                        key->tex[i].return_channels =
+                                vc5_get_tex_return_channels(sampler->format);
+                }
+
+                if (key->tex[i].return_size == 32) {
+                        memcpy(key->tex[i].swizzle,
+                               vc5_sampler->swizzle,
+                               sizeof(vc5_sampler->swizzle));
+                } else {
+                        /* For 16-bit returns, we let the sampler state handle
+                         * the swizzle.
+                         */
+                        key->tex[i].swizzle[0] = PIPE_SWIZZLE_X;
+                        key->tex[i].swizzle[1] = PIPE_SWIZZLE_Y;
+                        key->tex[i].swizzle[2] = PIPE_SWIZZLE_Z;
+                        key->tex[i].swizzle[3] = PIPE_SWIZZLE_W;
+                }
+
+                if (sampler->texture->nr_samples > 1) {
+                        key->tex[i].msaa_width = sampler->texture->width0;
+                        key->tex[i].msaa_height = sampler->texture->height0;
+                } else if (sampler_state) {
+                        /* The view is known to be non-NULL here; it is the
+                         * sampler state that may be unbound.
+                         */
+                        key->tex[i].compare_mode = sampler_state->compare_mode;
+                        key->tex[i].compare_func = sampler_state->compare_func;
+                        key->tex[i].wrap_s = sampler_state->wrap_s;
+                        key->tex[i].wrap_t = sampler_state->wrap_t;
+                }
+        }
+
+        key->ucp_enables = vc5->rasterizer->base.clip_plane_enable;
+}
+
+/**
+ * Rebuilds the fragment shader key from current state and switches
+ * vc5->prog.fs to the matching compiled variant.
+ *
+ * Returns early unless one of the state groups feeding the key is dirty.
+ * When the variant changes, VC5_DIRTY_COMPILED_FS is raised, along with
+ * VC5_DIRTY_FLAT_SHADE_FLAGS and/or VC5_DIRTY_FS_INPUTS when the new
+ * variant's prog_data differs from the previous one's in those respects.
+ */
+static void
+vc5_update_compiled_fs(struct vc5_context *vc5, uint8_t prim_mode)
+{
+        struct vc5_job *job = vc5->job;
+        struct v3d_fs_key fs_key;
+
+        if ((vc5->dirty & (VC5_DIRTY_PRIM_MODE |
+                           VC5_DIRTY_BLEND |
+                           VC5_DIRTY_FRAMEBUFFER |
+                           VC5_DIRTY_ZSA |
+                           VC5_DIRTY_RASTERIZER |
+                           VC5_DIRTY_SAMPLE_MASK |
+                           VC5_DIRTY_FRAGTEX |
+                           VC5_DIRTY_UNCOMPILED_FS)) == 0) {
+                return;
+        }
+
+        /* The key is hashed and compared as raw bytes, so start from all
+         * zeroes.
+         */
+        memset(&fs_key, 0, sizeof(fs_key));
+        vc5_setup_shared_key(vc5, &fs_key.base, &vc5->fragtex);
+        fs_key.base.shader_state = vc5->prog.bind_fs;
+
+        /* Primitive type. */
+        fs_key.is_points = (prim_mode == PIPE_PRIM_POINTS);
+        fs_key.is_lines = (prim_mode >= PIPE_PRIM_LINES &&
+                           prim_mode <= PIPE_PRIM_LINE_STRIP);
+
+        fs_key.clamp_color = vc5->rasterizer->base.clamp_fragment_color;
+
+        /* A disabled logic op is keyed as a plain copy. */
+        if (!vc5->blend->logicop_enable)
+                fs_key.logicop_func = PIPE_LOGICOP_COPY;
+        else
+                fs_key.logicop_func = vc5->blend->logicop_func;
+
+        /* Multisample state only enters the key for MSAA jobs. */
+        if (job->msaa) {
+                fs_key.msaa = vc5->rasterizer->base.multisample;
+                fs_key.sample_coverage =
+                        (vc5->rasterizer->base.multisample &&
+                         vc5->sample_mask != (1 << VC5_MAX_SAMPLES) - 1);
+                fs_key.sample_alpha_to_coverage =
+                        vc5->blend->alpha_to_coverage;
+                fs_key.sample_alpha_to_one = vc5->blend->alpha_to_one;
+        }
+
+        fs_key.depth_enabled = (vc5->zsa->base.depth.enabled ||
+                                vc5->zsa->base.stencil[0].enabled);
+        if (vc5->zsa->base.alpha.enabled) {
+                fs_key.alpha_test = true;
+                fs_key.alpha_test_func = vc5->zsa->base.alpha.func;
+        }
+
+        /* Red/blue swap is decided by the first color buffer's format. */
+        struct pipe_surface *cbuf0 = vc5->framebuffer.cbufs[0];
+        if (cbuf0) {
+                const struct util_format_description *desc =
+                        util_format_description(cbuf0->format);
+
+                fs_key.swap_color_rb = desc->swizzle[0] == PIPE_SWIZZLE_Z;
+        }
+
+        if (fs_key.is_points) {
+                fs_key.point_sprite_mask =
+                        vc5->rasterizer->base.sprite_coord_enable;
+                fs_key.point_coord_upper_left =
+                        (vc5->rasterizer->base.sprite_coord_mode ==
+                         PIPE_SPRITE_COORD_UPPER_LEFT);
+        }
+
+        fs_key.light_twoside = vc5->rasterizer->base.light_twoside;
+
+        struct vc5_compiled_shader *prev_fs = vc5->prog.fs;
+        vc5->prog.fs = vc5_get_compiled_shader(vc5, &fs_key.base);
+        if (vc5->prog.fs == prev_fs)
+                return;
+
+        vc5->dirty |= VC5_DIRTY_COMPILED_FS;
+
+        /* With no previous variant there is nothing to compare against. */
+        if (!prev_fs)
+                return;
+
+        if (vc5->prog.fs->prog_data.fs->flat_shade_flags !=
+            prev_fs->prog_data.fs->flat_shade_flags ||
+            (vc5->rasterizer->base.flatshade &&
+             vc5->prog.fs->prog_data.fs->color_inputs !=
+             prev_fs->prog_data.fs->color_inputs)) {
+                vc5->dirty |= VC5_DIRTY_FLAT_SHADE_FLAGS;
+        }
+
+        if (memcmp(vc5->prog.fs->prog_data.fs->input_slots,
+                   prev_fs->prog_data.fs->input_slots,
+                   sizeof(vc5->prog.fs->prog_data.fs->input_slots)) != 0) {
+                vc5->dirty |= VC5_DIRTY_FS_INPUTS;
+        }
+}
+
+/**
+ * Rebuilds the vertex shader key from current state and updates both
+ * vc5->prog.vs and vc5->prog.cs (the coordinate shader variant of the same
+ * bound shader).
+ *
+ * Returns early unless one of the state groups feeding the key is dirty.
+ * Reads vc5->prog.fs, so the fragment shader has to be up to date first.
+ * The same key storage is used for both lookups: it is filled in for the
+ * vertex shader, looked up, and then modified in place for the coordinate
+ * shader.
+ */
+static void
+vc5_update_compiled_vs(struct vc5_context *vc5, uint8_t prim_mode)
+{
+        struct v3d_vs_key local_key;
+        struct v3d_vs_key *key = &local_key;
+
+        if (!(vc5->dirty & (VC5_DIRTY_PRIM_MODE |
+                            VC5_DIRTY_RASTERIZER |
+                            VC5_DIRTY_VERTTEX |
+                            VC5_DIRTY_VTXSTATE |
+                            VC5_DIRTY_UNCOMPILED_VS |
+                            VC5_DIRTY_FS_INPUTS))) {
+                return;
+        }
+
+        /* The key is hashed and compared as raw bytes, so zero it first. */
+        memset(key, 0, sizeof(*key));
+        vc5_setup_shared_key(vc5, &key->base, &vc5->verttex);
+        key->base.shader_state = vc5->prog.bind_vs;
+        /* The vertex shader variant is keyed on the inputs the current
+         * fragment shader reads.
+         */
+        key->num_fs_inputs = vc5->prog.fs->prog_data.fs->base.num_inputs;
+        STATIC_ASSERT(sizeof(key->fs_inputs) ==
+                      sizeof(vc5->prog.fs->prog_data.fs->input_slots));
+        memcpy(key->fs_inputs, vc5->prog.fs->prog_data.fs->input_slots,
+               sizeof(key->fs_inputs));
+        key->clamp_color = vc5->rasterizer->base.clamp_vertex_color;
+
+        key->per_vertex_point_size =
+                (prim_mode == PIPE_PRIM_POINTS &&
+                 vc5->rasterizer->base.point_size_per_vertex);
+
+        struct vc5_compiled_shader *vs =
+                vc5_get_compiled_shader(vc5, &key->base);
+        if (vs != vc5->prog.vs) {
+                vc5->prog.vs = vs;
+                vc5->dirty |= VC5_DIRTY_COMPILED_VS;
+        }
+
+        /* From here on the key describes the coordinate shader. */
+        key->is_coord = true;
+        /* Coord shaders only output varyings used by transform feedback. */
+        struct vc5_uncompiled_shader *shader_state = key->base.shader_state;
+        memcpy(key->fs_inputs, shader_state->tf_outputs,
+               sizeof(*key->fs_inputs) * shader_state->num_tf_outputs);
+        /* Zero whatever FS input slots remain past the TF outputs, so that
+         * leftovers from the vertex shader key don't end up in this key.
+         */
+        if (shader_state->num_tf_outputs < key->num_fs_inputs) {
+                memset(&key->fs_inputs[shader_state->num_tf_outputs],
+                       0,
+                       sizeof(*key->fs_inputs) * (key->num_fs_inputs -
+                                                  shader_state->num_tf_outputs));
+        }
+        key->num_fs_inputs = shader_state->num_tf_outputs;
+
+        struct vc5_compiled_shader *cs =
+                vc5_get_compiled_shader(vc5, &key->base);
+        if (cs != vc5->prog.cs) {
+                vc5->prog.cs = cs;
+                vc5->dirty |= VC5_DIRTY_COMPILED_CS;
+        }
+}
+
+/**
+ * Brings the compiled fragment, vertex and coordinate shaders in line with
+ * the current state for the given primitive mode.
+ *
+ * The fragment shader is updated first: the vertex shader key is built from
+ * vc5->prog.fs, and the FS update may raise VC5_DIRTY_FS_INPUTS, which the
+ * VS update checks.
+ */
+void
+vc5_update_compiled_shaders(struct vc5_context *vc5, uint8_t prim_mode)
+{
+        vc5_update_compiled_fs(vc5, prim_mode);
+        vc5_update_compiled_vs(vc5, prim_mode);
+}
+
+/* Hash callback for the FS cache: hashes the raw bytes of a
+ * struct v3d_fs_key.
+ */
+static uint32_t
+fs_cache_hash(const void *key)
+{
+        const size_t key_bytes = sizeof(struct v3d_fs_key);
+
+        return _mesa_hash_data(key, key_bytes);
+}
+
+/* Hash callback for the VS cache: hashes the raw bytes of a
+ * struct v3d_vs_key.
+ */
+static uint32_t
+vs_cache_hash(const void *key)
+{
+        const size_t key_bytes = sizeof(struct v3d_vs_key);
+
+        return _mesa_hash_data(key, key_bytes);
+}
+
+/* Key equality callback for the FS cache: byte-wise comparison of two
+ * struct v3d_fs_key values.
+ */
+static bool
+fs_cache_compare(const void *key1, const void *key2)
+{
+        const size_t key_bytes = sizeof(struct v3d_fs_key);
+
+        return memcmp(key1, key2, key_bytes) == 0;
+}
+
+/* Key equality callback for the VS cache: byte-wise comparison of two
+ * struct v3d_vs_key values.
+ */
+static bool
+vs_cache_compare(const void *key1, const void *key2)
+{
+        const size_t key_bytes = sizeof(struct v3d_vs_key);
+
+        return memcmp(key1, key2, key_bytes) == 0;
+}
+
+/**
+ * Drops one cache entry if it was compiled from the uncompiled shader so.
+ *
+ * The entry is removed from ht, the variant's BO reference is released and
+ * the variant is freed.  If *last_compile pointed at that variant it is
+ * reset to NULL.  Entries belonging to other shaders are left alone.
+ */
+static void
+delete_from_cache_if_matches(struct hash_table *ht,
+                             struct vc5_compiled_shader **last_compile,
+                             struct hash_entry *entry,
+                             struct vc5_uncompiled_shader *so)
+{
+        const struct v3d_key *key = entry->key;
+        if (key->shader_state != so)
+                return;
+
+        struct vc5_compiled_shader *variant = entry->data;
+
+        _mesa_hash_table_remove(ht, entry);
+        vc5_bo_unreference(&variant->bo);
+
+        if (*last_compile == variant)
+                *last_compile = NULL;
+
+        ralloc_free(variant);
+}
+
+/**
+ * Destroys an uncompiled shader CSO together with every compiled variant
+ * cached for it.
+ *
+ * Vertex and coordinate shader variants both live in vs_cache, so
+ * vc5->prog.cs has to be cleared along with vc5->prog.vs when its variant
+ * is freed; otherwise it would be left pointing at freed memory.
+ */
+static void
+vc5_shader_state_delete(struct pipe_context *pctx, void *hwcso)
+{
+        struct vc5_context *vc5 = vc5_context(pctx);
+        struct vc5_uncompiled_shader *so = hwcso;
+
+        struct hash_entry *entry;
+        hash_table_foreach(vc5->fs_cache, entry) {
+                delete_from_cache_if_matches(vc5->fs_cache, &vc5->prog.fs,
+                                             entry, so);
+        }
+        hash_table_foreach(vc5->vs_cache, entry) {
+                const struct v3d_key *key = entry->key;
+
+                /* delete_from_cache_if_matches() resets only one pointer,
+                 * so drop the coordinate shader reference here before the
+                 * variant is freed.
+                 */
+                if (key->shader_state == so && entry->data == vc5->prog.cs)
+                        vc5->prog.cs = NULL;
+
+                delete_from_cache_if_matches(vc5->vs_cache, &vc5->prog.vs,
+                                             entry, so);
+        }
+
+        /* The NIR shader is owned by the CSO. */
+        ralloc_free(so->base.ir.nir);
+        free(so);
+}
+
+/* Binds a fragment shader CSO and flags it for (re)compilation. */
+static void
+vc5_fp_state_bind(struct pipe_context *pctx, void *hwcso)
+{
+        struct vc5_context *vc5 = vc5_context(pctx);
+
+        vc5->dirty |= VC5_DIRTY_UNCOMPILED_FS;
+        vc5->prog.bind_fs = hwcso;
+}
+
+/* Binds a vertex shader CSO and flags it for (re)compilation. */
+static void
+vc5_vp_state_bind(struct pipe_context *pctx, void *hwcso)
+{
+        struct vc5_context *vc5 = vc5_context(pctx);
+
+        vc5->dirty |= VC5_DIRTY_UNCOMPILED_VS;
+        vc5->prog.bind_vs = hwcso;
+}
+
+/**
+ * Installs the shader state hooks on the pipe context and creates the
+ * compiled-variant caches.
+ */
+void
+vc5_program_init(struct pipe_context *pctx)
+{
+        struct vc5_context *vc5 = vc5_context(pctx);
+
+        /* Vertex and fragment CSOs share the create/delete code. */
+        pctx->create_fs_state = vc5_shader_state_create;
+        pctx->create_vs_state = vc5_shader_state_create;
+        pctx->delete_fs_state = vc5_shader_state_delete;
+        pctx->delete_vs_state = vc5_shader_state_delete;
+
+        pctx->bind_vs_state = vc5_vp_state_bind;
+        pctx->bind_fs_state = vc5_fp_state_bind;
+
+        /* Caches of compiled variants, keyed by the raw key contents. */
+        vc5->fs_cache = _mesa_hash_table_create(pctx, fs_cache_hash,
+                                                fs_cache_compare);
+        vc5->vs_cache = _mesa_hash_table_create(pctx, vs_cache_hash,
+                                                vs_cache_compare);
+}
+
+/**
+ * Frees every compiled shader variant held in the FS and VS caches.
+ *
+ * Each entry is removed from its table before the variant is freed: the
+ * entry's key is a ralloc child of the variant, so freeing first would
+ * leave the table holding a dangling key pointer until the removal.  This
+ * matches the order used by delete_from_cache_if_matches().
+ */
+void
+vc5_program_fini(struct pipe_context *pctx)
+{
+        struct vc5_context *vc5 = vc5_context(pctx);
+        struct hash_table *caches[] = { vc5->fs_cache, vc5->vs_cache };
+
+        for (int i = 0; i < 2; i++) {
+                struct hash_entry *entry;
+
+                hash_table_foreach(caches[i], entry) {
+                        struct vc5_compiled_shader *shader = entry->data;
+
+                        _mesa_hash_table_remove(caches[i], entry);
+                        vc5_bo_unreference(&shader->bo);
+                        ralloc_free(shader);
+                }
+        }
+}
diff --git a/src/gallium/drivers/vc5/vc5_query.c b/src/gallium/drivers/vc5/vc5_query.c
new file mode 100644 (file)
index 0000000..c114e76
--- /dev/null
@@ -0,0 +1,91 @@
+/*
+ * Copyright © 2014 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/**
+ * Stub support for occlusion queries.
+ *
+ * Since we expose support for GL 2.0, we have to expose occlusion queries,
+ * but the spec allows you to expose 0 query counter bits, so we just return 0
+ * as the result of all our queries.
+ */
+#include "vc5_context.h"
+
+/* Placeholder query object.  Its only member is a pad byte, so it carries no
+ * state.
+ */
+struct vc5_query
+{
+        uint8_t pad;
+};
+
+/**
+ * Allocates a zero-filled placeholder query object.
+ *
+ * query_type and index are ignored.  Returns NULL if the allocation fails.
+ */
+static struct pipe_query *
+vc5_create_query(struct pipe_context *ctx, unsigned query_type, unsigned index)
+{
+        /* struct pipe_query isn't actually defined anywhere, so the driver's
+         * own struct is simply cast to it.
+         */
+        return (struct pipe_query *)calloc(1, sizeof(struct vc5_query));
+}
+
+/* Frees a query object; the pointer is passed straight to free(). */
+static void
+vc5_destroy_query(struct pipe_context *ctx, struct pipe_query *query)
+{
+        free(query);
+}
+
+/* Stub: nothing is recorded when a query begins; always returns true. */
+static boolean
+vc5_begin_query(struct pipe_context *ctx, struct pipe_query *query)
+{
+        return true;
+}
+
+/* Stub: nothing is recorded when a query ends; always returns true. */
+static bool
+vc5_end_query(struct pipe_context *ctx, struct pipe_query *query)
+{
+        return true;
+}
+
+/**
+ * Reports the result of a query.
+ *
+ * Queries are stubbed out, so the 64-bit result is always written as zero
+ * and the call always returns true; the query and wait arguments are
+ * ignored.
+ */
+static boolean
+vc5_get_query_result(struct pipe_context *ctx, struct pipe_query *query,
+                     boolean wait, union pipe_query_result *vresult)
+{
+        vresult->u64 = 0;
+        return true;
+}
+
+/* Stub: intentionally does nothing. */
+static void
+vc5_set_active_query_state(struct pipe_context *pipe, boolean enable)
+{
+}
+
+/**
+ * Installs the stub query entry points on the context.
+ */
+void
+vc5_query_init(struct pipe_context *pctx)
+{
+        /* Object lifetime. */
+        pctx->create_query = vc5_create_query;
+        pctx->destroy_query = vc5_destroy_query;
+
+        /* Query execution and readback. */
+        pctx->begin_query = vc5_begin_query;
+        pctx->end_query = vc5_end_query;
+        pctx->get_query_result = vc5_get_query_result;
+
+        pctx->set_active_query_state = vc5_set_active_query_state;
+}
+
diff --git a/src/gallium/drivers/vc5/vc5_rcl.c b/src/gallium/drivers/vc5/vc5_rcl.c
new file mode 100644 (file)
index 0000000..287a35a
--- /dev/null
@@ -0,0 +1,218 @@
+/*
+ * Copyright © 2017 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "util/u_format.h"
+#include "vc5_context.h"
+#include "vc5_tiling.h"
+#include "broadcom/cle/v3d_packet_v33_pack.h"
+
+/**
+ * Emits the render control list (RCL) for a job.
+ *
+ * The list starts with the tile rendering mode configuration packets,
+ * followed by an initial tile-buffer clear, and then, for every tile between
+ * the job's min/max draw bounds, an optional reload, the tile coordinates, a
+ * branch to that tile's binned list and a store of the tile buffer.
+ *
+ * Also records the RCL start offset in job->submit, adds the RCL BO to the
+ * job, and bumps the write count of each resource that will be resolved.
+ */
+void
+vc5_emit_rcl(struct vc5_job *job)
+{
+        /* Inclusive range of tiles covered by the draw bounds. */
+        uint32_t min_x_tile = job->draw_min_x / job->tile_width;
+        uint32_t min_y_tile = job->draw_min_y / job->tile_height;
+        uint32_t max_x_tile = (job->draw_max_x - 1) / job->tile_width;
+        uint32_t max_y_tile = (job->draw_max_y - 1) / job->tile_height;
+
+        /* The RCL list should be empty. */
+        assert(!job->rcl.bo);
+
+        /* Reserve 256 bytes for the fixed packets plus 64 bytes per tile. */
+        vc5_cl_ensure_space(&job->rcl,
+                            256 +
+                            (64 *
+                             (max_x_tile - min_x_tile + 1) *
+                             (max_y_tile - min_y_tile + 1)), 1);
+
+        job->submit.rcl_start = job->rcl.bo->offset;
+        vc5_job_add_bo(job, job->rcl.bo);
+
+        /* nr_cbufs is the index of the highest bound color buffer plus one;
+         * lower slots may still be NULL.
+         */
+        int nr_cbufs = 0;
+        for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) {
+                if (job->cbufs[i])
+                        nr_cbufs = i + 1;
+        }
+
+        /* Common config must be the first TILE_RENDERING_MODE_CONFIGURATION
+         * and Z_STENCIL_CLEAR_VALUES must be last.  The ones in between are
+         * optional updates to the previous HW state.
+         */
+        cl_emit(&job->rcl, TILE_RENDERING_MODE_CONFIGURATION_COMMON_CONFIGURATION,
+                config) {
+                config.enable_z_store = job->resolve & PIPE_CLEAR_DEPTH;
+                config.enable_stencil_store = job->resolve & PIPE_CLEAR_STENCIL;
+
+                config.early_z_disable = !job->uses_early_z;
+
+                config.image_width_pixels = job->draw_width;
+                config.image_height_pixels = job->draw_height;
+
+                config.number_of_render_targets_minus_1 =
+                        MAX2(nr_cbufs, 1) - 1;
+
+                config.maximum_bpp_of_all_render_targets = job->internal_bpp;
+        }
+
+        for (int i = 0; i < nr_cbufs; i++) {
+                cl_emit(&job->rcl, TILE_RENDERING_MODE_CONFIGURATION_RENDER_TARGET_CONFIG, rt) {
+                        struct pipe_surface *psurf = job->cbufs[i];
+                        /* NOTE(review): this `continue` sits inside the
+                         * cl_emit() body rather than directly in the for
+                         * loop.  If cl_emit() is itself implemented as a
+                         * loop, it would only skip the field setup below and
+                         * a default-initialized RENDER_TARGET_CONFIG packet
+                         * would still be emitted for the empty slot --
+                         * confirm against the cl_emit() definition.
+                         */
+                        if (!psurf)
+                                continue;
+
+                        struct vc5_surface *surf = vc5_surface(psurf);
+                        struct vc5_resource *rsc = vc5_resource(psurf->texture);
+                        rt.address = cl_address(rsc->bo, surf->offset);
+                        rt.internal_type = surf->internal_type;
+                        rt.output_image_format = surf->format;
+                        rt.memory_format = surf->tiling;
+                        rt.internal_bpp = surf->internal_bpp;
+                        rt.render_target_number = i;
+
+                        /* << binds tighter than &, so this tests the
+                         * resolve bit for color buffer i.
+                         */
+                        if (job->resolve & PIPE_CLEAR_COLOR0 << i)
+                                rsc->writes++;
+                }
+        }
+
+        /* TODO: Don't bother emitting if we don't load/clear Z/S. */
+        if (job->zsbuf) {
+                struct pipe_surface *psurf = job->zsbuf;
+                struct vc5_surface *surf = vc5_surface(psurf);
+                struct vc5_resource *rsc = vc5_resource(psurf->texture);
+
+                cl_emit(&job->rcl, TILE_RENDERING_MODE_CONFIGURATION_Z_STENCIL_CONFIG, zs) {
+                        zs.address = cl_address(rsc->bo, surf->offset);
+
+                        zs.internal_type = surf->internal_type;
+                        zs.output_image_format = surf->format;
+
+                        struct vc5_resource_slice *slice = &rsc->slices[psurf->u.tex.level];
+                        /* XXX */
+                        /* Rows in the slice (size / stride) divided by the
+                         * height of a UIF block (two utiles).
+                         */
+                        zs.padded_height_of_output_image_in_uif_blocks =
+                                (slice->size / slice->stride) / (2 * vc5_utile_height(rsc->cpp));
+
+                        assert(surf->tiling != VC5_TILING_RASTER);
+                        zs.memory_format = surf->tiling;
+                }
+
+                if (job->resolve & PIPE_CLEAR_DEPTHSTENCIL)
+                        rsc->writes++;
+        }
+
+        /* Only the first word of the job's clear color is emitted here. */
+        cl_emit(&job->rcl, TILE_RENDERING_MODE_CONFIGURATION_CLEAR_COLORS_PART1,
+                clear) {
+                clear.clear_color_low_32_bits = job->clear_color[0];
+        };
+
+        /* Ends rendering mode config. */
+        cl_emit(&job->rcl, TILE_RENDERING_MODE_CONFIGURATION_Z_STENCIL_CLEAR_VALUES,
+                clear) {
+                clear.z_s_clear_value = job->clear_zs;
+        };
+
+        /* Always set initial block size before the first branch, which needs
+         * to match the value from binning mode config.
+         */
+        cl_emit(&job->rcl, TILE_LIST_INITIAL_BLOCK_SIZE, init) {
+                init.use_auto_chained_tile_lists = true;
+                init.size_of_first_block_in_chained_tile_lists =
+                        TILE_ALLOCATION_BLOCK_SIZE_64B;
+        }
+
+        cl_emit(&job->rcl, WAIT_ON_SEMAPHORE, sem);
+
+        /* Start by clearing the tile buffer. */
+        cl_emit(&job->rcl, TILE_COORDINATES, coords) {
+                coords.tile_column_number = 0;
+                coords.tile_row_number = 0;
+        }
+
+        cl_emit(&job->rcl, STORE_TILE_BUFFER_GENERAL, store) {
+                store.buffer_to_store = NONE;
+        }
+
+        cl_emit(&job->rcl, FLUSH_VCD_CACHE, flush);
+
+        /* Mask of the four color-buffer bits, and the position of the first
+         * one, used to shift PIPE_CLEAR_* color bits down to bit 0.
+         */
+        const uint32_t pipe_clear_color_buffers = (PIPE_CLEAR_COLOR0 |
+                                                   PIPE_CLEAR_COLOR1 |
+                                                   PIPE_CLEAR_COLOR2 |
+                                                   PIPE_CLEAR_COLOR3);
+        const uint32_t first_color_buffer_bit = (ffs(PIPE_CLEAR_COLOR0) - 1);
+
+        for (int y = min_y_tile; y <= max_y_tile; y++) {
+                for (int x = min_x_tile; x <= max_x_tile; x++) {
+                        /* Buffers that will be stored but were not cleared
+                         * by this job need their old contents loaded.
+                         */
+                        uint32_t read_but_not_cleared = job->resolve & ~job->cleared;
+
+                        /* The initial reload will be queued until we get the
+                         * tile coordinates.
+                         */
+                        if (read_but_not_cleared) {
+                                cl_emit(&job->rcl, RELOAD_TILE_COLOUR_BUFFER, load) {
+                                        load.disable_colour_buffer_load =
+                                                (~read_but_not_cleared & pipe_clear_color_buffers) >>
+                                                first_color_buffer_bit;
+                                        load.enable_z_load =
+                                                read_but_not_cleared & PIPE_CLEAR_DEPTH;
+                                        load.enable_stencil_load =
+                                                read_but_not_cleared & PIPE_CLEAR_STENCIL;
+                                }
+                        }
+
+                        /* Tile Coordinates triggers the reload and sets where
+                         * the stores go. There must be one per store packet.
+                         */
+                        cl_emit(&job->rcl, TILE_COORDINATES, coords) {
+                                coords.tile_column_number = x;
+                                coords.tile_row_number = y;
+                        }
+
+                        /* Each tile's list starts at a 64-byte slot in the
+                         * tile allocation BO, indexed row-major by tile.
+                         */
+                        cl_emit(&job->rcl, BRANCH_TO_AUTO_CHAINED_SUB_LIST, branch) {
+                                uint32_t bin_tile_stride =
+                                        (align(job->draw_width,
+                                               job->tile_width) /
+                                         job->tile_width);
+                                uint32_t bin_index =
+                                        (y * bin_tile_stride + x);
+                                branch.address = cl_address(job->tile_alloc,
+                                                            64 * bin_index);
+                        }
+
+                        cl_emit(&job->rcl, STORE_MULTI_SAMPLE_RESOLVED_TILE_COLOR_BUFFER_EXTENDED, store) {
+                                uint32_t color_write_enables =
+                                        job->resolve >> first_color_buffer_bit;
+
+                                store.disable_color_buffer_write = (~color_write_enables) & 0xf;
+                                store.enable_z_write = job->resolve & PIPE_CLEAR_DEPTH;
+                                store.enable_stencil_write = job->resolve & PIPE_CLEAR_STENCIL;
+
+                                store.disable_colour_buffers_clear_on_write =
+                                        (job->cleared & pipe_clear_color_buffers) == 0;
+                                store.disable_z_buffer_clear_on_write =
+                                        !(job->cleared & PIPE_CLEAR_DEPTH);
+                                store.disable_stencil_buffer_clear_on_write =
+                                        !(job->cleared & PIPE_CLEAR_STENCIL);
+
+                                store.last_tile_of_frame = (x == max_x_tile &&
+                                                            y == max_y_tile);
+                        };
+                }
+        }
+}
diff --git a/src/gallium/drivers/vc5/vc5_resource.c b/src/gallium/drivers/vc5/vc5_resource.c
new file mode 100644 (file)
index 0000000..8dbdb71
--- /dev/null
@@ -0,0 +1,758 @@
+/*
+ * Copyright © 2014-2017 Broadcom
+ * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "util/u_blit.h"
+#include "util/u_memory.h"
+#include "util/u_format.h"
+#include "util/u_inlines.h"
+#include "util/u_surface.h"
+#include "util/u_upload_mgr.h"
+
+#include "drm_fourcc.h"
+#include "vc5_screen.h"
+#include "vc5_context.h"
+#include "vc5_resource.h"
+#include "vc5_tiling.h"
+#include "broadcom/cle/v3d_packet_v33_pack.h"
+
+#ifndef DRM_FORMAT_MOD_INVALID
+#define DRM_FORMAT_MOD_INVALID ((1ULL << 56) - 1)
+#endif
+
+/**
+ * Allocates a fresh BO large enough for every layer of the resource and
+ * installs it as rsc->bo, dropping the reference to the previous BO.
+ *
+ * The layer count is depth0 for 3D textures and array_size otherwise.
+ *
+ * Returns true on success.  On failure the resource keeps its existing BO
+ * and false is returned.
+ */
+static bool
+vc5_resource_bo_alloc(struct vc5_resource *rsc)
+{
+        struct pipe_resource *prsc = &rsc->base.b;
+        struct pipe_screen *pscreen = prsc->screen;
+        struct vc5_bo *bo;
+        int layers = (prsc->target == PIPE_TEXTURE_3D ?
+                      prsc->depth0 : prsc->array_size);
+
+        /* Layer N starts at N * cube_map_stride and level 0 is the last
+         * slice laid out within a layer, so the BO has to reach the end of
+         * level 0 in the final layer.  The stride must therefore be
+         * multiplied by (layers - 1); without the parentheses only a single
+         * byte was subtracted and nearly a whole extra layer was allocated.
+         * The layer count is clamped so a zero count cannot go negative.
+         */
+        bo = vc5_bo_alloc(vc5_screen(pscreen),
+                          rsc->slices[0].offset +
+                          rsc->slices[0].size +
+                          rsc->cube_map_stride * (MAX2(layers, 1) - 1),
+                          "resource");
+        if (!bo)
+                return false;
+
+        DBG(V3D_DEBUG_SURFACE, "alloc %p @ 0x%08x:\n", rsc, bo->offset);
+        vc5_bo_unreference(&rsc->bo);
+        rsc->bo = bo;
+        return true;
+}
+
+/**
+ * Finishes a transfer created by vc5_resource_transfer_map().
+ *
+ * - If a staging buffer was allocated (trans->map), its contents are tiled
+ *   back into the BO when the transfer was a write, and the buffer is freed.
+ * - If a temporary single-sample resource was used, it is blitted back into
+ *   the real resource when the transfer was a write, and the temporary is
+ *   then released.
+ * - Finally the reference on the mapped resource is dropped and the transfer
+ *   is returned to the context's slab pool.
+ */
+static void
+vc5_resource_transfer_unmap(struct pipe_context *pctx,
+                            struct pipe_transfer *ptrans)
+{
+        struct vc5_context *vc5 = vc5_context(pctx);
+        struct vc5_transfer *trans = vc5_transfer(ptrans);
+
+        if (trans->map) {
+                struct vc5_resource *rsc;
+                struct vc5_resource_slice *slice;
+
+                /* With a temporary in play, the staging buffer belongs to
+                 * the temporary's only level rather than the real resource.
+                 */
+                if (trans->ss_resource) {
+                        rsc = vc5_resource(trans->ss_resource);
+                        slice = &rsc->slices[0];
+                } else {
+                        rsc = vc5_resource(ptrans->resource);
+                        slice = &rsc->slices[ptrans->level];
+                }
+
+                if (ptrans->usage & PIPE_TRANSFER_WRITE) {
+                        vc5_store_tiled_image(rsc->bo->map + slice->offset +
+                                              ptrans->box.z * rsc->cube_map_stride,
+                                              slice->stride,
+                                              trans->map, ptrans->stride,
+                                              slice->tiling, rsc->cpp,
+                                              rsc->base.b.height0,
+                                              &ptrans->box);
+                }
+                free(trans->map);
+        }
+
+        if (trans->ss_resource) {
+                if (ptrans->usage & PIPE_TRANSFER_WRITE) {
+                        struct pipe_blit_info blit;
+                        memset(&blit, 0, sizeof(blit));
+
+                        blit.src.resource = trans->ss_resource;
+                        blit.src.format = trans->ss_resource->format;
+                        blit.src.box.width = trans->ss_box.width;
+                        blit.src.box.height = trans->ss_box.height;
+                        blit.src.box.depth = 1;
+
+                        blit.dst.resource = ptrans->resource;
+                        blit.dst.format = ptrans->resource->format;
+                        blit.dst.level = ptrans->level;
+                        blit.dst.box = trans->ss_box;
+
+                        blit.mask = util_format_get_mask(ptrans->resource->format);
+                        blit.filter = PIPE_TEX_FILTER_NEAREST;
+
+                        pctx->blit(pctx, &blit);
+                }
+
+                /* Release the temporary for read-only transfers as well;
+                 * previously it was only unreferenced on the write path and
+                 * leaked otherwise.
+                 */
+                pipe_resource_reference(&trans->ss_resource, NULL);
+        }
+
+        pipe_resource_reference(&ptrans->resource, NULL);
+        slab_free(&vc5->transfer_pool, ptrans);
+}
+
+/**
+ * Creates a temporary resource with the same target and format as prsc,
+ * sized to the given box, with one layer and a depth of one.
+ *
+ * All other template fields are left zeroed.  Returns whatever the screen's
+ * resource_create() hook returns.
+ */
+static struct pipe_resource *
+vc5_get_temp_resource(struct pipe_context *pctx,
+                      struct pipe_resource *prsc,
+                      const struct pipe_box *box)
+{
+        struct pipe_screen *screen = pctx->screen;
+        struct pipe_resource tmpl;
+
+        memset(&tmpl, 0, sizeof(tmpl));
+
+        /* Inherited from the source resource. */
+        tmpl.format = prsc->format;
+        tmpl.target = prsc->target;
+
+        /* Dimensions come from the box being mapped. */
+        tmpl.height0 = box->height;
+        tmpl.width0 = box->width;
+        tmpl.array_size = 1;
+        tmpl.depth0 = 1;
+
+        return screen->resource_create(screen, &tmpl);
+}
+
+/**
+ * Maps a box of a resource level for CPU access.
+ *
+ * Outstanding jobs are flushed as needed unless the map is unsynchronized.
+ * Multisampled resources are mapped through a temporary single-sample
+ * resource, and tiled resources through a malloc'ed staging buffer that is
+ * untiled here (for reads) and re-tiled at unmap time (for writes).
+ *
+ * On success the new transfer is stored in *pptrans and a pointer to the
+ * mapped data is returned.  On failure NULL is returned, *pptrans is left
+ * untouched, and everything acquired for the transfer is released.
+ */
+static void *
+vc5_resource_transfer_map(struct pipe_context *pctx,
+                          struct pipe_resource *prsc,
+                          unsigned level, unsigned usage,
+                          const struct pipe_box *box,
+                          struct pipe_transfer **pptrans)
+{
+        struct vc5_context *vc5 = vc5_context(pctx);
+        struct vc5_resource *rsc = vc5_resource(prsc);
+        struct vc5_transfer *trans;
+        struct pipe_transfer *ptrans;
+        enum pipe_format format = prsc->format;
+        char *buf;
+
+        /* Upgrade DISCARD_RANGE to WHOLE_RESOURCE if the whole resource is
+         * being mapped.
+         */
+        if ((usage & PIPE_TRANSFER_DISCARD_RANGE) &&
+            !(usage & PIPE_TRANSFER_UNSYNCHRONIZED) &&
+            !(prsc->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT) &&
+            prsc->last_level == 0 &&
+            prsc->width0 == box->width &&
+            prsc->height0 == box->height &&
+            prsc->depth0 == box->depth &&
+            prsc->array_size == 1 &&
+            rsc->bo->private) {
+                usage |= PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE;
+        }
+
+        if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) {
+                if (vc5_resource_bo_alloc(rsc)) {
+                        /* If it might be bound as one of our vertex buffers
+                         * or UBOs, make sure we re-emit vertex buffer state
+                         * or uniforms.
+                         */
+                        if (prsc->bind & PIPE_BIND_VERTEX_BUFFER)
+                                vc5->dirty |= VC5_DIRTY_VTXBUF;
+                        if (prsc->bind & PIPE_BIND_CONSTANT_BUFFER)
+                                vc5->dirty |= VC5_DIRTY_CONSTBUF;
+                } else {
+                        /* If we failed to reallocate, flush users so that we
+                         * don't violate any syncing requirements.
+                         */
+                        vc5_flush_jobs_reading_resource(vc5, prsc);
+                }
+        } else if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
+                /* If we're writing and the buffer is being used by the CL, we
+                 * have to flush the CL first.  If we're only reading, we need
+                 * to flush if the CL has written our buffer.
+                 */
+                if (usage & PIPE_TRANSFER_WRITE)
+                        vc5_flush_jobs_reading_resource(vc5, prsc);
+                else
+                        vc5_flush_jobs_writing_resource(vc5, prsc);
+        }
+
+        if (usage & PIPE_TRANSFER_WRITE) {
+                rsc->writes++;
+                rsc->initialized_buffers = ~0;
+        }
+
+        trans = slab_alloc(&vc5->transfer_pool);
+        if (!trans)
+                return NULL;
+
+        /* XXX: Handle DONTBLOCK, DISCARD_RANGE, PERSISTENT, COHERENT. */
+
+        /* slab_alloc_st() doesn't zero: */
+        memset(trans, 0, sizeof(*trans));
+        ptrans = &trans->base;
+
+        pipe_resource_reference(&ptrans->resource, prsc);
+        ptrans->level = level;
+        ptrans->usage = usage;
+        ptrans->box = *box;
+
+        /* If the resource is multisampled, we need to resolve to single
+         * sample.  This seems like it should be handled at a higher layer.
+         */
+        if (prsc->nr_samples > 1) {
+                trans->ss_resource = vc5_get_temp_resource(pctx, prsc, box);
+                if (!trans->ss_resource)
+                        goto fail;
+                assert(!trans->ss_resource->nr_samples);
+
+                /* The ptrans->box gets modified for tile alignment, so save
+                 * the original box for unmap time.
+                 */
+                trans->ss_box = *box;
+
+                if (usage & PIPE_TRANSFER_READ) {
+                        struct pipe_blit_info blit;
+                        memset(&blit, 0, sizeof(blit));
+
+                        blit.src.resource = ptrans->resource;
+                        blit.src.format = ptrans->resource->format;
+                        blit.src.level = ptrans->level;
+                        blit.src.box = trans->ss_box;
+
+                        blit.dst.resource = trans->ss_resource;
+                        blit.dst.format = trans->ss_resource->format;
+                        blit.dst.box.width = trans->ss_box.width;
+                        blit.dst.box.height = trans->ss_box.height;
+                        blit.dst.box.depth = 1;
+
+                        blit.mask = util_format_get_mask(prsc->format);
+                        blit.filter = PIPE_TEX_FILTER_NEAREST;
+
+                        pctx->blit(pctx, &blit);
+                        vc5_flush_jobs_writing_resource(vc5, blit.dst.resource);
+                }
+
+                /* The rest of the mapping process should use our temporary. */
+                prsc = trans->ss_resource;
+                rsc = vc5_resource(prsc);
+                ptrans->box.x = 0;
+                ptrans->box.y = 0;
+                ptrans->box.z = 0;
+        }
+
+        /* Note that the current kernel implementation is synchronous, so no
+         * need to do syncing stuff here yet.
+         */
+
+        if (usage & PIPE_TRANSFER_UNSYNCHRONIZED)
+                buf = vc5_bo_map_unsynchronized(rsc->bo);
+        else
+                buf = vc5_bo_map(rsc->bo);
+        if (!buf) {
+                fprintf(stderr, "Failed to map bo\n");
+                goto fail;
+        }
+
+        struct vc5_resource_slice *slice = &rsc->slices[level];
+        if (rsc->tiled) {
+                /* No direct mappings of tiled, since we need to manually
+                 * tile/untile.  Go through the cleanup path so the transfer
+                 * and its references are not leaked.
+                 */
+                if (usage & PIPE_TRANSFER_MAP_DIRECTLY)
+                        goto fail;
+
+                ptrans->stride = ptrans->box.width * rsc->cpp;
+                ptrans->layer_stride = ptrans->stride * ptrans->box.height;
+
+                /* Linear staging copy of the box; bail out rather than
+                 * untiling into a NULL buffer if the allocation fails.
+                 */
+                trans->map = malloc(ptrans->layer_stride * ptrans->box.depth);
+                if (!trans->map)
+                        goto fail;
+
+                if (usage & PIPE_TRANSFER_READ) {
+                        vc5_load_tiled_image(trans->map, ptrans->stride,
+                                             buf + slice->offset +
+                                             ptrans->box.z * rsc->cube_map_stride,
+                                             slice->stride,
+                                             slice->tiling, rsc->cpp,
+                                             rsc->base.b.height0,
+                                             &ptrans->box);
+                }
+
+                *pptrans = ptrans;
+                return trans->map;
+        } else {
+                ptrans->stride = slice->stride;
+                ptrans->layer_stride = ptrans->stride;
+
+                *pptrans = ptrans;
+                return buf + slice->offset +
+                        ptrans->box.y / util_format_get_blockheight(format) * ptrans->stride +
+                        ptrans->box.x / util_format_get_blockwidth(format) * rsc->cpp +
+                        ptrans->box.z * rsc->cube_map_stride;
+        }
+
+
+fail:
+        /* No staging buffer exists on any failure path, so only the
+         * references and the transfer itself need releasing.  This is done
+         * directly instead of through vc5_resource_transfer_unmap(), which
+         * would blit a never-written temporary back over the real resource
+         * for a failed multisample write map.
+         */
+        pipe_resource_reference(&trans->ss_resource, NULL);
+        pipe_resource_reference(&ptrans->resource, NULL);
+        slab_free(&vc5->transfer_pool, ptrans);
+        return NULL;
+}
+
+/**
+ * Destroys a resource: drops its BO reference and frees the resource
+ * structure itself.
+ */
+static void
+vc5_resource_destroy(struct pipe_screen *pscreen,
+                     struct pipe_resource *prsc)
+{
+        struct vc5_resource *res = vc5_resource(prsc);
+
+        vc5_bo_unreference(&res->bo);
+        free(res);
+}
+
+/**
+ * Exports a handle for the resource's BO.
+ *
+ * Fills in whandle->stride from level 0 and whandle->handle according to
+ * whandle->type (flink name, KMS handle or dmabuf fd).  Returns FALSE for an
+ * unrecognized handle type or when the dmabuf export yields -1; the flink
+ * case returns whatever vc5_bo_flink() returns.
+ */
+static boolean
+vc5_resource_get_handle(struct pipe_screen *pscreen,
+                        struct pipe_resource *prsc,
+                        struct winsys_handle *whandle)
+{
+        struct vc5_resource *res = vc5_resource(prsc);
+        struct vc5_bo *bo = res->bo;
+
+        /* If we're passing some reference to our BO out to some other part of
+         * the system, then we can't do any optimizations about only us being
+         * the ones seeing it (like BO caching).
+         */
+        bo->private = false;
+
+        whandle->stride = res->slices[0].stride;
+
+        if (whandle->type == DRM_API_HANDLE_TYPE_SHARED)
+                return vc5_bo_flink(bo, &whandle->handle);
+
+        if (whandle->type == DRM_API_HANDLE_TYPE_KMS) {
+                whandle->handle = bo->handle;
+                return TRUE;
+        }
+
+        if (whandle->type == DRM_API_HANDLE_TYPE_FD) {
+                whandle->handle = vc5_bo_get_dmabuf(bo);
+                return whandle->handle != -1;
+        }
+
+        return FALSE;
+}
+
+/* Resource vtable for vc5 resources.  Flush-region uses the shared default
+ * implementation; everything else is driver-specific.
+ */
+static const struct u_resource_vtbl vc5_resource_vtbl = {
+        .resource_destroy         = vc5_resource_destroy,
+        .resource_get_handle      = vc5_resource_get_handle,
+        .transfer_map             = vc5_resource_transfer_map,
+        .transfer_unmap           = vc5_resource_transfer_unmap,
+        .transfer_flush_region    = u_default_transfer_flush_region,
+};
+
+/**
+ * Computes the memory layout of every miplevel of a resource.
+ *
+ * For each level this fills in the slice's tiling mode, offset, stride and
+ * size, then shifts all offsets so that level 0 starts on a 4096-byte
+ * boundary, and finally sets rsc->cube_map_stride.
+ *
+ * \param rsc     resource to lay out; reads rsc->cpp, rsc->tiled and the
+ *                base pipe_resource dimensions.
+ * \param caller  label that is only used in the V3D_DEBUG_SURFACE output.
+ */
+static void
+vc5_setup_slices(struct vc5_resource *rsc, const char *caller)
+{
+        struct pipe_resource *prsc = &rsc->base.b;
+        uint32_t width = prsc->width0;
+        uint32_t height = prsc->height0;
+        uint32_t pot_width = util_next_power_of_two(width);
+        uint32_t pot_height = util_next_power_of_two(height);
+        uint32_t offset = 0;
+        uint32_t utile_w = vc5_utile_width(rsc->cpp);
+        uint32_t utile_h = vc5_utile_height(rsc->cpp);
+        /* A UIF block is two utiles on a side. */
+        uint32_t uif_block_w = utile_w * 2;
+        uint32_t uif_block_h = utile_h * 2;
+        /* Never set in this function, so the (i != 0 || !uif_top) tests
+         * below are currently always true.
+         */
+        bool uif_top = false;
+
+        /* Walk from the smallest level to the largest, so the last level
+         * gets offset 0 and level 0 ends up at the highest offset.
+         */
+        for (int i = prsc->last_level; i >= 0; i--) {
+                struct vc5_resource_slice *slice = &rsc->slices[i];
+
+                /* Levels 0 and 1 are minified from the real size; deeper
+                 * levels are minified from the power-of-two padded size.
+                 */
+                uint32_t level_width, level_height;
+                if (i < 2) {
+                        level_width = u_minify(width, i);
+                        level_height = u_minify(height, i);
+                } else {
+                        level_width = u_minify(pot_width, i);
+                        level_height = u_minify(pot_height, i);
+                }
+
+                if (!rsc->tiled) {
+                        slice->tiling = VC5_TILING_RASTER;
+                        if (prsc->nr_samples > 1) {
+                                /* MSAA (4x) surfaces are stored as raw tile buffer contents. */
+                                level_width = align(level_width, 32);
+                                level_height = align(level_height, 32);
+                        }
+                } else {
+                        /* Pick the tiling mode from the level's size, from
+                         * the narrowest layout to full UIF, and pad the
+                         * level to that layout's alignment.
+                         */
+                        if ((i != 0 || !uif_top) &&
+                            (level_width <= utile_w ||
+                             level_height <= utile_h)) {
+                                slice->tiling = VC5_TILING_LINEARTILE;
+                                level_width = align(level_width, utile_w);
+                                level_height = align(level_height, utile_h);
+                        } else if ((i != 0 || !uif_top) &&
+                                   level_width <= uif_block_w) {
+                                slice->tiling = VC5_TILING_UBLINEAR_1_COLUMN;
+                                level_width = align(level_width, uif_block_w);
+                                level_height = align(level_height, uif_block_h);
+                        } else if ((i != 0 || !uif_top) &&
+                                   level_width <= 2 * uif_block_w) {
+                                slice->tiling = VC5_TILING_UBLINEAR_2_COLUMN;
+                                level_width = align(level_width, 2 * uif_block_w);
+                                level_height = align(level_height, uif_block_h);
+                        } else {
+                                slice->tiling = VC5_TILING_UIF_NO_XOR;
+
+                                level_width = align(level_width,
+                                                    4 * uif_block_w);
+                                level_height = align(level_height,
+                                                     4 * uif_block_h);
+                        }
+                }
+
+                /* Stride is in bytes and scales with the sample count. */
+                slice->offset = offset;
+                slice->stride = (level_width * rsc->cpp *
+                                 MAX2(prsc->nr_samples, 1));
+                slice->size = level_height * slice->stride;
+
+                offset += slice->size;
+
+                if (V3D_DEBUG & V3D_DEBUG_SURFACE) {
+                        static const char *const tiling_descriptions[] = {
+                                [VC5_TILING_RASTER] = "R",
+                                [VC5_TILING_LINEARTILE] = "LT",
+                                [VC5_TILING_UBLINEAR_1_COLUMN] = "UB1",
+                                [VC5_TILING_UBLINEAR_2_COLUMN] = "UB2",
+                                [VC5_TILING_UIF_NO_XOR] = "UIF",
+                                [VC5_TILING_UIF_XOR] = "UIF^",
+                        };
+
+                        fprintf(stderr,
+                                "rsc %s %p (format %s), %dx%d: "
+                                "level %d (%s) %dx%d -> %dx%d, stride %d@0x%08x\n",
+                                caller, rsc,
+                                util_format_short_name(prsc->format),
+                                prsc->width0, prsc->height0,
+                                i, tiling_descriptions[slice->tiling],
+                                u_minify(prsc->width0, i),
+                                u_minify(prsc->height0, i),
+                                level_width, level_height,
+                                slice->stride, slice->offset);
+                }
+        }
+
+        /* UIF/UBLINEAR levels need to be aligned to UIF-blocks, and LT only
+         * needs to be aligned to utile boundaries.  Since tiles are laid out
+         * from small to big in memory, we need to align the later UIF slices
+         * to UIF blocks, if they were preceded by non-UIF-block-aligned LT
+         * slices.
+         *
+         * We additionally align to 4k, which improves UIF XOR performance.
+         */
+        /* Every level is shifted by the same amount, chosen so that level 0
+         * lands on a 4096-byte boundary.
+         */
+        uint32_t page_align_offset = (align(rsc->slices[0].offset, 4096) -
+                                      rsc->slices[0].offset);
+        if (page_align_offset) {
+                for (int i = 0; i <= prsc->last_level; i++)
+                        rsc->slices[i].offset += page_align_offset;
+        }
+
+        /* Arrays, cubes, and 3D textures have a stride which is the distance
+         * from one full mipmap tree to the next (64b aligned).
+         */
+        rsc->cube_map_stride = align(rsc->slices[0].offset +
+                                     rsc->slices[0].size, 64);
+}
+
+/**
+ * Allocates a zeroed vc5_resource and fills in the state shared by created
+ * and imported resources: a copy of the template, a reference count of 1,
+ * the owning screen, the resource vtbl and the bytes-per-pixel value.
+ *
+ * Returns NULL if the allocation fails.
+ */
+static struct vc5_resource *
+vc5_resource_setup(struct pipe_screen *pscreen,
+                   const struct pipe_resource *tmpl)
+{
+        struct vc5_resource *rsc = CALLOC_STRUCT(vc5_resource);
+        struct pipe_resource *prsc;
+
+        if (rsc == NULL)
+                return NULL;
+
+        prsc = &rsc->base.b;
+        *prsc = *tmpl;
+
+        /* The template's refcount and screen are not ours; reset them. */
+        pipe_reference_init(&prsc->reference, 1);
+        prsc->screen = pscreen;
+
+        rsc->base.vtbl = &vc5_resource_vtbl;
+
+        /* Multisampled resources use 4 bytes per sample regardless of the
+         * format's block size.
+         */
+        rsc->cpp = (prsc->nr_samples > 1 ?
+                    sizeof(uint32_t) :
+                    util_format_get_blocksize(tmpl->format));
+
+        assert(rsc->cpp);
+
+        return rsc;
+}
+
+/**
+ * Returns true if the modifier "needle" is one of the first "count" entries
+ * of "haystack", false otherwise (including when count is zero or negative).
+ */
+static bool
+find_modifier(uint64_t needle, const uint64_t *haystack, int count)
+{
+        const uint64_t *cur = haystack;
+        int remaining = count;
+
+        while (remaining > 0) {
+                if (*cur == needle)
+                        return true;
+                cur++;
+                remaining--;
+        }
+
+        return false;
+}
+
+/**
+ * pipe_screen::resource_create_with_modifiers hook.
+ *
+ * Creates a resource from "tmpl", picking between a tiled and a linear
+ * layout based on the template's target/bind flags and the list of
+ * acceptable DRM format modifiers.  A single DRM_FORMAT_MOD_INVALID entry
+ * means the caller expressed no preference.
+ *
+ * Returns the new resource, or NULL if allocation fails or none of the
+ * requested modifiers can be satisfied.
+ */
+static struct pipe_resource *
+vc5_resource_create_with_modifiers(struct pipe_screen *pscreen,
+                                   const struct pipe_resource *tmpl,
+                                   const uint64_t *modifiers,
+                                   int count)
+{
+        bool linear_ok = find_modifier(DRM_FORMAT_MOD_LINEAR, modifiers, count);
+        struct vc5_resource *rsc = vc5_resource_setup(pscreen, tmpl);
+
+        /* vc5_resource_setup() returns NULL on allocation failure; bail out
+         * before touching any field of the resource.
+         */
+        if (!rsc)
+                return NULL;
+
+        struct pipe_resource *prsc = &rsc->base.b;
+        /* Use a tiled layout if we can, for better 3D performance. */
+        bool should_tile = true;
+
+        /* VBOs/PBOs are untiled (and 1 height). */
+        if (tmpl->target == PIPE_BUFFER)
+                should_tile = false;
+
+        /* Cursors are always linear, and the user can request linear as well.
+         */
+        if (tmpl->bind & (PIPE_BIND_LINEAR | PIPE_BIND_CURSOR))
+                should_tile = false;
+
+        /* Scanout BOs for simulator need to be linear for interaction with
+         * i965.
+         */
+        if (using_vc5_simulator &&
+            tmpl->bind & (PIPE_BIND_SHARED | PIPE_BIND_SCANOUT))
+                should_tile = false;
+
+        /* No user-specified modifier; determine our own. */
+        if (count == 1 && modifiers[0] == DRM_FORMAT_MOD_INVALID) {
+                linear_ok = true;
+                rsc->tiled = should_tile;
+        } else if (should_tile &&
+                   find_modifier(DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED,
+                                 modifiers, count)) {
+                rsc->tiled = true;
+        } else if (linear_ok) {
+                rsc->tiled = false;
+        } else {
+                fprintf(stderr, "Unsupported modifier requested\n");
+                /* Release the resource allocated above instead of leaking
+                 * it.
+                 */
+                goto fail;
+        }
+
+        if (tmpl->target != PIPE_BUFFER)
+                rsc->tex_format = vc5_get_tex_format(prsc->format);
+
+        vc5_setup_slices(rsc, "create");
+        if (!vc5_resource_bo_alloc(rsc))
+                goto fail;
+
+        return prsc;
+fail:
+        vc5_resource_destroy(pscreen, prsc);
+        return NULL;
+}
+
+/**
+ * pipe_screen::resource_create hook: creates a resource without any
+ * caller-supplied modifier list, letting the driver pick the layout.
+ */
+struct pipe_resource *
+vc5_resource_create(struct pipe_screen *pscreen,
+                    const struct pipe_resource *tmpl)
+{
+        /* A lone DRM_FORMAT_MOD_INVALID entry is how "no modifier requested"
+         * is expressed to vc5_resource_create_with_modifiers().
+         */
+        const uint64_t no_modifier[] = { DRM_FORMAT_MOD_INVALID };
+
+        return vc5_resource_create_with_modifiers(pscreen, tmpl,
+                                                  no_modifier, 1);
+}
+
+/**
+ * pipe_screen::resource_from_handle hook: wraps a buffer shared through a
+ * winsys handle (flink name or dmabuf fd) in a vc5_resource.
+ *
+ * Only DRM_FORMAT_MOD_LINEAR buffers at offset 0 are accepted, and the
+ * handle's stride must match the stride vc5_setup_slices() computes for
+ * level 0.
+ *
+ * Returns the imported resource, or NULL on allocation failure or when the
+ * handle describes a layout that is not supported.
+ */
+static struct pipe_resource *
+vc5_resource_from_handle(struct pipe_screen *pscreen,
+                         const struct pipe_resource *tmpl,
+                         struct winsys_handle *whandle,
+                         unsigned usage)
+{
+        struct vc5_screen *screen = vc5_screen(pscreen);
+        struct vc5_resource *rsc = vc5_resource_setup(pscreen, tmpl);
+
+        if (!rsc)
+                return NULL;
+
+        /* Derive member pointers only once rsc is known to be non-NULL, so
+         * no pointer arithmetic is ever done on a NULL pointer.
+         */
+        struct pipe_resource *prsc = &rsc->base.b;
+        struct vc5_resource_slice *slice = &rsc->slices[0];
+
+        switch (whandle->modifier) {
+        case DRM_FORMAT_MOD_LINEAR:
+                rsc->tiled = false;
+                break;
+        /* XXX: UIF */
+        default:
+                fprintf(stderr,
+                        "Attempt to import unsupported modifier 0x%llx\n",
+                        (unsigned long long)whandle->modifier);
+                goto fail;
+        }
+
+        if (whandle->offset != 0) {
+                fprintf(stderr,
+                        "Attempt to import unsupported winsys offset %u\n",
+                        whandle->offset);
+                goto fail;
+        }
+
+        switch (whandle->type) {
+        case DRM_API_HANDLE_TYPE_SHARED:
+                rsc->bo = vc5_bo_open_name(screen,
+                                           whandle->handle, whandle->stride);
+                break;
+        case DRM_API_HANDLE_TYPE_FD:
+                rsc->bo = vc5_bo_open_dmabuf(screen,
+                                             whandle->handle, whandle->stride);
+                break;
+        default:
+                fprintf(stderr,
+                        "Attempt to import unsupported handle type %d\n",
+                        whandle->type);
+                goto fail;
+        }
+
+        if (!rsc->bo)
+                goto fail;
+
+        vc5_setup_slices(rsc, "import");
+
+        rsc->tex_format = vc5_get_tex_format(prsc->format);
+
+        DBG(V3D_DEBUG_SURFACE,
+            "rsc import %p (format %s), %dx%d: "
+            "level 0 (R) -> stride %d@0x%08x\n",
+            rsc, util_format_short_name(prsc->format),
+            prsc->width0, prsc->height0,
+            slice->stride, slice->offset);
+
+        /* The exporter's stride must match the layout we computed; warn only
+         * once to avoid flooding stderr, but fail every time.
+         */
+        if (whandle->stride != slice->stride) {
+                static bool warned = false;
+                if (!warned) {
+                        warned = true;
+                        fprintf(stderr,
+                                "Attempting to import %dx%d %s with "
+                                "unsupported stride %d instead of %d\n",
+                                prsc->width0, prsc->height0,
+                                util_format_short_name(prsc->format),
+                                whandle->stride,
+                                slice->stride);
+                }
+                goto fail;
+        }
+
+        return prsc;
+
+fail:
+        vc5_resource_destroy(pscreen, prsc);
+        return NULL;
+}
+
+/**
+ * pipe_context::create_surface hook: builds a single-layer surface view of
+ * one mip level of "ptex".
+ *
+ * The surface takes a reference on the texture and caches the slice offset
+ * and tiling plus the render-target format and internal type (and, for
+ * color formats, the internal bpp).  Returns NULL if allocation fails.
+ */
+static struct pipe_surface *
+vc5_create_surface(struct pipe_context *pctx,
+                   struct pipe_resource *ptex,
+                   const struct pipe_surface *surf_tmpl)
+{
+        struct vc5_resource *rsc = vc5_resource(ptex);
+        struct vc5_surface *surface = CALLOC_STRUCT(vc5_surface);
+
+        if (surface == NULL)
+                return NULL;
+
+        /* Only single-layer surfaces are handled. */
+        assert(surf_tmpl->u.tex.first_layer == surf_tmpl->u.tex.last_layer);
+
+        const unsigned level = surf_tmpl->u.tex.level;
+        const struct vc5_resource_slice *slice = &rsc->slices[level];
+        struct pipe_surface *psurf = &surface->base;
+
+        pipe_reference_init(&psurf->reference, 1);
+        pipe_resource_reference(&psurf->texture, ptex);
+
+        psurf->context = pctx;
+        psurf->format = surf_tmpl->format;
+        psurf->width = u_minify(ptex->width0, level);
+        psurf->height = u_minify(ptex->height0, level);
+        psurf->u.tex.level = level;
+        psurf->u.tex.first_layer = surf_tmpl->u.tex.first_layer;
+        psurf->u.tex.last_layer = surf_tmpl->u.tex.last_layer;
+
+        /* Layers are cube_map_stride bytes apart within the resource. */
+        surface->offset = (slice->offset +
+                           psurf->u.tex.first_layer * rsc->cube_map_stride);
+        surface->tiling = slice->tiling;
+        surface->format = vc5_get_rt_format(psurf->format);
+
+        if (!util_format_is_depth_or_stencil(psurf->format)) {
+                /* Color: type and bpp are derived from the output format. */
+                uint32_t bpp, type;
+
+                vc5_get_internal_type_bpp_for_output_format(surface->format,
+                                                            &type, &bpp);
+                surface->internal_type = type;
+                surface->internal_bpp = bpp;
+
+                return psurf;
+        }
+
+        /* Depth/stencil: the type comes from the pipe format; internal_bpp
+         * stays at its zero-initialized value.
+         */
+        switch (psurf->format) {
+        case PIPE_FORMAT_Z16_UNORM:
+                surface->internal_type = INTERNAL_TYPE_DEPTH_16;
+                break;
+        case PIPE_FORMAT_Z32_FLOAT:
+        case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
+                surface->internal_type = INTERNAL_TYPE_DEPTH_32F;
+                break;
+        default:
+                surface->internal_type = INTERNAL_TYPE_DEPTH_24;
+        }
+
+        return psurf;
+}
+
+/**
+ * pipe_context::surface_destroy hook: drops the surface's reference on its
+ * texture, then frees the surface itself.
+ */
+static void
+vc5_surface_destroy(struct pipe_context *pctx, struct pipe_surface *psurf)
+{
+        /* Must happen before FREE(): it reads and clears psurf->texture. */
+        pipe_resource_reference(&psurf->texture, NULL);
+        FREE(psurf);
+}
+
+/**
+ * pipe_context::flush_resource hook.  Intentionally a no-op; see the
+ * comment in the body.
+ */
+static void
+vc5_flush_resource(struct pipe_context *pctx, struct pipe_resource *resource)
+{
+        /* All calls to flush_resource are followed by a flush of the context,
+         * so there's nothing to do.
+         */
+}
+
+/**
+ * Installs the resource-related entry points on the screen: creation and
+ * import are handled by this driver, while handle export and destruction
+ * dispatch through the u_resource vtbl helpers.
+ */
+void
+vc5_resource_screen_init(struct pipe_screen *pscreen)
+{
+        /* Driver-specific creation/import paths. */
+        pscreen->resource_create = vc5_resource_create;
+        pscreen->resource_create_with_modifiers =
+                vc5_resource_create_with_modifiers;
+        pscreen->resource_from_handle = vc5_resource_from_handle;
+
+        /* Generic helpers that forward to the resource's vtbl. */
+        pscreen->resource_destroy = u_resource_destroy_vtbl;
+        pscreen->resource_get_handle = u_resource_get_handle_vtbl;
+}
+
+/**
+ * Installs the resource-related entry points on the context: transfers,
+ * sub-data uploads, surface creation/destruction, copies, blits and
+ * flush_resource.
+ */
+void
+vc5_resource_context_init(struct pipe_context *pctx)
+{
+        /* Surfaces and blits are implemented by this driver. */
+        pctx->create_surface = vc5_create_surface;
+        pctx->surface_destroy = vc5_surface_destroy;
+        pctx->blit = vc5_blit;
+        pctx->flush_resource = vc5_flush_resource;
+
+        /* Transfers dispatch through the u_resource vtbl helpers. */
+        pctx->transfer_map = u_transfer_map_vtbl;
+        pctx->transfer_unmap = u_transfer_unmap_vtbl;
+        pctx->transfer_flush_region = u_transfer_flush_region_vtbl;
+
+        /* Shared gallium utility implementations. */
+        pctx->buffer_subdata = u_default_buffer_subdata;
+        pctx->texture_subdata = u_default_texture_subdata;
+        pctx->resource_copy_region = util_resource_copy_region;
+}
diff --git a/src/gallium/drivers/vc5/vc5_resource.h b/src/gallium/drivers/vc5/vc5_resource.h
new file mode 100644 (file)
index 0000000..3440fdc
--- /dev/null
@@ -0,0 +1,158 @@
+/*
+ * Copyright © 2014-2017 Broadcom
+ * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef VC5_RESOURCE_H
+#define VC5_RESOURCE_H
+
+#include "vc5_screen.h"
+#include "util/u_transfer.h"
+
+/* A UIFblock is a 256-byte region of memory that's 256-byte aligned.  These
+ * will be grouped in 4x4 blocks (left-to-right, then top-to-bottom) in a 4KB
+ * page.  Those pages are then arranged left-to-right, top-to-bottom, to cover
+ * an image.
+ *
+ * The inside of a UIFblock, for packed pixels, will be split into 4 64-byte
+ * utiles.  Utiles may be 8x8 (8bpp), 8x4(16bpp) or 4x4 (32bpp).
+ */
+
+/**
+ * Tiling mode enum used for vc5_resource.c, which maps directly to the Memory
+ * Format field of render target and Z/Stencil config.
+ *
+ * Because the values are handed to the hardware as-is, the enumerators must
+ * not be reordered or renumbered.
+ */
+enum vc5_tiling_mode {
+        /* Untiled resources.  Not valid as texture inputs. */
+        VC5_TILING_RASTER,
+
+        /* Single line of u-tiles. */
+        VC5_TILING_LINEARTILE,
+
+        /* Departure from standard 4-UIF block column format: a single
+         * column of UIF blocks.  vc5_setup_slices() pads such levels to one
+         * UIF block in width and height.
+         */
+        VC5_TILING_UBLINEAR_1_COLUMN,
+
+        /* Departure from standard 4-UIF block column format: two columns of
+         * UIF blocks.  vc5_setup_slices() picks this for levels at most two
+         * UIF blocks wide and pads the width to two UIF blocks.
+         */
+        VC5_TILING_UBLINEAR_2_COLUMN,
+
+        /* Normal tiling format: grouped in 4x4 UIFblocks, each of which is
+         * split 2x2 into utiles.  vc5_setup_slices() pads such levels to a
+         * multiple of 4 UIF blocks in both dimensions.
+         */
+        VC5_TILING_UIF_NO_XOR,
+
+        /* Normal tiling format: grouped in 4x4 UIFblocks, each of which is
+         * split 2x2 into utiles.
+         *
+         * NOTE(review): presumably differs from UIF_NO_XOR only by an XOR
+         * applied to the addressing -- confirm against the hardware docs.
+         */
+        VC5_TILING_UIF_XOR,
+};
+
+/**
+ * Driver-private transfer object.  "base" must stay the first member so that
+ * vc5_transfer() can cast a pipe_transfer pointer to this type.
+ */
+struct vc5_transfer {
+        struct pipe_transfer base;
+        /* NOTE(review): presumably the CPU pointer handed back from
+         * transfer_map -- confirm in vc5_resource.c.
+         */
+        void *map;
+
+        /* NOTE(review): "ss" presumably stands for single-sample, i.e. a
+         * temporary resource/box used when mapping multisampled resources --
+         * confirm against the transfer map/unmap code.
+         */
+        struct pipe_resource *ss_resource;
+        struct pipe_box ss_box;
+};
+
+/**
+ * Layout of one mip level of a resource, as computed by vc5_setup_slices().
+ */
+struct vc5_resource_slice {
+        /* Byte offset of this level from the start of the mipmap tree. */
+        uint32_t offset;
+        /* Bytes per row: padded level width * cpp * sample count. */
+        uint32_t stride;
+        /* Bytes occupied by the level: padded level height * stride. */
+        uint32_t size;
+        /* Memory layout chosen for this level. */
+        enum vc5_tiling_mode tiling;
+};
+
+/**
+ * Driver-private surface (a view of one level/layer of a resource), filled
+ * in by vc5_create_surface().  "base" must stay the first member so that
+ * vc5_surface() can cast a pipe_surface pointer to this type.
+ */
+struct vc5_surface {
+        struct pipe_surface base;
+        /* Byte offset of the viewed level and layer within the resource:
+         * slice offset plus first_layer * cube_map_stride.
+         */
+        uint32_t offset;
+        /* Tiling mode copied from the viewed level's slice. */
+        enum vc5_tiling_mode tiling;
+        /**
+         * Output image format for TILE_RENDERING_MODE_CONFIGURATION
+         */
+        uint8_t format;
+
+        /**
+         * Internal format of the tile buffer for
+         * TILE_RENDERING_MODE_CONFIGURATION.
+         */
+        uint8_t internal_type;
+
+        /**
+         * internal bpp value (0=32bpp, 2=128bpp) for color buffers in
+         * TILE_RENDERING_MODE_CONFIGURATION.
+         */
+        uint8_t internal_bpp;
+};
+
+/**
+ * Driver-private resource (buffer or texture).  "base" must stay the first
+ * member so that vc5_resource() can cast a pipe_resource pointer to this
+ * type.
+ */
+struct vc5_resource {
+        struct u_resource base;
+        /* Backing buffer object, either allocated at creation time or opened
+         * from an imported handle.
+         */
+        struct vc5_bo *bo;
+        /* Per-mip-level layout, indexed by level. */
+        struct vc5_resource_slice slices[VC5_MAX_MIP_LEVELS];
+        /* Distance in bytes from one full mipmap tree to the next, used for
+         * arrays, cubes and 3D textures.
+         */
+        uint32_t cube_map_stride;
+        /* Bytes per pixel: the format's block size, or 4 when multisampled. */
+        int cpp;
+        /* True when the resource uses a tiled rather than linear layout. */
+        bool tiled;
+        /** One of V3D_TEXTURE_DATA_FORMAT_* */
+        uint8_t tex_format;
+
+        /**
+         * Number of times the resource has been written to.
+         *
+         * This is used to track whether we need to load the surface on first
+         * rendering.
+         */
+        uint64_t writes;
+
+        /**
+         * Bitmask of PIPE_CLEAR_COLOR0, PIPE_CLEAR_DEPTH, PIPE_CLEAR_STENCIL
+         * for which parts of the resource are defined.
+         *
+         * Used for avoiding fallback to quad clears for clearing just depth,
+         * when the stencil contents have never been initialized.  Note that
+         * we're lazy and fields not present in the buffer (DEPTH in a color
+         * buffer) may get marked.
+         */
+        uint32_t initialized_buffers;
+};
+
+/**
+ * Downcasts a pipe_resource to the driver's vc5_resource.  Only valid for
+ * resources created by this driver, where the pipe_resource sits at the
+ * start of the vc5_resource.
+ */
+static inline struct vc5_resource *
+vc5_resource(struct pipe_resource *prsc)
+{
+        return (struct vc5_resource *)prsc;
+}
+
+/**
+ * Downcasts a pipe_surface to the driver's vc5_surface.  Only valid for
+ * surfaces created by vc5_create_surface(), where the pipe_surface is the
+ * first member of the vc5_surface.
+ */
+static inline struct vc5_surface *
+vc5_surface(struct pipe_surface *psurf)
+{
+        return (struct vc5_surface *)psurf;
+}
+
+/**
+ * Downcasts a pipe_transfer to the driver's vc5_transfer.  Relies on the
+ * pipe_transfer being the first member of the vc5_transfer.
+ */
+static inline struct vc5_transfer *
+vc5_transfer(struct pipe_transfer *ptrans)
+{
+        return (struct vc5_transfer *)ptrans;
+}
+
+void vc5_resource_screen_init(struct pipe_screen *pscreen);
+void vc5_resource_context_init(struct pipe_context *pctx);
+struct pipe_resource *vc5_resource_create(struct pipe_screen *pscreen,
+                                          const struct pipe_resource *tmpl);
+
+#endif /* VC5_RESOURCE_H */
diff --git a/src/gallium/drivers/vc5/vc5_screen.c b/src/gallium/drivers/vc5/vc5_screen.c
new file mode 100644 (file)
index 0000000..d3c9f09
--- /dev/null
@@ -0,0 +1,620 @@
+/*
+ * Copyright © 2014-2017 Broadcom
+ * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "os/os_misc.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_screen.h"
+#include "pipe/p_state.h"
+
+#include "util/u_debug.h"
+#include "util/u_memory.h"
+#include "util/u_format.h"
+#include "util/u_hash_table.h"
+#include "util/ralloc.h"
+
+#include <xf86drm.h>
+#include "vc5_drm.h"
+#include "vc5_screen.h"
+#include "vc5_context.h"
+#include "vc5_resource.h"
+#include "compiler/v3d_compiler.h"
+
+/**
+ * pipe_screen::get_name hook.  The name string ("VC5 V3D <major>.<minor>")
+ * is built from devinfo.ver on first use and cached on the screen, with the
+ * screen as its ralloc parent.
+ */
+static const char *
+vc5_screen_get_name(struct pipe_screen *pscreen)
+{
+        struct vc5_screen *screen = vc5_screen(pscreen);
+
+        /* Already formatted by an earlier call. */
+        if (screen->name)
+                return screen->name;
+
+        /* devinfo.ver encodes the version as major * 10 + minor. */
+        screen->name = ralloc_asprintf(screen,
+                                       "VC5 V3D %d.%d",
+                                       screen->devinfo.ver / 10,
+                                       screen->devinfo.ver % 10);
+
+        return screen->name;
+}
+
+/**
+ * pipe_screen::get_vendor hook: returns the constant vendor string.
+ */
+static const char *
+vc5_screen_get_vendor(struct pipe_screen *pscreen)
+{
+        static const char vendor[] = "Broadcom";
+
+        return vendor;
+}
+
+/**
+ * pipe_screen::destroy hook: tears down everything owned by the screen and
+ * finally frees the screen itself.
+ */
+static void
+vc5_screen_destroy(struct pipe_screen *pscreen)
+{
+        struct vc5_screen *screen = vc5_screen(pscreen);
+
+        util_hash_table_destroy(screen->bo_handles);
+        vc5_bufmgr_destroy(pscreen);
+        slab_destroy_parent(&screen->transfer_pool);
+
+        if (using_vc5_simulator)
+                vc5_simulator_destroy(screen);
+
+        v3d_compiler_free(screen->compiler);
+
+        close(screen->fd);
+        /* The screen is a ralloc context (the cached name string is
+         * allocated against it), so it must be freed last.
+         */
+        ralloc_free(pscreen);
+}
+
+/**
+ * pipe_screen::get_param hook: reports the integer/boolean capabilities of
+ * the device.
+ *
+ * Every capability is answered with a constant, except
+ * PIPE_CAP_VIDEO_MEMORY, which is derived from the amount of system memory.
+ * Unknown capabilities are logged to stderr and reported as 0.
+ */
+static int
+vc5_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
+{
+        switch (param) {
+                /* Supported features (boolean caps). */
+        case PIPE_CAP_VERTEX_COLOR_CLAMPED:
+        case PIPE_CAP_VERTEX_COLOR_UNCLAMPED:
+        case PIPE_CAP_FRAGMENT_COLOR_CLAMPED:
+        case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT:
+        case PIPE_CAP_NPOT_TEXTURES:
+        case PIPE_CAP_SHAREABLE_SHADERS:
+        case PIPE_CAP_USER_CONSTANT_BUFFERS:
+        case PIPE_CAP_TEXTURE_SHADOW_MAP:
+        case PIPE_CAP_BLEND_EQUATION_SEPARATE:
+        case PIPE_CAP_TWO_SIDED_STENCIL:
+        case PIPE_CAP_TEXTURE_MULTISAMPLE:
+        case PIPE_CAP_TEXTURE_SWIZZLE:
+        case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
+        case PIPE_CAP_TGSI_INSTANCEID:
+        case PIPE_CAP_SM3:
+        case PIPE_CAP_INDEP_BLEND_ENABLE: /* XXX */
+        case PIPE_CAP_TEXTURE_QUERY_LOD:
+        case PIPE_CAP_PRIMITIVE_RESTART:
+        case PIPE_CAP_GLSL_OPTIMIZE_CONSERVATIVELY:
+        case PIPE_CAP_OCCLUSION_QUERY:
+        case PIPE_CAP_POINT_SPRITE:
+        case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
+        case PIPE_CAP_STREAM_OUTPUT_INTERLEAVE_BUFFERS:
+        case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION:
+        case PIPE_CAP_COMPUTE:
+        case PIPE_CAP_DRAW_INDIRECT:
+                return 1;
+
+        case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
+                return 256;
+
+        case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
+                return 4;
+
+        case PIPE_CAP_GLSL_FEATURE_LEVEL:
+                return 400;
+
+        case PIPE_CAP_MAX_VIEWPORTS:
+                return 1;
+
+        case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
+        case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
+                return 1;
+
+        case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES:
+        case PIPE_CAP_MIXED_COLOR_DEPTH_BITS:
+                return 1;
+
+
+                /* Stream output. */
+        case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
+                return 4;
+        case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:
+        case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
+                return 64;
+
+        case PIPE_CAP_MIN_TEXEL_OFFSET:
+        case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET:
+                return -8;
+        case PIPE_CAP_MAX_TEXEL_OFFSET:
+        case PIPE_CAP_MAX_TEXTURE_GATHER_OFFSET:
+                return 7;
+
+                /* Unsupported features. */
+        case PIPE_CAP_ANISOTROPIC_FILTER:
+        case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
+        case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
+        case PIPE_CAP_CUBE_MAP_ARRAY:
+        case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
+        case PIPE_CAP_MIXED_COLORBUFFER_FORMATS:
+        case PIPE_CAP_SEAMLESS_CUBE_MAP:
+        case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
+        case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY:
+        case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY:
+        case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY:
+        case PIPE_CAP_START_INSTANCE:
+        case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
+        case PIPE_CAP_SHADER_STENCIL_EXPORT:
+        case PIPE_CAP_TGSI_TEXCOORD:
+        case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
+        case PIPE_CAP_CONDITIONAL_RENDER:
+        case PIPE_CAP_TEXTURE_BARRIER:
+        case PIPE_CAP_INDEP_BLEND_FUNC:
+        case PIPE_CAP_DEPTH_CLIP_DISABLE:
+        case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
+        case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
+        case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
+        case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS:
+        case PIPE_CAP_USER_VERTEX_BUFFERS:
+        case PIPE_CAP_QUERY_PIPELINE_STATISTICS:
+        case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK:
+        case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT:
+        case PIPE_CAP_TGSI_TES_LAYER_VIEWPORT:
+        case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
+        case PIPE_CAP_TEXTURE_GATHER_SM5:
+        case PIPE_CAP_FAKE_SW_MSAA:
+        case PIPE_CAP_SAMPLE_SHADING:
+        case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
+        case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION:
+        case PIPE_CAP_MAX_VERTEX_STREAMS:
+        case PIPE_CAP_MULTI_DRAW_INDIRECT:
+        case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
+        case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
+        case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
+        case PIPE_CAP_SAMPLER_VIEW_TARGET:
+        case PIPE_CAP_CLIP_HALFZ:
+        case PIPE_CAP_VERTEXID_NOBASE:
+        case PIPE_CAP_POLYGON_OFFSET_CLAMP:
+        case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
+        case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
+        case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
+        case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
+        case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
+        case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
+        case PIPE_CAP_DEPTH_BOUNDS_TEST:
+        case PIPE_CAP_TGSI_TXQS:
+        case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
+        case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
+        case PIPE_CAP_CLEAR_TEXTURE:
+        case PIPE_CAP_DRAW_PARAMETERS:
+        case PIPE_CAP_TGSI_PACK_HALF_FLOAT:
+        case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL:
+        case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
+        case PIPE_CAP_INVALIDATE_BUFFER:
+        case PIPE_CAP_GENERATE_MIPMAP:
+        case PIPE_CAP_STRING_MARKER:
+        case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
+        case PIPE_CAP_QUERY_BUFFER_OBJECT:
+        case PIPE_CAP_QUERY_MEMORY_INFO:
+        case PIPE_CAP_PCI_GROUP:
+        case PIPE_CAP_PCI_BUS:
+        case PIPE_CAP_PCI_DEVICE:
+        case PIPE_CAP_PCI_FUNCTION:
+        case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT:
+        case PIPE_CAP_ROBUST_BUFFER_ACCESS_BEHAVIOR:
+        case PIPE_CAP_CULL_DISTANCE:
+        case PIPE_CAP_PRIMITIVE_RESTART_FOR_PATCHES:
+        case PIPE_CAP_TGSI_VOTE:
+        case PIPE_CAP_MAX_WINDOW_RECTANGLES:
+        case PIPE_CAP_POLYGON_OFFSET_UNITS_UNSCALED:
+        case PIPE_CAP_VIEWPORT_SUBPIXEL_BITS:
+        case PIPE_CAP_TGSI_ARRAY_COMPONENTS:
+        case PIPE_CAP_TGSI_FS_FBFETCH:
+        case PIPE_CAP_INT64:
+        case PIPE_CAP_INT64_DIVMOD:
+        case PIPE_CAP_DOUBLES:
+        case PIPE_CAP_BINDLESS_TEXTURE:
+        case PIPE_CAP_POST_DEPTH_COVERAGE:
+        case PIPE_CAP_CAN_BIND_CONST_BUFFER_AS_VERTEX:
+        case PIPE_CAP_TGSI_BALLOT:
+        case PIPE_CAP_SPARSE_BUFFER_PAGE_SIZE:
+        case PIPE_CAP_POLYGON_MODE_FILL_RECTANGLE:
+        case PIPE_CAP_TGSI_CLOCK:
+        case PIPE_CAP_TGSI_TEX_TXF_LZ:
+        case PIPE_CAP_NATIVE_FENCE_FD:
+        case PIPE_CAP_TGSI_MUL_ZERO_WINS:
+        case PIPE_CAP_NIR_SAMPLERS_AS_DEREF:
+        case PIPE_CAP_QUERY_SO_OVERFLOW:
+        case PIPE_CAP_MEMOBJ:
+        case PIPE_CAP_LOAD_CONSTBUF:
+        case PIPE_CAP_TILE_RASTER_ORDER:
+                return 0;
+
+                /* Geometry shader output, unsupported. */
+        case PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES:
+        case PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS:
+                return 0;
+
+                /* Texturing. */
+        case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
+        case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
+                return VC5_MAX_MIP_LEVELS;
+        case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
+                return 256;
+        case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
+                return 2048;
+
+                /* Render targets. */
+        case PIPE_CAP_MAX_RENDER_TARGETS:
+                return 4;
+
+                /* Queries. */
+        case PIPE_CAP_QUERY_TIME_ELAPSED:
+        case PIPE_CAP_QUERY_TIMESTAMP:
+                return 0;
+
+        case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE:
+                return 2048;
+
+        case PIPE_CAP_ENDIANNESS:
+                return PIPE_ENDIAN_LITTLE;
+
+        case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
+                return 64;
+
+        case PIPE_CAP_VENDOR_ID:
+                return 0x14E4;
+        case PIPE_CAP_DEVICE_ID:
+                return 0xFFFFFFFF;
+        case PIPE_CAP_ACCELERATED:
+                return 1;
+        case PIPE_CAP_VIDEO_MEMORY: {
+                uint64_t system_memory;
+
+                /* Report 0 if the amount of system memory can't be
+                 * determined.
+                 */
+                if (!os_get_total_physical_memory(&system_memory))
+                        return 0;
+
+                /* Scale the reported size down by 2^20 (bytes to MiB,
+                 * assuming the helper reports bytes).
+                 */
+                return (int)(system_memory >> 20);
+        }
+        case PIPE_CAP_UMA:
+                return 1;
+
+        default:
+                fprintf(stderr, "unknown param %d\n", param);
+                return 0;
+        }
+}
+
+/**
+ * pipe_screen::get_paramf hook: reports the floating-point capabilities of
+ * the device.  Unknown capabilities are logged to stderr and reported as 0.
+ */
+static float
+vc5_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param)
+{
+        switch (param) {
+                /* Line and point size limits. */
+        case PIPE_CAPF_MAX_LINE_WIDTH:
+        case PIPE_CAPF_MAX_LINE_WIDTH_AA:
+                return 32.0f;
+        case PIPE_CAPF_MAX_POINT_WIDTH:
+        case PIPE_CAPF_MAX_POINT_WIDTH_AA:
+                return 512.0f;
+
+                /* Everything else that is known is reported as zero. */
+        case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY:
+        case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS:
+        case PIPE_CAPF_GUARD_BAND_LEFT:
+        case PIPE_CAPF_GUARD_BAND_TOP:
+        case PIPE_CAPF_GUARD_BAND_RIGHT:
+        case PIPE_CAPF_GUARD_BAND_BOTTOM:
+                return 0.0f;
+
+        default:
+                fprintf(stderr, "unknown paramf %d\n", param);
+                return 0.0f;
+        }
+}
+
+/**
+ * pipe_screen::get_shader_param hook: reports per-stage shader limits.
+ *
+ * Only the vertex and fragment stages are handled; every capability of any
+ * other stage is reported as 0.  Unknown capabilities are logged to stderr
+ * and reported as 0.
+ */
+static int
+vc5_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
+                           enum pipe_shader_cap param)
+{
+        const bool is_fs = (shader == PIPE_SHADER_FRAGMENT);
+
+        if (!is_fs && shader != PIPE_SHADER_VERTEX)
+                return 0;
+
+        /* this is probably not totally correct.. but it's a start: */
+        switch (param) {
+                /* Program size limits. */
+        case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
+        case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
+        case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
+        case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
+                return 16384;
+        case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
+                return UINT_MAX;
+
+                /* Register file and interface limits. */
+        case PIPE_SHADER_CAP_MAX_INPUTS:
+                return is_fs ? VC5_MAX_FS_INPUTS / 4 : 16;
+        case PIPE_SHADER_CAP_MAX_OUTPUTS:
+                return is_fs ? 4 : 8;
+        case PIPE_SHADER_CAP_MAX_TEMPS:
+                return 256; /* GL_MAX_PROGRAM_TEMPORARIES_ARB */
+        case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE:
+                return 16 * 1024 * sizeof(float);
+        case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
+                return 16;
+        case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
+        case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
+        case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
+        case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
+                return VC5_MAX_TEXTURE_SAMPLERS;
+
+                /* Compiler hints. */
+        case PIPE_SHADER_CAP_PREFERRED_IR:
+                return PIPE_SHADER_IR_NIR;
+        case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
+                return 32;
+
+                /* Capabilities reported as 1. */
+        case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
+        case PIPE_SHADER_CAP_INTEGERS:
+                return 1;
+
+                /* Capabilities reported as 0. */
+        case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
+        case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
+        case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
+        case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
+        case PIPE_SHADER_CAP_SUBROUTINES:
+        case PIPE_SHADER_CAP_FP16:
+        case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
+        case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+        case PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED:
+        case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
+        case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
+        case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
+        case PIPE_SHADER_CAP_SUPPORTED_IRS:
+        case PIPE_SHADER_CAP_LOWER_IF_THRESHOLD:
+        case PIPE_SHADER_CAP_TGSI_SKIP_MERGE_REGISTERS:
+                return 0;
+
+        default:
+                fprintf(stderr, "unknown shader param %d\n", param);
+                return 0;
+        }
+        return 0;
+}
+
+/**
+ * Implements pipe_screen::is_format_supported.
+ *
+ * Collects the subset of the requested \p usage bind flags that \p format
+ * can satisfy, and reports success only when that subset equals \p usage
+ * exactly.
+ */
+static boolean
+vc5_screen_is_format_supported(struct pipe_screen *pscreen,
+                               enum pipe_format format,
+                               enum pipe_texture_target target,
+                               unsigned sample_count,
+                               unsigned usage)
+{
+        unsigned supported = 0;
+
+        /* Sample counts above 1 are only accepted when they are exactly
+         * VC5_MAX_SAMPLES.
+         */
+        if (sample_count > 1 && sample_count != VC5_MAX_SAMPLES)
+                return FALSE;
+
+        if (target >= PIPE_MAX_TEXTURE_TYPES)
+                return FALSE;
+
+        if (!util_format_is_supported(format, usage))
+                return FALSE;
+
+        if (usage & PIPE_BIND_VERTEX_BUFFER) {
+                switch (format) {
+                case PIPE_FORMAT_R32G32B32A32_FLOAT:
+                case PIPE_FORMAT_R32G32B32_FLOAT:
+                case PIPE_FORMAT_R32G32_FLOAT:
+                case PIPE_FORMAT_R32_FLOAT:
+                case PIPE_FORMAT_R32G32B32A32_SNORM:
+                case PIPE_FORMAT_R32G32B32_SNORM:
+                case PIPE_FORMAT_R32G32_SNORM:
+                case PIPE_FORMAT_R32_SNORM:
+                case PIPE_FORMAT_R32G32B32A32_SSCALED:
+                case PIPE_FORMAT_R32G32B32_SSCALED:
+                case PIPE_FORMAT_R32G32_SSCALED:
+                case PIPE_FORMAT_R32_SSCALED:
+                case PIPE_FORMAT_R16G16B16A16_UNORM:
+                case PIPE_FORMAT_R16G16B16_UNORM:
+                case PIPE_FORMAT_R16G16_UNORM:
+                case PIPE_FORMAT_R16_UNORM:
+                case PIPE_FORMAT_R16G16B16A16_SNORM:
+                case PIPE_FORMAT_R16G16B16_SNORM:
+                case PIPE_FORMAT_R16G16_SNORM:
+                case PIPE_FORMAT_R16_SNORM:
+                case PIPE_FORMAT_R16G16B16A16_USCALED:
+                case PIPE_FORMAT_R16G16B16_USCALED:
+                case PIPE_FORMAT_R16G16_USCALED:
+                case PIPE_FORMAT_R16_USCALED:
+                case PIPE_FORMAT_R16G16B16A16_SSCALED:
+                case PIPE_FORMAT_R16G16B16_SSCALED:
+                case PIPE_FORMAT_R16G16_SSCALED:
+                case PIPE_FORMAT_R16_SSCALED:
+                case PIPE_FORMAT_R8G8B8A8_UNORM:
+                case PIPE_FORMAT_R8G8B8_UNORM:
+                case PIPE_FORMAT_R8G8_UNORM:
+                case PIPE_FORMAT_R8_UNORM:
+                case PIPE_FORMAT_R8G8B8A8_SNORM:
+                case PIPE_FORMAT_R8G8B8_SNORM:
+                case PIPE_FORMAT_R8G8_SNORM:
+                case PIPE_FORMAT_R8_SNORM:
+                case PIPE_FORMAT_R8G8B8A8_USCALED:
+                case PIPE_FORMAT_R8G8B8_USCALED:
+                case PIPE_FORMAT_R8G8_USCALED:
+                case PIPE_FORMAT_R8_USCALED:
+                case PIPE_FORMAT_R8G8B8A8_SSCALED:
+                case PIPE_FORMAT_R8G8B8_SSCALED:
+                case PIPE_FORMAT_R8G8_SSCALED:
+                case PIPE_FORMAT_R8_SSCALED:
+                        supported |= PIPE_BIND_VERTEX_BUFFER;
+                        break;
+                default:
+                        break;
+                }
+        }
+
+        if (usage & PIPE_BIND_RENDER_TARGET) {
+                if (vc5_rt_format_supported(format))
+                        supported |= PIPE_BIND_RENDER_TARGET;
+        }
+
+        if (usage & PIPE_BIND_SAMPLER_VIEW) {
+                if (vc5_tex_format_supported(format))
+                        supported |= PIPE_BIND_SAMPLER_VIEW;
+        }
+
+        if (usage & PIPE_BIND_DEPTH_STENCIL) {
+                switch (format) {
+                case PIPE_FORMAT_S8_UINT_Z24_UNORM:
+                case PIPE_FORMAT_X8Z24_UNORM:
+                case PIPE_FORMAT_Z16_UNORM:
+                case PIPE_FORMAT_Z32_FLOAT:
+                case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
+                        supported |= PIPE_BIND_DEPTH_STENCIL;
+                        break;
+                default:
+                        break;
+                }
+        }
+
+        if (usage & PIPE_BIND_INDEX_BUFFER) {
+                switch (format) {
+                case PIPE_FORMAT_I8_UINT:
+                case PIPE_FORMAT_I16_UINT:
+                case PIPE_FORMAT_I32_UINT:
+                        supported |= PIPE_BIND_INDEX_BUFFER;
+                        break;
+                default:
+                        break;
+                }
+        }
+
+#if 0
+        if (supported != usage) {
+                fprintf(stderr,
+                        "not supported: format=%s, target=%d, sample_count=%d, "
+                        "usage=0x%x, retval=0x%x\n", util_format_name(format),
+                        target, sample_count, usage, supported);
+        }
+#endif
+
+        /* Any requested bind flag that was not matched above makes the
+         * whole query fail.
+         */
+        return supported == usage;
+}
+
+/* Reduces a pointer-sized key to an unsigned integer. */
+#define PTR_TO_UINT(x) ((unsigned)((intptr_t)(x)))
+
+/* Hash callback: the key, reduced to an unsigned, is used directly as the
+ * hash value.
+ */
+static unsigned handle_hash(void *key)
+{
+    const unsigned hash = PTR_TO_UINT(key);
+
+    return hash;
+}
+
+/* Compare callback: returns 0 when both keys reduce to the same unsigned
+ * value and 1 otherwise.
+ */
+static int handle_compare(void *key1, void *key2)
+{
+    const unsigned lhs = PTR_TO_UINT(key1);
+    const unsigned rhs = PTR_TO_UINT(key2);
+
+    if (lhs == rhs)
+        return 0;
+    return 1;
+}
+
+/**
+ * Queries the V3D core identification registers and records the hardware
+ * version in screen->devinfo.ver.
+ *
+ * Returns false (after printing a message to stderr) if either query fails
+ * or if the version is anything other than 3.3.
+ */
+static bool
+vc5_get_device_info(struct vc5_screen *screen)
+{
+        struct drm_vc5_get_param ident0 = {
+                .param = DRM_VC5_PARAM_V3D_CORE0_IDENT0,
+        };
+        struct drm_vc5_get_param ident1 = {
+                .param = DRM_VC5_PARAM_V3D_CORE0_IDENT1,
+        };
+
+        if (vc5_ioctl(screen->fd, DRM_IOCTL_VC5_GET_PARAM, &ident0) != 0) {
+                fprintf(stderr, "Couldn't get V3D core IDENT0: %s\n",
+                        strerror(errno));
+                return false;
+        }
+
+        if (vc5_ioctl(screen->fd, DRM_IOCTL_VC5_GET_PARAM, &ident1) != 0) {
+                fprintf(stderr, "Couldn't get V3D core IDENT1: %s\n",
+                        strerror(errno));
+                return false;
+        }
+
+        /* The major version is taken from bits 31:24 of IDENT0 and the
+         * minor version from the low 4 bits of IDENT1; they are combined
+         * as major * 10 + minor.
+         */
+        const uint32_t ver_major = (ident0.value >> 24) & 0xff;
+        const uint32_t ver_minor = ident1.value & 0xf;
+
+        screen->devinfo.ver = ver_major * 10 + ver_minor;
+
+        if (screen->devinfo.ver == 33)
+                return true;
+
+        fprintf(stderr,
+                "V3D %d.%d not supported by this version of Mesa.\n",
+                screen->devinfo.ver / 10,
+                screen->devinfo.ver % 10);
+        return false;
+}
+
+/**
+ * Returns the compiler options table for this driver.
+ *
+ * The same v3d_nir_options table is returned for every request; the
+ * \p pscreen, \p ir and \p shader arguments are not consulted.
+ */
+static const void *
+vc5_screen_get_compiler_options(struct pipe_screen *pscreen,
+                                enum pipe_shader_ir ir, unsigned shader)
+{
+        return &v3d_nir_options;
+}
+
+/**
+ * Creates the vc5 pipe_screen on top of the DRM file descriptor \p fd.
+ *
+ * Returns the new screen, or NULL if the screen could not be allocated or
+ * the device is not a supported V3D version.  On every failure path the fd
+ * is closed.
+ */
+struct pipe_screen *
+vc5_screen_create(int fd)
+{
+        struct vc5_screen *screen = rzalloc(NULL, struct vc5_screen);
+        struct pipe_screen *pscreen;
+
+        /* rzalloc() can fail; bail out before touching the screen, closing
+         * the fd just as the device-info failure path does.
+         */
+        if (!screen) {
+                close(fd);
+                return NULL;
+        }
+
+        pscreen = &screen->base;
+
+        pscreen->destroy = vc5_screen_destroy;
+        pscreen->get_param = vc5_screen_get_param;
+        pscreen->get_paramf = vc5_screen_get_paramf;
+        pscreen->get_shader_param = vc5_screen_get_shader_param;
+        pscreen->context_create = vc5_context_create;
+        pscreen->is_format_supported = vc5_screen_is_format_supported;
+
+        screen->fd = fd;
+        list_inithead(&screen->bo_cache.time_list);
+        (void)mtx_init(&screen->bo_handles_mutex, mtx_plain);
+        screen->bo_handles = util_hash_table_create(handle_hash, handle_compare);
+
+#if defined(USE_VC5_SIMULATOR)
+        vc5_simulator_init(screen);
+#endif
+
+        /* Needs the simulator (when enabled) to be up, since the parameter
+         * queries are issued through vc5_ioctl().
+         */
+        if (!vc5_get_device_info(screen))
+                goto fail;
+
+        slab_create_parent(&screen->transfer_pool, sizeof(struct vc5_transfer), 16);
+
+        vc5_fence_init(screen);
+
+        v3d_process_debug_variable();
+
+        vc5_resource_screen_init(pscreen);
+
+        screen->compiler = v3d_compiler_init(&screen->devinfo);
+
+        pscreen->get_name = vc5_screen_get_name;
+        pscreen->get_vendor = vc5_screen_get_vendor;
+        pscreen->get_device_vendor = vc5_screen_get_vendor;
+        pscreen->get_compiler_options = vc5_screen_get_compiler_options;
+
+        return pscreen;
+
+fail:
+        /* Release what was set up before the failure: the handle table is
+         * not owned by the ralloc context, so it must be destroyed here.
+         */
+        if (screen->bo_handles)
+                util_hash_table_destroy(screen->bo_handles);
+        mtx_destroy(&screen->bo_handles_mutex);
+        close(fd);
+        ralloc_free(pscreen);
+        return NULL;
+}
diff --git a/src/gallium/drivers/vc5/vc5_screen.h b/src/gallium/drivers/vc5/vc5_screen.h
new file mode 100644 (file)
index 0000000..d804efa
--- /dev/null
@@ -0,0 +1,99 @@
+/*
+ * Copyright © 2014-2017 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef VC5_SCREEN_H
+#define VC5_SCREEN_H
+
+#include "pipe/p_screen.h"
+#include "os/os_thread.h"
+#include "state_tracker/drm_driver.h"
+#include "util/list.h"
+#include "util/slab.h"
+#include "broadcom/common/v3d_debug.h"
+#include "broadcom/common/v3d_device_info.h"
+
+struct vc5_bo;
+
+#define VC5_MAX_MIP_LEVELS 12
+#define VC5_MAX_TEXTURE_SAMPLERS 32
+#define VC5_MAX_SAMPLES 4
+#define VC5_MAX_DRAW_BUFFERS 4
+
+struct vc5_simulator_file;
+
+/** Driver-private screen state wrapping the gallium pipe_screen. */
+struct vc5_screen {
+        /* Must remain the first member: vc5_screen() casts a pipe_screen
+         * pointer directly to a vc5_screen pointer.
+         */
+        struct pipe_screen base;
+        /** DRM file descriptor passed to vc5_screen_create(). */
+        int fd;
+
+        /** Hardware information; ver is derived from the core IDENT
+         * registers at screen creation.
+         */
+        struct v3d_device_info devinfo;
+
+        const char *name;
+
+        /** The last seqno we've completed a wait for.
+         *
+         * This lets us slightly optimize our waits by skipping wait syscalls
+         * if we know the job's already done.
+         */
+        uint64_t finished_seqno;
+
+        /** Parent slab pool sized for struct vc5_transfer allocations. */
+        struct slab_parent_pool transfer_pool;
+
+        struct vc5_bo_cache {
+                /** List of struct vc5_bo freed, by age. */
+                struct list_head time_list;
+                /** List of struct vc5_bo freed, per size, by age. */
+                struct list_head *size_list;
+                uint32_t size_list_size;
+
+                mtx_t lock;
+
+                uint32_t bo_size;
+                uint32_t bo_count;
+        } bo_cache;
+
+        /** Shader compiler instance created by v3d_compiler_init(). */
+        const struct v3d_compiler *compiler;
+
+        /* NOTE(review): presumably maps GEM handles to struct vc5_bo, with
+         * bo_handles_mutex guarding the table -- confirm against the BO
+         * manager code.
+         */
+        struct util_hash_table *bo_handles;
+        mtx_t bo_handles_mutex;
+
+        uint32_t bo_size;
+        uint32_t bo_count;
+
+        /* NOTE(review): presumably only populated in simulator builds
+         * (USE_VC5_SIMULATOR) -- confirm.
+         */
+        struct vc5_simulator_file *sim_file;
+};
+
+/**
+ * Downcasts a gallium pipe_screen to the driver's vc5_screen.
+ *
+ * The cast is valid because struct pipe_screen is the first member of
+ * struct vc5_screen.
+ */
+static inline struct vc5_screen *
+vc5_screen(struct pipe_screen *screen)
+{
+        return (struct vc5_screen *)screen;
+}
+
+struct pipe_screen *vc5_screen_create(int fd);
+
+void
+vc5_fence_init(struct vc5_screen *screen);
+
+struct vc5_fence *
+vc5_fence_create(struct vc5_screen *screen, uint64_t seqno);
+
+#endif /* VC5_SCREEN_H */
diff --git a/src/gallium/drivers/vc5/vc5_simulator.c b/src/gallium/drivers/vc5/vc5_simulator.c
new file mode 100644 (file)
index 0000000..3f783ea
--- /dev/null
@@ -0,0 +1,736 @@
+/*
+ * Copyright © 2014-2017 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/**
+ * @file vc5_simulator.c
+ *
+ * Implements VC5 simulation on top of a non-VC5 GEM fd.
+ *
+ * This file's goal is to emulate the VC5 ioctls' behavior in the kernel on
+ * top of the simpenrose software simulator.  Generally, VC5 driver BOs have a
+ * GEM-side copy of their contents and a simulator-side memory area that the
+ * GEM contents get copied into during simulation.  Once simulation is done,
+ * the simulator's data is copied back out to the GEM BOs, so that rendering
+ * appears on the screen as if actual hardware rendering had been done.
+ *
+ * One of the limitations of this code is that we shouldn't really need a
+ * GEM-side BO for non-window-system BOs.  However, we do need unique BO
+ * handles for each of our GEM bos so that this file can look up its state
+ * from the handle passed in at submit ioctl time (also, a couple of places
+ * outside of this file still call ioctls directly on the fd).
+ *
+ * Another limitation is that BO import doesn't work unless the underlying
+ * window system's BO size matches what VC5 is going to use, which of course
+ * doesn't work out in practice.  This means that for now, only DRI3 (VC5
+ * makes the winsys BOs) is supported, not DRI2 (window system makes the winsys
+ * BOs).
+ */
+
+#ifdef USE_VC5_SIMULATOR
+
+#include <sys/mman.h>
+#include "util/hash_table.h"
+#include "util/ralloc.h"
+#include "util/set.h"
+#include "util/u_memory.h"
+#include "util/u_mm.h"
+
+#define HW_REGISTER_RO(x) (x)
+#define HW_REGISTER_RW(x) (x)
+#include "libs/core/v3d/registers/3.3.0.0/v3d.h"
+
+#include "vc5_screen.h"
+#include "vc5_context.h"
+#define V3D_TECH_VERSION 3
+#define V3D_REVISION 3
+#define V3D_SUB_REV 0
+#define V3D_HIDDEN_REV 0
+#undef unreachable
+#include "v3d_hw_auto.h"
+
+/** Global (across GEM fds) state for the simulator */
+static struct vc5_simulator_state {
+        /* Taken around heap allocation/free, BO-map updates and lookups,
+         * and the refcount check in vc5_simulator_init_global().
+         */
+        mtx_t mutex;
+
+        /* Handle to the simulated hardware; all register reads, writes and
+         * ticks go through it.
+         */
+        struct v3d_hw *v3d;
+
+        /* Base virtual address of the heap. */
+        void *mem;
+        /* Base hardware address of the heap. */
+        uint32_t mem_base;
+        /* Size of the heap. */
+        size_t mem_size;
+
+        /* Allocator that simulator BOs are carved out of. */
+        struct mem_block *heap;
+        struct mem_block *overflow;
+
+        /** Mapping from GEM fd (looked up as fd + 1) to
+         * struct vc5_simulator_file *
+         */
+        struct hash_table *fd_map;
+
+        /* Incremented on every vc5_simulator_init_global() call. */
+        int refcount;
+} sim_state = {
+        .mutex = _MTX_INITIALIZER_NP,
+};
+
+/** Per-GEM-fd state for the simulator. */
+struct vc5_simulator_file {
+        int fd;
+
+        /** Mapping from GEM handle to struct vc5_simulator_bo * */
+        struct hash_table *bo_map;
+
+        /* GMP permission table for this fd: gmp->ofs is the address
+         * programmed into V3D_GMP_0_TABLE_ADDR at flush time, and gmp_vaddr
+         * is the CPU pointer set_gmp_flags() writes the 2-bit entries
+         * through.
+         */
+        struct mem_block *gmp;
+        void *gmp_vaddr;
+};
+
+/** Wrapper for drm_vc5_bo tracking the simulator-specific state. */
+struct vc5_simulator_bo {
+        /* Per-fd simulator state this BO was created under. */
+        struct vc5_simulator_file *file;
+
+        /** Area for this BO within sim_state->mem */
+        struct mem_block *block;
+        /* Size in bytes, rounded up to a multiple of 4096 at creation. */
+        uint32_t size;
+        /* CPU address of the BO's storage inside the simulator heap. */
+        void *vaddr;
+
+        /* CPU mapping and row stride of the underlying GEM BO; only set for
+         * BOs registered through vc5_simulator_open_from_handle(),
+         * otherwise NULL/0.
+         */
+        void *winsys_map;
+        uint32_t winsys_stride;
+
+        /* GEM handle, or 0 for allocations kept out of the lookup table. */
+        int handle;
+};
+
+/* Packs an integer into a pointer-sized value so that it can be used as a
+ * hash table key.
+ */
+static void *
+int_to_key(int key)
+{
+        const uintptr_t widened = (uintptr_t)key;
+
+        return (void *)widened;
+}
+
+/* Finds the per-fd simulator state for a GEM fd.  The table is keyed by
+ * fd + 1 (presumably so that fd 0 does not turn into a NULL key).  Returns
+ * NULL when the fd has no entry.
+ */
+static struct vc5_simulator_file *
+vc5_get_simulator_file_for_fd(int fd)
+{
+        void *key = int_to_key(fd + 1);
+        struct hash_entry *found =
+                _mesa_hash_table_search(sim_state.fd_map, key);
+
+        if (!found)
+                return NULL;
+
+        return found->data;
+}
+
+/* A marker placed just after each BO, then checked after rendering to make
+ * sure it's still there.
+ */
+#define BO_SENTINEL            0xfedcba98
+
+/* 128kb */
+#define GMP_ALIGN2             17
+
+/**
+ * Sets the range of GPU virtual address space to have the given GMP
+ * permissions (bit 0 = read, bit 1 = write, write-only forbidden).
+ *
+ * \p offset must be aligned to the GMP page size (1 << GMP_ALIGN2 bytes);
+ * \p size is rounded up to whole GMP pages.  Each 32-bit word of the table
+ * holds the 2-bit entries of 16 consecutive pages.
+ */
+static void
+set_gmp_flags(struct vc5_simulator_file *file,
+              uint32_t offset, uint32_t size, uint32_t flag)
+{
+        assert((offset & ((1 << GMP_ALIGN2) - 1)) == 0);
+        int gmp_offset = offset >> GMP_ALIGN2;
+        int gmp_count = align(size, 1 << GMP_ALIGN2) >> GMP_ALIGN2;
+        uint32_t *gmp = file->gmp_vaddr;
+
+        assert(flag <= 0x3);
+
+        for (int i = gmp_offset; i < gmp_offset + gmp_count; i++) {
+                int32_t bitshift = (i % 16) * 2;
+                /* The mask constant is unsigned on purpose: for the last
+                 * entry of a word the shift is 30, and shifting a signed
+                 * 0x3 that far overflows int, which is undefined behavior.
+                 */
+                gmp[i / 16] &= ~(0x3u << bitshift);
+                gmp[i / 16] |= flag << bitshift;
+        }
+}
+
+/**
+ * Allocates space in simulator memory and returns a tracking struct for it.
+ *
+ * \p size is rounded up to a multiple of 4096.  The tracking struct is
+ * ralloc'ed against the per-fd simulator state, and is entered into that
+ * fd's handle table unless \p handle is 0.
+ */
+static struct vc5_simulator_bo *
+vc5_create_simulator_bo(int fd, int handle, unsigned size)
+{
+        struct vc5_simulator_file *file = vc5_get_simulator_file_for_fd(fd);
+        struct vc5_simulator_bo *sim_bo = rzalloc(file,
+                                                  struct vc5_simulator_bo);
+        size = align(size, 4096);
+
+        sim_bo->file = file;
+        sim_bo->handle = handle;
+
+        /* The extra 4 bytes hold the BO_SENTINEL word written below. */
+        mtx_lock(&sim_state.mutex);
+        sim_bo->block = u_mmAllocMem(sim_state.heap, size + 4, GMP_ALIGN2, 0);
+        mtx_unlock(&sim_state.mutex);
+        assert(sim_bo->block);
+
+        /* Grant read and write GMP permission over the BO's range. */
+        set_gmp_flags(file, sim_bo->block->ofs, size, 0x3);
+
+        sim_bo->size = size;
+        /* Translate the block's hardware address into a CPU pointer. */
+        sim_bo->vaddr = sim_state.mem + sim_bo->block->ofs - sim_state.mem_base;
+        /* Fill new BOs with a recognizable byte pattern. */
+        memset(sim_bo->vaddr, 0xd0, size);
+
+        *(uint32_t *)(sim_bo->vaddr + sim_bo->size) = BO_SENTINEL;
+
+        /* A handle of 0 is used for vc5_gem.c internal allocations that
+         * don't need to go in the lookup table.
+         */
+        if (handle != 0) {
+                mtx_lock(&sim_state.mutex);
+                _mesa_hash_table_insert(file->bo_map, int_to_key(handle),
+                                        sim_bo);
+                mtx_unlock(&sim_state.mutex);
+        }
+
+        return sim_bo;
+}
+
+/**
+ * Tears down a simulator BO: unmaps any winsys mapping, clears its GMP
+ * permissions, returns its heap block, drops it from the per-fd handle
+ * table (when it has a non-zero handle) and frees the tracking struct.
+ */
+static void
+vc5_free_simulator_bo(struct vc5_simulator_bo *sim_bo)
+{
+        struct vc5_simulator_file *owner = sim_bo->file;
+
+        if (sim_bo->winsys_map != NULL)
+                munmap(sim_bo->winsys_map, sim_bo->size);
+
+        /* Revoke both read and write permission over the BO's range. */
+        set_gmp_flags(owner, sim_bo->block->ofs, sim_bo->size, 0x0);
+
+        mtx_lock(&sim_state.mutex);
+        u_mmFreeMem(sim_bo->block);
+        if (sim_bo->handle != 0) {
+                void *key = int_to_key(sim_bo->handle);
+                struct hash_entry *slot =
+                        _mesa_hash_table_search(owner->bo_map, key);
+
+                _mesa_hash_table_remove(owner->bo_map, slot);
+        }
+        mtx_unlock(&sim_state.mutex);
+
+        ralloc_free(sim_bo);
+}
+
+/* Looks up the simulator BO registered for a GEM handle on the given
+ * per-fd state, holding the global simulator mutex for the search.
+ * Returns NULL if the handle is not in the table.
+ */
+static struct vc5_simulator_bo *
+vc5_get_simulator_bo(struct vc5_simulator_file *file, int gem_handle)
+{
+        struct hash_entry *found;
+
+        mtx_lock(&sim_state.mutex);
+        found = _mesa_hash_table_search(file->bo_map,
+                                        int_to_key(gem_handle));
+        mtx_unlock(&sim_state.mutex);
+
+        if (found == NULL)
+                return NULL;
+
+        return found->data;
+}
+
+/* Copies the contents of every BO referenced by the job from its GEM-side
+ * mapping into simulator memory.  Always returns 0.
+ */
+static int
+vc5_simulator_pin_bos(int fd, struct vc5_job *job)
+{
+        struct vc5_simulator_file *sim_file =
+                vc5_get_simulator_file_for_fd(fd);
+        struct set_entry *it;
+
+        set_foreach(job->bos, it) {
+                struct vc5_bo *bo = (struct vc5_bo *)it->key;
+                struct vc5_simulator_bo *sim_bo;
+
+                sim_bo = vc5_get_simulator_bo(sim_file, bo->handle);
+
+                /* Make sure bo->map is valid before reading from it. */
+                vc5_bo_map(bo);
+                memcpy(sim_bo->vaddr, bo->map, bo->size);
+        }
+
+        return 0;
+}
+
+/* Copies the contents of every BO referenced by the job back from simulator
+ * memory into its GEM-side mapping, after asserting that the sentinel word
+ * behind the BO is intact.  Always returns 0.
+ */
+static int
+vc5_simulator_unpin_bos(int fd, struct vc5_job *job)
+{
+        struct vc5_simulator_file *sim_file =
+                vc5_get_simulator_file_for_fd(fd);
+        struct set_entry *it;
+
+        set_foreach(job->bos, it) {
+                struct vc5_bo *bo = (struct vc5_bo *)it->key;
+                struct vc5_simulator_bo *sim_bo;
+
+                sim_bo = vc5_get_simulator_bo(sim_file, bo->handle);
+
+                /* A changed sentinel means something wrote past the end
+                 * of this BO.
+                 */
+                assert(*(uint32_t *)(sim_bo->vaddr + sim_bo->size) ==
+                       BO_SENTINEL);
+
+                vc5_bo_map(bo);
+                memcpy(bo->map, sim_bo->vaddr, bo->size);
+        }
+
+        return 0;
+}
+
+/* Disabled debugging aid that writes a job's state and BO contents to a
+ * "vc5-dri-N.dump" file.  It is compiled out, and its only call site in
+ * vc5_simulator_flush() is commented out as well.
+ *
+ * NOTE(review): the body still refers to types (struct vc5_exec_info,
+ * struct drm_gem_cma_object) that nothing else in this file uses, so it
+ * presumably needs porting before it can be re-enabled.
+ */
+#if 0
+static void
+vc5_dump_to_file(struct vc5_exec_info *exec)
+{
+        static int dumpno = 0;
+        struct drm_vc5_get_hang_state *state;
+        struct drm_vc5_get_hang_state_bo *bo_state;
+        unsigned int dump_version = 0;
+
+        if (!(vc5_debug & VC5_DEBUG_DUMP))
+                return;
+
+        state = calloc(1, sizeof(*state));
+
+        int unref_count = 0;
+        list_for_each_entry_safe(struct drm_vc5_bo, bo, &exec->unref_list,
+                                 unref_head) {
+                unref_count++;
+        }
+
+        /* Add one more for the overflow area that isn't wrapped in a BO. */
+        state->bo_count = exec->bo_count + unref_count + 1;
+        bo_state = calloc(state->bo_count, sizeof(*bo_state));
+
+        char *filename = NULL;
+        asprintf(&filename, "vc5-dri-%d.dump", dumpno++);
+        FILE *f = fopen(filename, "w+");
+        if (!f) {
+                fprintf(stderr, "Couldn't open %s: %s", filename,
+                        strerror(errno));
+                return;
+        }
+
+        fwrite(&dump_version, sizeof(dump_version), 1, f);
+
+        state->ct0ca = exec->ct0ca;
+        state->ct0ea = exec->ct0ea;
+        state->ct1ca = exec->ct1ca;
+        state->ct1ea = exec->ct1ea;
+        state->start_bin = exec->ct0ca;
+        state->start_render = exec->ct1ca;
+        fwrite(state, sizeof(*state), 1, f);
+
+        int i;
+        for (i = 0; i < exec->bo_count; i++) {
+                struct drm_gem_cma_object *cma_bo = exec->bo[i];
+                bo_state[i].handle = i; /* Not used by the parser. */
+                bo_state[i].paddr = cma_bo->paddr;
+                bo_state[i].size = cma_bo->base.size;
+        }
+
+        list_for_each_entry_safe(struct drm_vc5_bo, bo, &exec->unref_list,
+                                 unref_head) {
+                struct drm_gem_cma_object *cma_bo = &bo->base;
+                bo_state[i].handle = 0;
+                bo_state[i].paddr = cma_bo->paddr;
+                bo_state[i].size = cma_bo->base.size;
+                i++;
+        }
+
+        /* Add the static overflow memory area. */
+        bo_state[i].handle = exec->bo_count;
+        bo_state[i].paddr = sim_state.overflow->ofs;
+        bo_state[i].size = sim_state.overflow->size;
+        i++;
+
+        fwrite(bo_state, sizeof(*bo_state), state->bo_count, f);
+
+        for (int i = 0; i < exec->bo_count; i++) {
+                struct drm_gem_cma_object *cma_bo = exec->bo[i];
+                fwrite(cma_bo->vaddr, cma_bo->base.size, 1, f);
+        }
+
+        list_for_each_entry_safe(struct drm_vc5_bo, bo, &exec->unref_list,
+                                 unref_head) {
+                struct drm_gem_cma_object *cma_bo = &bo->base;
+                fwrite(cma_bo->vaddr, cma_bo->base.size, 1, f);
+        }
+
+        /* The overflow area is dumped as zeroes rather than its contents. */
+        void *overflow = calloc(1, sim_state.overflow->size);
+        fwrite(overflow, 1, sim_state.overflow->size, f);
+        free(overflow);
+
+        free(state);
+        free(bo_state);
+        fclose(f);
+}
+#endif
+
+#define V3D_WRITE(reg, val) v3d_hw_write_reg(sim_state.v3d, reg, val)
+#define V3D_READ(reg) v3d_hw_read_reg(sim_state.v3d, reg)
+
+/* Pulses the flush bit in the GCA cache control register.  Does nothing
+ * when the simulated hardware reports no GCA.
+ */
+static void
+vc5_flush_l3(void)
+{
+        if (v3d_hw_has_gca(sim_state.v3d)) {
+                const uint32_t ctrl = V3D_READ(V3D_GCA_CACHE_CTRL);
+
+                /* Raise the flush bit, then drop it again, leaving every
+                 * other bit as it was read.
+                 */
+                V3D_WRITE(V3D_GCA_CACHE_CTRL,
+                          ctrl | V3D_GCA_CACHE_CTRL_FLUSH_SET);
+                V3D_WRITE(V3D_GCA_CACHE_CTRL,
+                          ctrl & ~V3D_GCA_CACHE_CTRL_FLUSH_SET);
+        }
+}
+
+/* Invalidates the L2 cache.  This is a read-only cache.
+ *
+ * The control register is written with both the clear and the enable bits
+ * set.
+ */
+static void
+vc5_flush_l2(void)
+{
+        const uint32_t l2cactl = V3D_CTL_0_L2CACTL_L2CCLR_SET |
+                                 V3D_CTL_0_L2CACTL_L2CENA_SET;
+
+        V3D_WRITE(V3D_CTL_0_L2CACTL, l2cactl);
+}
+
+/* Invalidates texture L2 cachelines.
+ *
+ * The flush range is programmed as start 0 and end ~0 before the flush is
+ * triggered with a flush-mode field of 0.
+ */
+static void
+vc5_flush_l2t(void)
+{
+        const uint32_t l2tcactl = V3D_CTL_0_L2TCACTL_L2TFLS_SET |
+                                  (0 << V3D_CTL_0_L2TCACTL_L2TFLM_LSB);
+
+        V3D_WRITE(V3D_CTL_0_L2TFLSTA, 0);
+        V3D_WRITE(V3D_CTL_0_L2TFLEND, ~0);
+        V3D_WRITE(V3D_CTL_0_L2TCACTL, l2tcactl);
+}
+
+/* Invalidates the slice caches.  These are read-only caches.
+ *
+ * Done by writing all-ones to the slice cache control register.
+ */
+static void
+vc5_flush_slices(void)
+{
+        V3D_WRITE(V3D_CTL_0_SLCACTL, ~0);
+}
+
+/* Flushes/invalidates every simulated cache level in turn: L3 (GCA), L2,
+ * texture L2 and the slice caches.  Called before each job is run.
+ */
+static void
+vc5_flush_caches(void)
+{
+        vc5_flush_l3();
+        vc5_flush_l2();
+        vc5_flush_l2t();
+        vc5_flush_slices();
+}
+
+/**
+ * Runs one job on the simulator, synchronously.
+ *
+ * Copies any winsys contents of the first color buffer into its GEM BO,
+ * copies all job BOs into simulator memory, reloads the GMP table, flushes
+ * the caches, runs the binner control list and then the render control
+ * list to completion, and finally copies the results back out.
+ *
+ * Returns 0, or the error from pinning/unpinning the BOs.
+ */
+int
+vc5_simulator_flush(struct vc5_context *vc5,
+                    struct drm_vc5_submit_cl *submit, struct vc5_job *job)
+{
+        struct vc5_screen *screen = vc5->screen;
+        int fd = screen->fd;
+        struct vc5_simulator_file *file = vc5_get_simulator_file_for_fd(fd);
+        struct vc5_surface *csurf = vc5_surface(vc5->framebuffer.cbufs[0]);
+        struct vc5_resource *ctex = csurf ? vc5_resource(csurf->base.texture) : NULL;
+        struct vc5_simulator_bo *csim_bo = ctex ? vc5_get_simulator_bo(file, ctex->bo->handle) : NULL;
+        uint32_t winsys_stride = ctex ? csim_bo->winsys_stride : 0;
+        uint32_t sim_stride = ctex ? ctex->slices[0].stride : 0;
+        /* Only the bytes present in both row layouts are copied. */
+        uint32_t row_len = MIN2(sim_stride, winsys_stride);
+        int ret;
+
+        /* Pull the current winsys contents into the GEM BO row by row,
+         * converting between the two strides.
+         */
+        if (ctex && csim_bo->winsys_map) {
+#if 0
+                fprintf(stderr, "%dx%d %d %d %d\n",
+                        ctex->base.b.width0, ctex->base.b.height0,
+                        winsys_stride,
+                        sim_stride,
+                        ctex->bo->size);
+#endif
+
+                for (int y = 0; y < ctex->base.b.height0; y++) {
+                        memcpy(ctex->bo->map + y * sim_stride,
+                               csim_bo->winsys_map + y * winsys_stride,
+                               row_len);
+                }
+        }
+
+        ret = vc5_simulator_pin_bos(fd, job);
+        if (ret)
+                return ret;
+
+        //vc5_dump_to_file(&exec);
+
+        /* Completely reset the GMP. */
+        v3d_hw_write_reg(sim_state.v3d, V3D_GMP_0_CFG,
+                         V3D_GMP_0_CFG_PROTENABLE_SET);
+        v3d_hw_write_reg(sim_state.v3d, V3D_GMP_0_TABLE_ADDR, file->gmp->ofs);
+        v3d_hw_write_reg(sim_state.v3d, V3D_GMP_0_CLEAR_LOAD, ~0);
+        /* Spin until the GMP stops reporting its configuration as busy. */
+        while (v3d_hw_read_reg(sim_state.v3d, V3D_GMP_0_STATUS) &
+               V3D_GMP_0_STATUS_CFG_BUSY_SET) {
+                ;
+        }
+
+        vc5_flush_caches();
+
+        /* Queue the binner control list on thread 0. */
+        v3d_hw_write_reg(sim_state.v3d, V3D_CLE_0_CT0QBA, submit->bcl_start);
+        v3d_hw_write_reg(sim_state.v3d, V3D_CLE_0_CT0QEA, submit->bcl_end);
+
+        /* Wait for bin to complete before firing render, as it seems the
+         * simulator doesn't implement the semaphores.
+         */
+        while (v3d_hw_read_reg(sim_state.v3d, V3D_CLE_0_CT0CA) !=
+               v3d_hw_read_reg(sim_state.v3d, V3D_CLE_0_CT0EA)) {
+                v3d_hw_tick(sim_state.v3d);
+        }
+
+        /* Queue the render control list on thread 1. */
+        v3d_hw_write_reg(sim_state.v3d, V3D_CLE_0_CT1QBA, submit->rcl_start);
+        v3d_hw_write_reg(sim_state.v3d, V3D_CLE_0_CT1QEA, submit->rcl_end);
+
+        /* Tick the simulator until thread 1's current address has reached
+         * its end address in both the CLE_0 and CLE_1 register sets.
+         */
+        while (v3d_hw_read_reg(sim_state.v3d, V3D_CLE_0_CT1CA) !=
+               v3d_hw_read_reg(sim_state.v3d, V3D_CLE_0_CT1EA) ||
+               v3d_hw_read_reg(sim_state.v3d, V3D_CLE_1_CT1CA) !=
+               v3d_hw_read_reg(sim_state.v3d, V3D_CLE_1_CT1EA)) {
+                v3d_hw_tick(sim_state.v3d);
+        }
+
+        ret = vc5_simulator_unpin_bos(fd, job);
+        if (ret)
+                return ret;
+
+        /* Push the rendered rows back out to the winsys mapping. */
+        if (ctex && csim_bo->winsys_map) {
+                for (int y = 0; y < ctex->base.b.height0; y++) {
+                        memcpy(csim_bo->winsys_map + y * winsys_stride,
+                               ctex->bo->map + y * sim_stride,
+                               row_len);
+                }
+        }
+
+        return 0;
+}
+
+/**
+ * Map the underlying GEM object from the real hardware GEM handle.
+ *
+ * Asks the kernel for the dumb-buffer mmap offset of sim_bo->handle and
+ * maps sim_bo->size bytes of it read/write.  Any failure prints a message
+ * and aborts, so the returned pointer is always a valid mapping.
+ */
+static void *
+vc5_simulator_map_winsys_bo(int fd, struct vc5_simulator_bo *sim_bo)
+{
+        struct drm_mode_map_dumb req = {
+                .handle = sim_bo->handle,
+        };
+
+        if (drmIoctl(fd, DRM_IOCTL_MODE_MAP_DUMB, &req) != 0) {
+                fprintf(stderr, "map ioctl failure\n");
+                abort();
+        }
+
+        void *cpu_map = mmap(NULL, sim_bo->size, PROT_READ | PROT_WRITE,
+                             MAP_SHARED, fd, req.offset);
+        if (cpu_map != MAP_FAILED)
+                return cpu_map;
+
+        fprintf(stderr,
+                "mmap of bo %d (offset 0x%016llx, size %d) failed\n",
+                sim_bo->handle, (long long)req.offset,
+                (int)sim_bo->size);
+        abort();
+}
+
+/**
+ * Do fixups after a BO has been opened from a handle.
+ *
+ * This could be done at DRM_IOCTL_GEM_OPEN/DRM_IOCTL_GEM_PRIME_FD_TO_HANDLE
+ * time, but we're still using drmPrimeFDToHandle() so we have this helper to
+ * be called afterward instead.
+ *
+ * Creates the simulator-side BO for the handle, records the winsys stride
+ * and maps the underlying GEM object.
+ */
+void vc5_simulator_open_from_handle(int fd, uint32_t winsys_stride,
+                                    int handle, uint32_t size)
+{
+        struct vc5_simulator_bo *imported;
+
+        imported = vc5_create_simulator_bo(fd, handle, size);
+        imported->winsys_stride = winsys_stride;
+        imported->winsys_map = vc5_simulator_map_winsys_bo(fd, imported);
+}
+
+/**
+ * Simulated ioctl(fd, DRM_VC5_CREATE_BO) implementation.
+ *
+ * Making a VC5 BO is just a matter of making a corresponding BO on the host.
+ *
+ * The host BO is a dumb buffer 128 bytes wide at 8 bpp, with enough rows
+ * to cover args->size.  On success args->handle receives the GEM handle
+ * and args->offset the BO's address in simulator memory.  Returns 0 on
+ * success, or the drmIoctl() error if the host BO could not be created, in
+ * which case args is left untouched.
+ */
+static int
+vc5_simulator_create_bo_ioctl(int fd, struct drm_vc5_create_bo *args)
+{
+        int ret;
+        struct drm_mode_create_dumb create = {
+                .width = 128,
+                .bpp = 8,
+                .height = (args->size + 127) / 128,
+        };
+
+        ret = drmIoctl(fd, DRM_IOCTL_MODE_CREATE_DUMB, &create);
+        /* Don't build simulator state around a handle the kernel never
+         * gave us.
+         */
+        if (ret != 0)
+                return ret;
+        assert(create.size >= args->size);
+
+        args->handle = create.handle;
+
+        struct vc5_simulator_bo *sim_bo =
+                vc5_create_simulator_bo(fd, create.handle, args->size);
+
+        args->offset = sim_bo->block->ofs;
+
+        return ret;
+}
+
+/**
+ * Simulated ioctl(fd, DRM_VC5_MMAP_BO) implementation.
+ *
+ * We just pass this straight through to dumb mmap.
+ *
+ * Returns the result of the dumb-map ioctl.
+ */
+static int
+vc5_simulator_mmap_bo_ioctl(int fd, struct drm_vc5_mmap_bo *args)
+{
+        struct drm_mode_map_dumb req = {
+                .handle = args->handle,
+        };
+        const int status = drmIoctl(fd, DRM_IOCTL_MODE_MAP_DUMB, &req);
+
+        /* The offset is copied back whether or not the ioctl succeeded. */
+        args->offset = req.offset;
+
+        return status;
+}
+
+/* Simulated DRM_IOCTL_GEM_CLOSE: frees the simulator's BO for the handle
+ * and then forwards the close to the host DRM device, returning its result.
+ */
+static int
+vc5_simulator_gem_close_ioctl(int fd, struct drm_gem_close *args)
+{
+        struct vc5_simulator_file *sim_file =
+                vc5_get_simulator_file_for_fd(fd);
+
+        /* Drop the simulator-side tracking first... */
+        vc5_free_simulator_bo(vc5_get_simulator_bo(sim_file, args->handle));
+
+        /* ...then let the real GEM close happen. */
+        return drmIoctl(fd, DRM_IOCTL_GEM_CLOSE, args);
+}
+
+/**
+ * Simulated ioctl(fd, DRM_VC5_GET_PARAM) implementation.
+ *
+ * Each known parameter maps to a simulator register whose value is read
+ * back into args->value.  Returns 0 on success; an unknown parameter is
+ * reported on stderr and aborts the process.
+ */
+static int
+vc5_simulator_get_param_ioctl(int fd, struct drm_vc5_get_param *args)
+{
+        static const uint32_t reg_map[] = {
+                [DRM_VC5_PARAM_V3D_UIFCFG] = V3D_HUB_CTL_UIFCFG,
+                [DRM_VC5_PARAM_V3D_HUB_IDENT1] = V3D_HUB_CTL_IDENT1,
+                [DRM_VC5_PARAM_V3D_HUB_IDENT2] = V3D_HUB_CTL_IDENT2,
+                [DRM_VC5_PARAM_V3D_HUB_IDENT3] = V3D_HUB_CTL_IDENT3,
+                [DRM_VC5_PARAM_V3D_CORE0_IDENT0] = V3D_CTL_0_IDENT0,
+                [DRM_VC5_PARAM_V3D_CORE0_IDENT1] = V3D_CTL_0_IDENT1,
+                [DRM_VC5_PARAM_V3D_CORE0_IDENT2] = V3D_CTL_0_IDENT2,
+        };
+
+        /* A zero table entry marks a parameter with no register mapping. */
+        if (args->param < ARRAY_SIZE(reg_map) && reg_map[args->param]) {
+                args->value = v3d_hw_read_reg(sim_state.v3d,
+                                              reg_map[args->param]);
+                return 0;
+        }
+
+        /* Report the parameter that was requested, not the (still unset)
+         * output value.
+         */
+        fprintf(stderr, "Unknown DRM_IOCTL_VC5_GET_PARAM(%lld)\n",
+                (long long)args->param);
+        abort();
+}
+
+/**
+ * Dispatches an ioctl issued against a simulated VC5 device.
+ *
+ * VC5-specific requests are handled by the simulator, generic GEM open/flink
+ * requests are passed through to the host device, and anything else is
+ * reported on stderr before aborting.
+ */
+int
+vc5_simulator_ioctl(int fd, unsigned long request, void *args)
+{
+        switch (request) {
+        /* Generic GEM requests that need no simulator involvement. */
+        case DRM_IOCTL_GEM_FLINK:
+        case DRM_IOCTL_GEM_OPEN:
+                return drmIoctl(fd, request, args);
+
+        case DRM_IOCTL_GEM_CLOSE:
+                return vc5_simulator_gem_close_ioctl(fd, args);
+
+        case DRM_IOCTL_VC5_GET_PARAM:
+                return vc5_simulator_get_param_ioctl(fd, args);
+
+        case DRM_IOCTL_VC5_MMAP_BO:
+                return vc5_simulator_mmap_bo_ioctl(fd, args);
+
+        case DRM_IOCTL_VC5_CREATE_BO:
+                return vc5_simulator_create_bo_ioctl(fd, args);
+
+        case DRM_IOCTL_VC5_WAIT_SEQNO:
+        case DRM_IOCTL_VC5_WAIT_BO:
+                /* All vc5 rendering in the simulator is synchronous, so the
+                 * wait ioctls have nothing to wait for.  Native rendering to
+                 * the host BO is not taken into account, which means front
+                 * buffer rendering is racy.
+                 */
+                return 0;
+
+        default:
+                break;
+        }
+
+        fprintf(stderr, "Unknown ioctl 0x%08x\n", (int)request);
+        abort();
+}
+
+/**
+ * Takes a reference on the process-wide simulator state, creating it on the
+ * first call.
+ *
+ * The first caller instantiates the simulated hardware, gives it 256MB of
+ * memory, sets up the heap that carves that memory into BOs, and creates the
+ * fd -> simulator file map.  All of it happens under sim_state.mutex so that
+ * a concurrent caller that only bumps the refcount never sees half-built
+ * state.
+ */
+static void
+vc5_simulator_init_global(void)
+{
+        mtx_lock(&sim_state.mutex);
+        if (sim_state.refcount++) {
+                mtx_unlock(&sim_state.mutex);
+                return;
+        }
+
+        sim_state.v3d = v3d_hw_auto_new(NULL);
+        v3d_hw_alloc_mem(sim_state.v3d, 256 * 1024 * 1024);
+        sim_state.mem_base =
+                v3d_hw_get_mem(sim_state.v3d, &sim_state.mem_size,
+                               &sim_state.mem);
+
+        sim_state.heap = u_mmInit(0, sim_state.mem_size);
+
+        /* Make a block of 0xd0 at address 0 to make sure we don't screw up
+         * and land there.
+         */
+        struct mem_block *b = u_mmAllocMem(sim_state.heap, 4096, GMP_ALIGN2, 0);
+        memset(sim_state.mem + b->ofs - sim_state.mem_base, 0xd0, 4096);
+
+        /* Create the fd map before dropping the lock: another thread that
+         * finds refcount already nonzero returns immediately and may start
+         * using the map right away.
+         */
+        sim_state.fd_map =
+                _mesa_hash_table_create(NULL,
+                                        _mesa_hash_pointer,
+                                        _mesa_key_pointer_equal);
+
+        mtx_unlock(&sim_state.mutex);
+}
+
+/**
+ * Sets up the per-screen simulator state.
+ *
+ * Ensures the global simulator exists, allocates the screen's simulator file
+ * (owned by the screen through ralloc), registers it in the global fd map
+ * and reserves a block of simulator memory for the file's GMP.
+ */
+void
+vc5_simulator_init(struct vc5_screen *screen)
+{
+        vc5_simulator_init_global();
+
+        screen->sim_file = rzalloc(screen, struct vc5_simulator_file);
+        struct vc5_simulator_file *sim_file = screen->sim_file;
+
+        sim_file->bo_map =
+                _mesa_hash_table_create(sim_file,
+                                        _mesa_hash_pointer,
+                                        _mesa_key_pointer_equal);
+
+        /* Both the fd map and the heap are shared, process-wide state, so
+         * touch them only while holding the global mutex.
+         */
+        mtx_lock(&sim_state.mutex);
+        /* The key is fd + 1 -- presumably so that fd 0 does not turn into a
+         * NULL hash key; confirm against int_to_key().
+         */
+        _mesa_hash_table_insert(sim_state.fd_map, int_to_key(screen->fd + 1),
+                                sim_file);
+
+        /* NOTE(review): 8096 is an unusual size and may have been meant to
+         * be 8192 -- confirm against the GMP layout before changing it.
+         */
+        sim_file->gmp = u_mmAllocMem(sim_state.heap, 8096, GMP_ALIGN2, 0);
+        mtx_unlock(&sim_state.mutex);
+
+        /* CPU-visible address of the GMP block inside simulator memory. */
+        sim_file->gmp_vaddr = (sim_state.mem + sim_file->gmp->ofs -
+                               sim_state.mem_base);
+}
+
+/**
+ * Drops one reference on the global simulator state and tears down the fd
+ * map and heap when the last reference goes away.
+ *
+ * The screen argument is not used by this function.
+ */
+void
+vc5_simulator_destroy(struct vc5_screen *screen)
+{
+        mtx_lock(&sim_state.mutex);
+
+        sim_state.refcount--;
+        if (sim_state.refcount == 0) {
+                _mesa_hash_table_destroy(sim_state.fd_map, NULL);
+                u_mmDestroy(sim_state.heap);
+                /* sim_state holds the mutex we are currently using, so it
+                 * cannot simply be memset to zero; only clear the memory
+                 * pointer.
+                 */
+                sim_state.mem = NULL;
+        }
+
+        mtx_unlock(&sim_state.mutex);
+}
+
+#endif /* USE_VC5_SIMULATOR */
diff --git a/src/gallium/drivers/vc5/vc5_state.c b/src/gallium/drivers/vc5/vc5_state.c
new file mode 100644 (file)
index 0000000..b289d20
--- /dev/null
@@ -0,0 +1,663 @@
+/*
+ * Copyright © 2014-2017 Broadcom
+ * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "pipe/p_state.h"
+#include "util/u_format.h"
+#include "util/u_inlines.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "util/u_half.h"
+#include "util/u_helpers.h"
+
+#include "vc5_context.h"
+#include "broadcom/cle/v3d_packet_v33_pack.h"
+
+/**
+ * Returns a heap-allocated copy of the size bytes at src, or NULL if the
+ * allocation fails.  The copy is released with free().
+ */
+static void *
+vc5_generic_cso_state_create(const void *src, uint32_t size)
+{
+        void *copy = calloc(1, size);
+
+        if (copy != NULL)
+                memcpy(copy, src, size);
+
+        return copy;
+}
+
+/**
+ * Generic CSO delete hook: the state object is a plain heap allocation, so
+ * releasing it is just free().  The context argument is not used.
+ */
+static void
+vc5_generic_cso_state_delete(struct pipe_context *pctx, void *hwcso)
+{
+        free(hwcso);
+}
+
+/**
+ * Stores the blend color in the context, both as the original floats and
+ * converted to half-floats, and flags the blend color as dirty.
+ */
+static void
+vc5_set_blend_color(struct pipe_context *pctx,
+                    const struct pipe_blend_color *blend_color)
+{
+        struct vc5_context *ctx = vc5_context(pctx);
+        int chan;
+
+        ctx->dirty |= VC5_DIRTY_BLEND_COLOR;
+        ctx->blend_color.f = *blend_color;
+
+        /* Keep a half-float copy of each of the four channels. */
+        for (chan = 0; chan < 4; chan++) {
+                ctx->blend_color.hf[chan] =
+                        util_float_to_half(blend_color->color[chan]);
+        }
+}
+
+/** Copies the stencil reference values into the context and marks them dirty. */
+static void
+vc5_set_stencil_ref(struct pipe_context *pctx,
+                    const struct pipe_stencil_ref *stencil_ref)
+{
+        struct vc5_context *ctx = vc5_context(pctx);
+
+        ctx->dirty |= VC5_DIRTY_STENCIL_REF;
+        ctx->stencil_ref = *stencil_ref;
+}
+
+/** Copies the clip state into the context and marks it dirty. */
+static void
+vc5_set_clip_state(struct pipe_context *pctx,
+                   const struct pipe_clip_state *clip)
+{
+        struct vc5_context *ctx = vc5_context(pctx);
+
+        ctx->dirty |= VC5_DIRTY_CLIP;
+        ctx->clip = *clip;
+}
+
+/**
+ * Stores the sample mask, keeping only the low VC5_MAX_SAMPLES bits, and
+ * marks it dirty.
+ */
+static void
+vc5_set_sample_mask(struct pipe_context *pctx, unsigned sample_mask)
+{
+        struct vc5_context *ctx = vc5_context(pctx);
+        const unsigned supported_bits = (1 << VC5_MAX_SAMPLES) - 1;
+
+        ctx->sample_mask = sample_mask & supported_bits;
+        ctx->dirty |= VC5_DIRTY_SAMPLE_MASK;
+}
+
+/**
+ * Truncates a 32-bit float to its upper 16 bits (sign, 8 exponent bits and
+ * the top 7 mantissa bits), discarding the low mantissa bits.
+ */
+static uint16_t
+float_to_187_half(float f)
+{
+        const uint32_t float_bits = fui(f);
+
+        return float_bits >> 16;
+}
+
+/**
+ * Creates the driver's rasterizer CSO: a copy of the gallium state plus a
+ * clamped point size and, when polygon offset is enabled for triangles, the
+ * offset units/factor pre-converted with float_to_187_half().
+ *
+ * Returns NULL on allocation failure.
+ */
+static void *
+vc5_create_rasterizer_state(struct pipe_context *pctx,
+                            const struct pipe_rasterizer_state *cso)
+{
+        struct vc5_rasterizer_state *rast =
+                CALLOC_STRUCT(vc5_rasterizer_state);
+
+        if (rast == NULL)
+                return NULL;
+
+        rast->base = *cso;
+
+        if (cso->offset_tri) {
+                rast->offset_factor = float_to_187_half(cso->offset_scale);
+                rast->offset_units = float_to_187_half(cso->offset_units);
+        }
+
+        /* Workaround: HW-2726 PTB does not handle zero-size points (BCM2835,
+         * BCM21553), so never let the point size drop below 1/8.
+         */
+        rast->point_size = MAX2(cso->point_size, .125f);
+
+        return rast;
+}
+
+/**
+ * Blend state is baked into shaders, so the CSO is simply a heap copy of the
+ * gallium blend state.
+ */
+static void *
+vc5_create_blend_state(struct pipe_context *pctx,
+                       const struct pipe_blend_state *cso)
+{
+        return vc5_generic_cso_state_create(cso,
+                                            sizeof(struct pipe_blend_state));
+}
+
+/**
+ * Creates the depth/stencil/alpha CSO: a copy of the gallium state plus a
+ * precomputed early_z_enable flag.
+ *
+ * Returns NULL on allocation failure.
+ */
+static void *
+vc5_create_depth_stencil_alpha_state(struct pipe_context *pctx,
+                                     const struct pipe_depth_stencil_alpha_state *cso)
+{
+        struct vc5_depth_stencil_alpha_state *zsa =
+                CALLOC_STRUCT(vc5_depth_stencil_alpha_state);
+
+        if (zsa == NULL)
+                return NULL;
+
+        zsa->base = *cso;
+
+        /* With the depth test off, early_z_enable keeps its zeroed value. */
+        if (!cso->depth.enabled)
+                return zsa;
+
+        /* Early Z is only used for the "less" direction; supporting the
+         * other direction would require guessing at runtime which one to
+         * program in the render config.
+         */
+        bool less_direction = (cso->depth.func == PIPE_FUNC_LESS ||
+                               cso->depth.func == PIPE_FUNC_LEQUAL);
+
+        /* Stencil must not be modified on depth failure.  The back-face
+         * state is only consulted when the front-face stencil is enabled.
+         */
+        bool back_keeps = (!cso->stencil[1].enabled ||
+                           cso->stencil[1].zfail_op == PIPE_STENCIL_OP_KEEP);
+        bool stencil_keeps =
+                (!cso->stencil[0].enabled ||
+                 (cso->stencil[0].zfail_op == PIPE_STENCIL_OP_KEEP &&
+                  back_keeps));
+
+        zsa->early_z_enable = (less_direction && stencil_keeps);
+
+        return zsa;
+}
+
+/** Copies the polygon stipple pattern into the context and marks it dirty. */
+static void
+vc5_set_polygon_stipple(struct pipe_context *pctx,
+                        const struct pipe_poly_stipple *stipple)
+{
+        struct vc5_context *ctx = vc5_context(pctx);
+
+        ctx->dirty |= VC5_DIRTY_STIPPLE;
+        ctx->stipple = *stipple;
+}
+
+/**
+ * Stores the scissor rectangle and marks it dirty.
+ *
+ * Only the first entry of the array is used; start_slot and num_scissors
+ * are ignored.
+ */
+static void
+vc5_set_scissor_states(struct pipe_context *pctx,
+                       unsigned start_slot,
+                       unsigned num_scissors,
+                       const struct pipe_scissor_state *scissor)
+{
+        struct vc5_context *ctx = vc5_context(pctx);
+
+        ctx->dirty |= VC5_DIRTY_SCISSOR;
+        ctx->scissor = scissor[0];
+}
+
+/**
+ * Stores the viewport and marks it dirty.
+ *
+ * Only the first entry of the array is used; start_slot and num_viewports
+ * are ignored.
+ */
+static void
+vc5_set_viewport_states(struct pipe_context *pctx,
+                        unsigned start_slot,
+                        unsigned num_viewports,
+                        const struct pipe_viewport_state *viewport)
+{
+        struct vc5_context *ctx = vc5_context(pctx);
+
+        ctx->dirty |= VC5_DIRTY_VIEWPORT;
+        ctx->viewport = viewport[0];
+}
+
+/**
+ * Updates the bound vertex buffers through the shared gallium helper and
+ * recomputes the buffer count from the enabled mask.
+ */
+static void
+vc5_set_vertex_buffers(struct pipe_context *pctx,
+                       unsigned start_slot, unsigned count,
+                       const struct pipe_vertex_buffer *vb)
+{
+        struct vc5_context *ctx = vc5_context(pctx);
+        struct vc5_vertexbuf_stateobj *vbufs = &ctx->vertexbuf;
+
+        util_set_vertex_buffers_mask(vbufs->vb, &vbufs->enabled_mask, vb,
+                                     start_slot, count);
+
+        /* The count is derived from the highest enabled slot. */
+        vbufs->count = util_last_bit(vbufs->enabled_mask);
+
+        ctx->dirty |= VC5_DIRTY_VTXBUF;
+}
+
+/** Binds a blend CSO (may be NULL) and marks the blend state dirty. */
+static void
+vc5_blend_state_bind(struct pipe_context *pctx, void *hwcso)
+{
+        struct vc5_context *ctx = vc5_context(pctx);
+
+        ctx->dirty |= VC5_DIRTY_BLEND;
+        ctx->blend = hwcso;
+}
+
+/**
+ * Binds a rasterizer CSO and marks the rasterizer state dirty.
+ *
+ * When both the old and the new state exist and disagree on flatshade, the
+ * flat-shade flags are marked dirty as well.
+ */
+static void
+vc5_rasterizer_state_bind(struct pipe_context *pctx, void *hwcso)
+{
+        struct vc5_context *ctx = vc5_context(pctx);
+        struct vc5_rasterizer_state *incoming = hwcso;
+
+        if (ctx->rasterizer && incoming) {
+                if (ctx->rasterizer->base.flatshade !=
+                    incoming->base.flatshade)
+                        ctx->dirty |= VC5_DIRTY_FLAT_SHADE_FLAGS;
+        }
+
+        ctx->dirty |= VC5_DIRTY_RASTERIZER;
+        ctx->rasterizer = hwcso;
+}
+
+/** Binds a depth/stencil/alpha CSO (may be NULL) and marks it dirty. */
+static void
+vc5_zsa_state_bind(struct pipe_context *pctx, void *hwcso)
+{
+        struct vc5_context *ctx = vc5_context(pctx);
+
+        ctx->dirty |= VC5_DIRTY_ZSA;
+        ctx->zsa = hwcso;
+}
+
+/**
+ * Creates a vertex-elements CSO holding a copy of the first num_elements
+ * entries of elements.  Returns NULL on allocation failure.
+ *
+ * The caller is trusted to pass no more elements than so->pipe can hold; no
+ * bound is checked here.
+ */
+static void *
+vc5_vertex_state_create(struct pipe_context *pctx, unsigned num_elements,
+                        const struct pipe_vertex_element *elements)
+{
+        struct vc5_vertex_stateobj *vtx = CALLOC_STRUCT(vc5_vertex_stateobj);
+
+        if (vtx == NULL)
+                return NULL;
+
+        vtx->num_elements = num_elements;
+        memcpy(vtx->pipe, elements, sizeof(elements[0]) * num_elements);
+
+        return vtx;
+}
+
+/** Binds a vertex-elements CSO (may be NULL) and marks it dirty. */
+static void
+vc5_vertex_state_bind(struct pipe_context *pctx, void *hwcso)
+{
+        struct vc5_context *ctx = vc5_context(pctx);
+
+        ctx->dirty |= VC5_DIRTY_VTXSTATE;
+        ctx->vtx = hwcso;
+}
+
+/**
+ * Binds or unbinds constant buffer slot index of the given shader stage.
+ *
+ * A NULL cb unbinds the slot: its enabled and dirty bits are cleared and the
+ * context dirty flag is left untouched.  Otherwise the slot is marked
+ * enabled and dirty and VC5_DIRTY_CONSTBUF is raised.
+ */
+static void
+vc5_set_constant_buffer(struct pipe_context *pctx, uint shader, uint index,
+                        const struct pipe_constant_buffer *cb)
+{
+        struct vc5_context *ctx = vc5_context(pctx);
+        struct vc5_constbuf_stateobj *bufs = &ctx->constbuf[shader];
+        const int slot_bit = 1 << index;
+
+        util_copy_constant_buffer(&bufs->cb[index], cb);
+
+        if (likely(cb)) {
+                bufs->enabled_mask |= slot_bit;
+                bufs->dirty_mask |= slot_bit;
+                ctx->dirty |= VC5_DIRTY_CONSTBUF;
+        } else {
+                /* The state tracker unbinds a constant buffer by passing
+                 * NULL.
+                 */
+                bufs->enabled_mask &= ~slot_bit;
+                bufs->dirty_mask &= ~slot_bit;
+        }
+}
+
+/**
+ * Replaces the bound framebuffer.
+ *
+ * The context's current job pointer is cleared, references are taken on the
+ * new color and depth/stencil surfaces, and any previously bound color
+ * buffers beyond the new count are released.
+ */
+static void
+vc5_set_framebuffer_state(struct pipe_context *pctx,
+                          const struct pipe_framebuffer_state *framebuffer)
+{
+        struct vc5_context *ctx = vc5_context(pctx);
+        struct pipe_framebuffer_state *bound = &ctx->framebuffer;
+        unsigned slot = 0;
+
+        ctx->job = NULL;
+
+        /* Take references on the incoming color buffers... */
+        while (slot < framebuffer->nr_cbufs) {
+                pipe_surface_reference(&bound->cbufs[slot],
+                                       framebuffer->cbufs[slot]);
+                slot++;
+        }
+
+        /* ...and drop whatever was bound in the slots past them. */
+        while (slot < bound->nr_cbufs) {
+                pipe_surface_reference(&bound->cbufs[slot], NULL);
+                slot++;
+        }
+
+        bound->nr_cbufs = framebuffer->nr_cbufs;
+
+        pipe_surface_reference(&bound->zsbuf, framebuffer->zsbuf);
+
+        bound->height = framebuffer->height;
+        bound->width = framebuffer->width;
+
+        ctx->dirty |= VC5_DIRTY_FRAMEBUFFER;
+}
+
+/**
+ * Returns the texture state object for a shader stage and, as a side
+ * effect, marks that stage's textures dirty.
+ *
+ * Only the fragment and vertex stages are supported; any other stage prints
+ * a message and aborts.
+ */
+static struct vc5_texture_stateobj *
+vc5_get_stage_tex(struct vc5_context *vc5, enum pipe_shader_type shader)
+{
+        switch (shader) {
+        case PIPE_SHADER_VERTEX:
+                vc5->dirty |= VC5_DIRTY_VERTTEX;
+                return &vc5->verttex;
+        case PIPE_SHADER_FRAGMENT:
+                vc5->dirty |= VC5_DIRTY_FRAGTEX;
+                return &vc5->fragtex;
+        default:
+                break;
+        }
+
+        fprintf(stderr, "Unknown shader target %d\n", shader);
+        abort();
+}
+
+/**
+ * Maps a gallium PIPE_TEX_WRAP_* mode to the value stored in the texture
+ * parameter's wrap mode fields.
+ *
+ * PIPE_TEX_WRAP_CLAMP has no direct equivalent: it is treated as clamp to
+ * edge when nearest filtering is in use and as clamp to border otherwise.
+ */
+static uint32_t translate_wrap(uint32_t pipe_wrap, bool using_nearest)
+{
+        switch (pipe_wrap) {
+        case PIPE_TEX_WRAP_REPEAT:
+                return 0;
+        case PIPE_TEX_WRAP_MIRROR_REPEAT:
+                return 2;
+        case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+                return 1;
+        case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+                return 3;
+        case PIPE_TEX_WRAP_CLAMP:
+                if (using_nearest)
+                        return 1;
+                return 3;
+        default:
+                unreachable("Unknown wrap mode");
+        }
+}
+
+
+/**
+ * Creates the driver's sampler CSO.
+ *
+ * The gallium sampler state is copied to the start of the object, and two
+ * pieces of hardware state are packed up front: the P0 texture uniform
+ * parameter (wrap modes) and the sampler's part of TEXTURE_SHADER_STATE
+ * (minimum LOD, depth compare function, LOD bias).
+ *
+ * Returns NULL on allocation failure.
+ */
+static void *
+vc5_create_sampler_state(struct pipe_context *pctx,
+                         const struct pipe_sampler_state *cso)
+{
+        struct vc5_sampler_state *so = CALLOC_STRUCT(vc5_sampler_state);
+
+        if (!so)
+                return NULL;
+
+        /* Assumes the pipe_sampler_state is the first member of
+         * vc5_sampler_state -- confirm against the struct definition.
+         */
+        memcpy(so, cso, sizeof(*cso));
+
+        /* The min/mag image filters are PIPE_TEX_FILTER_* values, so compare
+         * against that enum rather than the PIPE_TEX_MIPFILTER_* one.
+         */
+        bool either_nearest =
+                (cso->mag_img_filter == PIPE_TEX_FILTER_NEAREST ||
+                 cso->min_img_filter == PIPE_TEX_FILTER_NEAREST);
+
+        struct V3D33_TEXTURE_UNIFORM_PARAMETER_0_CFG_MODE1 p0_unpacked = {
+                .s_wrap_mode = translate_wrap(cso->wrap_s, either_nearest),
+                .t_wrap_mode = translate_wrap(cso->wrap_t, either_nearest),
+                .r_wrap_mode = translate_wrap(cso->wrap_r, either_nearest),
+        };
+        V3D33_TEXTURE_UNIFORM_PARAMETER_0_CFG_MODE1_pack(NULL,
+                                                         (uint8_t *)&so->p0,
+                                                         &p0_unpacked);
+
+        struct V3D33_TEXTURE_SHADER_STATE state_unpacked = {
+                cl_packet_header(TEXTURE_SHADER_STATE),
+
+                /* Negative minimum LODs are clamped to zero. */
+                .min_level_of_detail = MAX2(cso->min_lod, 0.0),
+                .depth_compare_function = cso->compare_func,
+                .fixed_bias = cso->lod_bias,
+        };
+        STATIC_ASSERT(ARRAY_SIZE(so->texture_shader_state) ==
+                      cl_packet_length(TEXTURE_SHADER_STATE));
+        cl_packet_pack(TEXTURE_SHADER_STATE)(NULL, so->texture_shader_state,
+                                             &state_unpacked);
+
+        return so;
+}
+
+/**
+ * Binds nr sampler CSOs for a shader stage, starting at slot 0.
+ *
+ * Slots that were bound before but lie past nr are cleared, and the sampler
+ * count becomes one past the highest non-NULL slot.
+ */
+static void
+vc5_sampler_states_bind(struct pipe_context *pctx,
+                        enum pipe_shader_type shader, unsigned start,
+                        unsigned nr, void **hwcso)
+{
+        struct vc5_context *ctx = vc5_context(pctx);
+        struct vc5_texture_stateobj *tex = vc5_get_stage_tex(ctx, shader);
+        unsigned highest_bound = 0;
+        unsigned slot;
+
+        assert(start == 0);
+
+        for (slot = 0; slot < nr; slot++) {
+                tex->samplers[slot] = hwcso[slot];
+                if (hwcso[slot] != NULL)
+                        highest_bound = slot + 1;
+        }
+
+        /* Unbind anything left over from the previous, longer binding. */
+        while (slot < tex->num_samplers) {
+                tex->samplers[slot] = NULL;
+                slot++;
+        }
+
+        tex->num_samplers = highest_bound;
+}
+
+/**
+ * Maps a gallium PIPE_SWIZZLE_* value to the value stored in the texture
+ * state's swizzle fields: 0 and 1 select the constants, and the X/Y/Z/W
+ * channels map to 2 plus their PIPE_SWIZZLE_* value.
+ */
+static uint32_t
+translate_swizzle(unsigned char pipe_swizzle)
+{
+        switch (pipe_swizzle) {
+        case PIPE_SWIZZLE_X:
+        case PIPE_SWIZZLE_Y:
+        case PIPE_SWIZZLE_Z:
+        case PIPE_SWIZZLE_W:
+                return 2 + pipe_swizzle;
+        case PIPE_SWIZZLE_1:
+                return 1;
+        case PIPE_SWIZZLE_0:
+                return 0;
+        default:
+                unreachable("unknown swizzle");
+        }
+}
+
+/**
+ * Creates a sampler view of prsc described by the template cso.
+ *
+ * Besides copying the template and taking a reference on the resource, this
+ * packs the hardware state that only depends on the view up front: the P1
+ * texture uniform parameter (which return words are requested), the composed
+ * format/view swizzle, and the view's part of TEXTURE_SHADER_STATE.
+ *
+ * Returns NULL on allocation failure.
+ */
+static struct pipe_sampler_view *
+vc5_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *prsc,
+                        const struct pipe_sampler_view *cso)
+{
+        struct vc5_sampler_view *so = CALLOC_STRUCT(vc5_sampler_view);
+        struct vc5_resource *rsc = vc5_resource(prsc);
+
+        if (!so)
+                return NULL;
+
+        so->base = *cso;
+
+        /* The view holds a reference on the resource; so->base.texture is
+         * pointed at it further down.
+         */
+        pipe_reference(NULL, &prsc->reference);
+
+        struct V3D33_TEXTURE_UNIFORM_PARAMETER_1_CFG_MODE1 unpacked = {
+        };
+
+        /* Select which return words to request: words 0 and 1 for 16-bit
+         * returns, otherwise one word per returned channel.
+         */
+        unpacked.return_word_0_of_texture_data = true;
+        if (vc5_get_tex_return_size(cso->format) == 16) {
+                unpacked.return_word_1_of_texture_data = true;
+        } else {
+                int chans = vc5_get_tex_return_channels(cso->format);
+
+                if (chans > 1)
+                        unpacked.return_word_1_of_texture_data = true;
+                if (chans > 2)
+                        unpacked.return_word_2_of_texture_data = true;
+                if (chans > 3)
+                        unpacked.return_word_3_of_texture_data = true;
+        }
+
+        V3D33_TEXTURE_UNIFORM_PARAMETER_1_CFG_MODE1_pack(NULL,
+                                                         (uint8_t *)&so->p1,
+                                                         &unpacked);
+
+        /* Compute the sampler view's swizzle up front. This will be plugged
+         * into either the sampler (for 16-bit returns) or the shader's
+         * texture key (for 32)
+         */
+        uint8_t view_swizzle[4] = {
+                cso->swizzle_r,
+                cso->swizzle_g,
+                cso->swizzle_b,
+                cso->swizzle_a
+        };
+        const uint8_t *fmt_swizzle = vc5_get_format_swizzle(so->base.format);
+        util_format_compose_swizzles(fmt_swizzle, view_swizzle, so->swizzle);
+
+        /* Fix up the fields of the copied template that must describe this
+         * view rather than the caller's template.
+         */
+        so->base.texture = prsc;
+        so->base.reference.count = 1;
+        so->base.context = pctx;
+
+        struct V3D33_TEXTURE_SHADER_STATE state_unpacked = {
+                cl_packet_header(TEXTURE_SHADER_STATE),
+
+                .image_width = prsc->width0,
+                .image_height = prsc->height0,
+                .image_depth = prsc->depth0,
+
+                .texture_type = rsc->tex_format,
+                .srgb = util_format_is_srgb(cso->format),
+
+                .base_level = cso->u.tex.first_level,
+                /* The field is in units of 64 bytes. */
+                .array_stride_64_byte_aligned = rsc->cube_map_stride / 64,
+        };
+
+        /* Note: Contrary to the docs, the swizzle still applies even
+         * if the return size is 32.  It's just that you probably want
+         * to swizzle in the shader, because you need the Y/Z/W
+         * channels to be defined.
+         */
+        if (vc5_get_tex_return_size(cso->format) != 32) {
+                state_unpacked.swizzle_r = translate_swizzle(so->swizzle[0]);
+                state_unpacked.swizzle_g = translate_swizzle(so->swizzle[1]);
+                state_unpacked.swizzle_b = translate_swizzle(so->swizzle[2]);
+                state_unpacked.swizzle_a = translate_swizzle(so->swizzle[3]);
+        } else {
+                /* 32-bit returns: program an identity swizzle here. */
+                state_unpacked.swizzle_r = translate_swizzle(PIPE_SWIZZLE_X);
+                state_unpacked.swizzle_g = translate_swizzle(PIPE_SWIZZLE_Y);
+                state_unpacked.swizzle_b = translate_swizzle(PIPE_SWIZZLE_Z);
+                state_unpacked.swizzle_a = translate_swizzle(PIPE_SWIZZLE_W);
+        }
+
+        /* XXX: While we need to use this flag to enable tiled
+         * resource sharing (even a small shared buffer should be UIF,
+         * not UBLINEAR or raster), this is also at the moment
+         * patching up the fact that our resource layout's decisions
+         * about XOR don't quite match the HW's.
+         */
+        switch (rsc->slices[0].tiling) {
+        case VC5_TILING_UIF_NO_XOR:
+        case VC5_TILING_UIF_XOR:
+                state_unpacked.level_0_is_strictly_uif = true;
+                state_unpacked.level_0_xor_enable = false;
+                break;
+        default:
+                break;
+        }
+
+        /* The packed packet must exactly fill the storage in the view. */
+        STATIC_ASSERT(ARRAY_SIZE(so->texture_shader_state) ==
+                      cl_packet_length(TEXTURE_SHADER_STATE));
+        cl_packet_pack(TEXTURE_SHADER_STATE)(NULL, so->texture_shader_state,
+                                             &state_unpacked);
+
+        return &so->base;
+}
+
+/**
+ * Destroys a sampler view: drops the view's reference on its texture and
+ * frees the view itself.
+ */
+static void
+vc5_sampler_view_destroy(struct pipe_context *pctx,
+                         struct pipe_sampler_view *view)
+{
+        pipe_resource_reference(&view->texture, NULL);
+        free(view);
+}
+
+/**
+ * Binds nr sampler views for a shader stage, starting at slot 0.
+ *
+ * References are taken on the new views, previously bound views past nr are
+ * released, and the texture count becomes one past the highest non-NULL
+ * slot.
+ */
+static void
+vc5_set_sampler_views(struct pipe_context *pctx,
+                      enum pipe_shader_type shader,
+                      unsigned start, unsigned nr,
+                      struct pipe_sampler_view **views)
+{
+        struct vc5_context *ctx = vc5_context(pctx);
+        struct vc5_texture_stateobj *tex = vc5_get_stage_tex(ctx, shader);
+        unsigned highest_bound = 0;
+        unsigned slot;
+
+        assert(start == 0);
+
+        for (slot = 0; slot < nr; slot++) {
+                pipe_sampler_view_reference(&tex->textures[slot],
+                                            views[slot]);
+                if (views[slot] != NULL)
+                        highest_bound = slot + 1;
+        }
+
+        /* Release views left over from the previous, longer binding. */
+        while (slot < tex->num_textures) {
+                pipe_sampler_view_reference(&tex->textures[slot], NULL);
+                slot++;
+        }
+
+        tex->num_textures = highest_bound;
+}
+
+/**
+ * Creates a stream output target covering buffer_size bytes of prsc starting
+ * at buffer_offset.  The target holds a reference on the resource.
+ *
+ * Returns NULL on allocation failure.
+ */
+static struct pipe_stream_output_target *
+vc5_create_stream_output_target(struct pipe_context *pctx,
+                                struct pipe_resource *prsc,
+                                unsigned buffer_offset,
+                                unsigned buffer_size)
+{
+        struct pipe_stream_output_target *so_target =
+                CALLOC_STRUCT(pipe_stream_output_target);
+
+        if (so_target == NULL)
+                return NULL;
+
+        so_target->context = pctx;
+        so_target->buffer_size = buffer_size;
+        so_target->buffer_offset = buffer_offset;
+
+        /* The new target starts with a single reference of its own and
+         * takes one on the buffer it writes to.
+         */
+        pipe_reference_init(&so_target->reference, 1);
+        pipe_resource_reference(&so_target->buffer, prsc);
+
+        return so_target;
+}
+
+/**
+ * Destroys a stream output target: drops its reference on the buffer and
+ * frees the target itself.
+ */
+static void
+vc5_stream_output_target_destroy(struct pipe_context *pctx,
+                                 struct pipe_stream_output_target *target)
+{
+        pipe_resource_reference(&target->buffer, NULL);
+        free(target);
+}
+
+/**
+ * Binds num_targets stream output targets.
+ *
+ * References are taken on the new targets and previously bound targets past
+ * num_targets are released.  The offsets argument is not used.
+ */
+static void
+vc5_set_stream_output_targets(struct pipe_context *pctx,
+                              unsigned num_targets,
+                              struct pipe_stream_output_target **targets,
+                              const unsigned *offsets)
+{
+        struct vc5_context *vc5 = vc5_context(pctx);
+        struct vc5_streamout_stateobj *streamout = &vc5->streamout;
+        unsigned slot = 0;
+
+        assert(num_targets <= ARRAY_SIZE(streamout->targets));
+
+        while (slot < num_targets) {
+                pipe_so_target_reference(&streamout->targets[slot],
+                                         targets[slot]);
+                slot++;
+        }
+
+        /* Release targets left over from the previous, longer binding. */
+        while (slot < streamout->num_targets) {
+                pipe_so_target_reference(&streamout->targets[slot], NULL);
+                slot++;
+        }
+
+        streamout->num_targets = num_targets;
+
+        vc5->dirty |= VC5_DIRTY_STREAMOUT;
+}
+
+/**
+ * Installs the state-related entry points of this file into the context's
+ * function table.
+ */
+void
+vc5_state_init(struct pipe_context *pctx)
+{
+        /* CSOs: create / bind / delete triples. */
+        pctx->create_blend_state = vc5_create_blend_state;
+        pctx->bind_blend_state = vc5_blend_state_bind;
+        pctx->delete_blend_state = vc5_generic_cso_state_delete;
+
+        pctx->create_depth_stencil_alpha_state = vc5_create_depth_stencil_alpha_state;
+        pctx->bind_depth_stencil_alpha_state = vc5_zsa_state_bind;
+        pctx->delete_depth_stencil_alpha_state = vc5_generic_cso_state_delete;
+
+        pctx->create_rasterizer_state = vc5_create_rasterizer_state;
+        pctx->bind_rasterizer_state = vc5_rasterizer_state_bind;
+        pctx->delete_rasterizer_state = vc5_generic_cso_state_delete;
+
+        pctx->create_sampler_state = vc5_create_sampler_state;
+        pctx->bind_sampler_states = vc5_sampler_states_bind;
+        pctx->delete_sampler_state = vc5_generic_cso_state_delete;
+
+        pctx->create_vertex_elements_state = vc5_vertex_state_create;
+        pctx->bind_vertex_elements_state = vc5_vertex_state_bind;
+        pctx->delete_vertex_elements_state = vc5_generic_cso_state_delete;
+
+        /* Sampler views and stream output targets. */
+        pctx->create_sampler_view = vc5_create_sampler_view;
+        pctx->set_sampler_views = vc5_set_sampler_views;
+        pctx->sampler_view_destroy = vc5_sampler_view_destroy;
+
+        pctx->create_stream_output_target = vc5_create_stream_output_target;
+        pctx->set_stream_output_targets = vc5_set_stream_output_targets;
+        pctx->stream_output_target_destroy = vc5_stream_output_target_destroy;
+
+        /* Immediate (non-CSO) state setters. */
+        pctx->set_blend_color = vc5_set_blend_color;
+        pctx->set_clip_state = vc5_set_clip_state;
+        pctx->set_constant_buffer = vc5_set_constant_buffer;
+        pctx->set_framebuffer_state = vc5_set_framebuffer_state;
+        pctx->set_polygon_stipple = vc5_set_polygon_stipple;
+        pctx->set_sample_mask = vc5_set_sample_mask;
+        pctx->set_scissor_states = vc5_set_scissor_states;
+        pctx->set_stencil_ref = vc5_set_stencil_ref;
+        pctx->set_vertex_buffers = vc5_set_vertex_buffers;
+        pctx->set_viewport_states = vc5_set_viewport_states;
+}
diff --git a/src/gallium/drivers/vc5/vc5_tiling.c b/src/gallium/drivers/vc5/vc5_tiling.c
new file mode 100644 (file)
index 0000000..279774e
--- /dev/null
@@ -0,0 +1,402 @@
+/*
+ * Copyright © 2014-2017 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/** @file vc5_tiling.c
+ *
+ * Handles information about the VC5 tiling formats, and loading and storing
+ * from them.
+ */
+
+#include <stdint.h>
+#include "vc5_screen.h"
+#include "vc5_context.h"
+#include "vc5_tiling.h"
+
+/**
+ * Pixel dimensions of a macroblock (a UIF block of 2x2 utiles) for one pixel
+ * size, as consumed by vc5_get_uif_pixel_offset().
+ */
+struct mb_layout {
+        /** Height, in pixels, of a macroblock (2x2 utiles, a UIF block). */
+        uint32_t height;
+        /** Width, in pixels, of a macroblock (2x2 utiles, a UIF block). */
+        uint32_t width;
+        /** Row pitch, in pixels, used to index a pixel inside one utile;
+         * get_mb_layout() asserts that it equals vc5_utile_width(cpp).
+         */
+        uint32_t tile_row_stride;
+};
+
+/* Indices into mb_layouts[].  The order is significant: get_mb_layout()
+ * computes the index as ffs(cpp) - 1, i.e. log2 of the bytes per pixel.
+ */
+enum {
+        MB_LAYOUT_8BPP,
+        MB_LAYOUT_16BPP,
+        MB_LAYOUT_32BPP,
+        MB_LAYOUT_64BPP,
+        MB_LAYOUT_128BPP,
+};
+
+/* Macroblock dimensions per pixel size, indexed by the MB_LAYOUT_* values.
+ * Every entry covers 256 bytes (width * height * bytes-per-pixel == 256).
+ */
+static const struct mb_layout mb_layouts[] = {
+        [MB_LAYOUT_8BPP] = { .height = 16, .width = 16, .tile_row_stride = 8 },
+        [MB_LAYOUT_16BPP] = { .height = 8, .width = 16, .tile_row_stride = 8 },
+        [MB_LAYOUT_32BPP] = { .height = 8, .width = 8, .tile_row_stride = 4 },
+        [MB_LAYOUT_64BPP] = { .height = 4, .width = 8, .tile_row_stride = 4 },
+        [MB_LAYOUT_128BPP] = { .height = 4, .width = 4, .tile_row_stride = 2 },
+};
+
+/**
+ * Looks up the macroblock (UIF block) dimensions for a pixel size.
+ *
+ * \param cpp  bytes per pixel; must be a power of two covered by mb_layouts[]
+ *             (1, 2, 4, 8 or 16)
+ * \return     pointer to the matching, statically allocated table entry
+ */
+static const struct mb_layout *
+get_mb_layout(int cpp)
+{
+        /* The table is indexed by log2(cpp), so reject anything that would
+         * index before, past, or between the entries.
+         */
+        assert(cpp > 0 && (cpp & (cpp - 1)) == 0);
+
+        int index = ffs(cpp) - 1;
+        assert(index < (int)(sizeof(mb_layouts) / sizeof(mb_layouts[0])));
+
+        const struct mb_layout *layout = &mb_layouts[index];
+
+        /* Sanity check the table.  XXX: We should de-duplicate.  */
+        assert(layout->width == vc5_utile_width(cpp) * 2);
+        assert(layout->height == vc5_utile_height(cpp) * 2);
+        assert(layout->tile_row_stride == vc5_utile_width(cpp));
+
+        return layout;
+}
+
+/**
+ * Width, in pixels, of one 64-byte microtile for the given bytes per pixel.
+ *
+ * Only 1, 2, 4, 8 and 16 bytes per pixel are handled; any other value hits
+ * unreachable().
+ */
+uint32_t
+vc5_utile_width(int cpp)
+{
+        if (cpp == 1 || cpp == 2)
+                return 8;
+
+        if (cpp == 4 || cpp == 8)
+                return 4;
+
+        if (cpp == 16)
+                return 2;
+
+        unreachable("unknown cpp");
+}
+
+/**
+ * Height, in pixels, of one 64-byte microtile for the given bytes per pixel.
+ *
+ * Only 1, 2, 4, 8 and 16 bytes per pixel are handled; any other value hits
+ * unreachable().
+ */
+uint32_t
+vc5_utile_height(int cpp)
+{
+        if (cpp == 1)
+                return 8;
+
+        if (cpp == 2 || cpp == 4)
+                return 4;
+
+        if (cpp == 8 || cpp == 16)
+                return 2;
+
+        unreachable("unknown cpp");
+}
+
+/**
+ * Computes where a pixel lives inside a single utile.
+ *
+ * A utile is a 64-byte block whose pixels are stored in raster order (for
+ * example 4x4 pixels at 32bpp).  \p x and \p y are relative to the utile's
+ * origin; the result is a byte offset from the start of the utile.
+ */
+static inline uint32_t
+vc5_get_utile_pixel_offset(uint32_t cpp, uint32_t x, uint32_t y)
+{
+        const uint32_t row_pixels = vc5_utile_width(cpp);
+        const uint32_t rows = vc5_utile_height(cpp);
+
+        assert(x < row_pixels && y < rows);
+
+        /* Raster order: full rows above the pixel, then pixels to its left. */
+        return (y * row_pixels + x) * cpp;
+}
+
+/**
+ * Byte offset of pixel (\p x, \p y) in a LINEARTILE image.
+ *
+ * LINEARTILE stores a single line of utiles running along either X or Y, so
+ * at most one of the two utile indices may be nonzero.  \p image_h is not
+ * used; it is only present to match the common address-function signature.
+ */
+static inline uint32_t
+vc5_get_lt_pixel_offset(uint32_t cpp, uint32_t image_h, uint32_t x, uint32_t y)
+{
+        const uint32_t utile_w = vc5_utile_width(cpp);
+        const uint32_t utile_h = vc5_utile_height(cpp);
+        const uint32_t utile_col = x / utile_w;
+        const uint32_t utile_row = y / utile_h;
+
+        assert(utile_col == 0 || utile_row == 0);
+
+        /* Each utile occupies 64 bytes. */
+        const uint32_t utile_base = 64 * (utile_col + utile_row);
+        const uint32_t within_utile =
+                vc5_get_utile_pixel_offset(cpp,
+                                           x & (utile_w - 1),
+                                           y & (utile_h - 1));
+
+        return utile_base + within_utile;
+}
+
+/**
+ * Byte offset of pixel (\p x, \p y) in a UBLINEAR image.
+ *
+ * UBLINEAR arranges pixels in UIF blocks (2x2 utiles, 256 bytes), and the
+ * UIF blocks in \p ublinear_number (1 or 2) columns in raster order.
+ */
+static inline uint32_t
+vc5_get_ublinear_pixel_offset(uint32_t cpp, uint32_t x, uint32_t y,
+                              int ublinear_number)
+{
+        const uint32_t utile_w = vc5_utile_width(cpp);
+        const uint32_t utile_h = vc5_utile_height(cpp);
+        const uint32_t block_col = x / (utile_w * 2);
+        const uint32_t block_row = y / (utile_h * 2);
+
+        /* Start of the UIF block that holds the pixel. */
+        uint32_t offset = 256 * (block_row * ublinear_number + block_col);
+
+        /* Pick the utile inside the block: right half, then bottom half. */
+        if (x & utile_w)
+                offset += 64;
+        if (y & utile_h)
+                offset += 128;
+
+        offset += vc5_get_utile_pixel_offset(cpp,
+                                             x & (utile_w - 1),
+                                             y & (utile_h - 1));
+
+        return offset;
+}
+
+/**
+ * UBLINEAR address function for the two-column variant.  \p image_h is
+ * ignored; it exists only to match the common address-function signature.
+ */
+static inline uint32_t
+vc5_get_ublinear_2_column_pixel_offset(uint32_t cpp, uint32_t image_h,
+                                       uint32_t x, uint32_t y)
+{
+        const int columns = 2;
+
+        (void)image_h;
+        return vc5_get_ublinear_pixel_offset(cpp, x, y, columns);
+}
+
+/**
+ * UBLINEAR address function for the single-column variant.  \p image_h is
+ * ignored; it exists only to match the common address-function signature.
+ */
+static inline uint32_t
+vc5_get_ublinear_1_column_pixel_offset(uint32_t cpp, uint32_t image_h,
+                                       uint32_t x, uint32_t y)
+{
+        const int columns = 1;
+
+        (void)image_h;
+        return vc5_get_ublinear_pixel_offset(cpp, x, y, columns);
+}
+
+/**
+ * Returns the byte offset for a given pixel in a UIF layout.
+ *
+ * UIF is the general VC5 tiling layout shared across 3D, media, and scanout.
+ * It stores pixels in UIF blocks (2x2 utiles, 256 bytes each).
+ *
+ * NOTE(review): this comment used to say that UIF blocks are stored in 4x4
+ * groups and that those groups are stored in raster order.  The macroblock
+ * index computed below instead places blocks in columns that are 4 blocks
+ * wide and as tall as the (macroblock-aligned) image, in raster order within
+ * each column -- confirm which description matches the hardware.
+ *
+ * \param cpp      bytes per pixel
+ * \param image_h  image height in pixels (aligned up to the macroblock height)
+ * \param x, y     pixel coordinates within the image
+ */
+static inline uint32_t
+vc5_get_uif_pixel_offset(uint32_t cpp, uint32_t image_h, uint32_t x, uint32_t y)
+{
+        const struct mb_layout *layout = get_mb_layout(cpp);
+        uint32_t mb_width = layout->width;
+        uint32_t mb_height = layout->height;
+        uint32_t log2_mb_width = ffs(mb_width) - 1;
+        uint32_t log2_mb_height = ffs(mb_height) - 1;
+
+        /* Macroblock X, y */
+        uint32_t mb_x = x >> log2_mb_width;
+        uint32_t mb_y = y >> log2_mb_height;
+        /* X, y within the macroblock */
+        uint32_t mb_pixel_x = x - (mb_x << log2_mb_width);
+        uint32_t mb_pixel_y = y - (mb_y << log2_mb_height);
+
+        /* Image height in macroblocks, then the linear macroblock index.
+         * The index equals (mb_x / 4) * (4 * mb_h) + mb_y * 4 + (mb_x % 4).
+         */
+        uint32_t mb_h = align(image_h, 1 << log2_mb_height) >> log2_mb_height;
+        uint32_t mb_id = ((mb_x / 4) * ((mb_h - 1) * 4)) + mb_x + mb_y * 4;
+
+        uint32_t mb_base_addr = mb_id * 256;
+
+        bool top = mb_pixel_y < mb_height / 2;
+        bool left = mb_pixel_x < mb_width / 2;
+
+        /* Docs have this in pixels, we do bytes here.  The four 64-byte
+         * utiles of a block are ordered top-left, top-right, bottom-left,
+         * bottom-right.
+         */
+        uint32_t mb_tile_offset = (!top * 128 + !left * 64);
+
+        /* Clear the bit that selected the utile half, leaving coordinates
+         * relative to the utile itself.
+         */
+        uint32_t mb_tile_y = mb_pixel_y & ~(mb_height / 2);
+        uint32_t mb_tile_x = mb_pixel_x & ~(mb_width / 2);
+        uint32_t mb_tile_pixel_id = (mb_tile_y *
+                                     layout->tile_row_stride +
+                                     mb_tile_x);
+
+        uint32_t mb_tile_addr = mb_tile_pixel_id * cpp;
+
+        uint32_t mb_pixel_address = (mb_base_addr +
+                                     mb_tile_offset +
+                                     mb_tile_addr);
+
+        return mb_pixel_address;
+}
+
+/**
+ * Copies a box of pixels, one pixel at a time, between a tiled GPU image and
+ * a linear CPU buffer.
+ *
+ * \param gpu              base of the tiled image; \p get_pixel_offset
+ *                         returns byte offsets relative to it
+ * \param gpu_stride       not used by this per-pixel path
+ * \param cpu              linear buffer whose first pixel corresponds to the
+ *                         box origin (box->x, box->y)
+ * \param cpu_stride       bytes between consecutive rows of \p cpu
+ * \param cpp              bytes per pixel
+ * \param image_h          image height in pixels, forwarded to
+ *                         \p get_pixel_offset
+ * \param box              region of the tiled image to transfer
+ * \param get_pixel_offset maps (cpp, image_h, x, y) to a byte offset in \p gpu
+ * \param is_load          true copies gpu -> cpu, false copies cpu -> gpu
+ */
+static inline void
+vc5_move_pixels_general_percpp(void *gpu, uint32_t gpu_stride,
+                               void *cpu, uint32_t cpu_stride,
+                               int cpp, uint32_t image_h,
+                               const struct pipe_box *box,
+                               uint32_t (*get_pixel_offset)(uint32_t cpp,
+                                                            uint32_t image_h,
+                                                            uint32_t x, uint32_t y),
+                               bool is_load)
+{
+        /* Use byte pointers: arithmetic on "void *" is a GNU extension, not
+         * ISO C.
+         */
+        uint8_t *gpu_bytes = gpu;
+        uint8_t *cpu_bytes = cpu;
+
+        /* Both loops use the same signed index type.  The original mixed
+         * uint32_t and int, which (assuming the box fields are signed ints
+         * -- TODO confirm) turned a negative height into a huge loop bound.
+         */
+        for (int y = 0; y < box->height; y++) {
+                uint8_t *cpu_row = cpu_bytes + (size_t)y * cpu_stride;
+
+                for (int x = 0; x < box->width; x++) {
+                        uint32_t pixel_offset = get_pixel_offset(cpp, image_h,
+                                                                 box->x + x,
+                                                                 box->y + y);
+                        uint8_t *cpu_pixel = cpu_row + (size_t)x * cpp;
+                        uint8_t *gpu_pixel = gpu_bytes + pixel_offset;
+
+                        /* Debug aid, compiled but never executed. */
+                        if (false) {
+                                fprintf(stderr, "%3d,%3d -> %u\n",
+                                        box->x + x, box->y + y,
+                                        (unsigned)pixel_offset);
+                        }
+
+                        if (is_load)
+                                memcpy(cpu_pixel, gpu_pixel, cpp);
+                        else
+                                memcpy(gpu_pixel, cpu_pixel, cpp);
+                }
+        }
+}
+
+/**
+ * Dispatches a pixel transfer to vc5_move_pixels_general_percpp() with the
+ * bytes-per-pixel passed as a literal constant in every case (presumably so
+ * the inlined copy loop can be specialised per pixel size -- the callee is
+ * static inline).
+ *
+ * All parameters have the same meaning as in
+ * vc5_move_pixels_general_percpp().  \p cpp must be 1, 2, 4, 8 or 16; any
+ * other value is a caller bug and hits unreachable(), matching
+ * vc5_utile_width() and vc5_utile_height(), instead of silently copying
+ * nothing.
+ */
+static inline void
+vc5_move_pixels_general(void *gpu, uint32_t gpu_stride,
+                               void *cpu, uint32_t cpu_stride,
+                               int cpp, uint32_t image_h,
+                               const struct pipe_box *box,
+                               uint32_t (*get_pixel_offset)(uint32_t cpp,
+                                                            uint32_t image_h,
+                                                            uint32_t x, uint32_t y),
+                               bool is_load)
+{
+        switch (cpp) {
+        case 1:
+                vc5_move_pixels_general_percpp(gpu, gpu_stride,
+                                               cpu, cpu_stride,
+                                               1, image_h, box,
+                                               get_pixel_offset,
+                                               is_load);
+                break;
+        case 2:
+                vc5_move_pixels_general_percpp(gpu, gpu_stride,
+                                               cpu, cpu_stride,
+                                               2, image_h, box,
+                                               get_pixel_offset,
+                                               is_load);
+                break;
+        case 4:
+                vc5_move_pixels_general_percpp(gpu, gpu_stride,
+                                               cpu, cpu_stride,
+                                               4, image_h, box,
+                                               get_pixel_offset,
+                                               is_load);
+                break;
+        case 8:
+                vc5_move_pixels_general_percpp(gpu, gpu_stride,
+                                               cpu, cpu_stride,
+                                               8, image_h, box,
+                                               get_pixel_offset,
+                                               is_load);
+                break;
+        case 16:
+                vc5_move_pixels_general_percpp(gpu, gpu_stride,
+                                               cpu, cpu_stride,
+                                               16, image_h, box,
+                                               get_pixel_offset,
+                                               is_load);
+                break;
+        default:
+                unreachable("unknown cpp");
+        }
+}
+
+/**
+ * Transfers a box of pixels between a tiled GPU image and a linear CPU
+ * buffer, using the address function that matches \p tiling_format.
+ *
+ * \p is_load selects the direction: true copies gpu -> cpu, false copies
+ * cpu -> gpu.  Tiling formats other than the four handled here hit
+ * unreachable().
+ */
+static inline void
+vc5_move_tiled_image(void *gpu, uint32_t gpu_stride,
+                     void *cpu, uint32_t cpu_stride,
+                     enum vc5_tiling_mode tiling_format,
+                     int cpp,
+                     uint32_t image_h,
+                     const struct pipe_box *box,
+                     bool is_load)
+{
+        switch (tiling_format) {
+        case VC5_TILING_LINEARTILE:
+                vc5_move_pixels_general(gpu, gpu_stride, cpu, cpu_stride,
+                                        cpp, image_h, box,
+                                        vc5_get_lt_pixel_offset,
+                                        is_load);
+                return;
+
+        case VC5_TILING_UBLINEAR_1_COLUMN:
+                vc5_move_pixels_general(gpu, gpu_stride, cpu, cpu_stride,
+                                        cpp, image_h, box,
+                                        vc5_get_ublinear_1_column_pixel_offset,
+                                        is_load);
+                return;
+
+        case VC5_TILING_UBLINEAR_2_COLUMN:
+                vc5_move_pixels_general(gpu, gpu_stride, cpu, cpu_stride,
+                                        cpp, image_h, box,
+                                        vc5_get_ublinear_2_column_pixel_offset,
+                                        is_load);
+                return;
+
+        case VC5_TILING_UIF_NO_XOR:
+                vc5_move_pixels_general(gpu, gpu_stride, cpu, cpu_stride,
+                                        cpp, image_h, box,
+                                        vc5_get_uif_pixel_offset,
+                                        is_load);
+                return;
+
+        default:
+                unreachable("Unsupported tiling format");
+        }
+}
+
+/**
+ * Reads the pixels of \p box out of the tiled image \p src and writes them,
+ * starting at the beginning of \p dst, as linear rows \p dst_stride bytes
+ * apart.  The box is expected to be microtile-aligned.
+ */
+void
+vc5_load_tiled_image(void *dst, uint32_t dst_stride,
+                     void *src, uint32_t src_stride,
+                     enum vc5_tiling_mode tiling_format, int cpp,
+                     uint32_t image_h,
+                     const struct pipe_box *box)
+{
+        /* The tiled (GPU) side is the source when loading. */
+        const bool is_load = true;
+
+        vc5_move_tiled_image(src, src_stride, dst, dst_stride,
+                             tiling_format, cpp, image_h, box, is_load);
+}
+
+/**
+ * Reads linear rows from the beginning of \p src (\p src_stride bytes
+ * apart) and writes them into \p box of the tiled image \p dst.  The box is
+ * expected to be microtile-aligned.
+ */
+void
+vc5_store_tiled_image(void *dst, uint32_t dst_stride,
+                      void *src, uint32_t src_stride,
+                      enum vc5_tiling_mode tiling_format, int cpp,
+                      uint32_t image_h,
+                      const struct pipe_box *box)
+{
+        /* The tiled (GPU) side is the destination when storing. */
+        const bool is_load = false;
+
+        vc5_move_tiled_image(dst, dst_stride, src, src_stride,
+                             tiling_format, cpp, image_h, box, is_load);
+}
diff --git a/src/gallium/drivers/vc5/vc5_tiling.h b/src/gallium/drivers/vc5/vc5_tiling.h
new file mode 100644 (file)
index 0000000..d3cf48c
--- /dev/null
@@ -0,0 +1,43 @@
+/*
+ * Copyright © 2014 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef VC5_TILING_H
+#define VC5_TILING_H
+
+/* Width/height, in pixels, of a 64-byte microtile for the given bytes per
+ * pixel.
+ */
+uint32_t vc5_utile_width(int cpp) ATTRIBUTE_CONST;
+uint32_t vc5_utile_height(int cpp) ATTRIBUTE_CONST;
+/* NOTE(review): vc5_tiling.c as added by this change does not define the
+ * next three functions -- confirm they are provided elsewhere.
+ */
+bool vc5_size_is_lt(uint32_t width, uint32_t height, int cpp) ATTRIBUTE_CONST;
+void vc5_load_utile(void *dst, void *src, uint32_t dst_stride, uint32_t cpp);
+void vc5_store_utile(void *dst, void *src, uint32_t src_stride, uint32_t cpp);
+/* Copy the pixels of a box from a tiled image (src) to a linear buffer
+ * (dst).
+ */
+void vc5_load_tiled_image(void *dst, uint32_t dst_stride,
+                          void *src, uint32_t src_stride,
+                          enum vc5_tiling_mode tiling_format, int cpp,
+                          uint32_t image_h,
+                          const struct pipe_box *box);
+/* Copy pixels from a linear buffer (src) into a box of a tiled image (dst). */
+void vc5_store_tiled_image(void *dst, uint32_t dst_stride,
+                           void *src, uint32_t src_stride,
+                           enum vc5_tiling_mode tiling_format, int cpp,
+                           uint32_t image_h,
+                           const struct pipe_box *box);
+
+#endif /* VC5_TILING_H */
diff --git a/src/gallium/drivers/vc5/vc5_uniforms.c b/src/gallium/drivers/vc5/vc5_uniforms.c
new file mode 100644 (file)
index 0000000..dc444fe
--- /dev/null
@@ -0,0 +1,417 @@
+/*
+ * Copyright © 2014-2017 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "util/u_pack_color.h"
+#include "util/format_srgb.h"
+
+#include "vc5_context.h"
+#include "compiler/v3d_compiler.h"
+#include "broadcom/cle/v3d_packet_v33_pack.h"
+
+#if 0 /* Compiled out: QUNIFORM_TEXTURE_BORDER_COLOR is still an XXX stub. */
+
+#define SWIZ(x,y,z,w) {          \
+        PIPE_SWIZZLE_##x, \
+        PIPE_SWIZZLE_##y, \
+        PIPE_SWIZZLE_##z, \
+        PIPE_SWIZZLE_##w  \
+}
+
+/**
+ * Packs the border color of sampler \p unit into a single 32-bit word and
+ * appends it to the uniform stream.
+ */
+static void
+write_texture_border_color(struct vc5_job *job,
+                           struct vc5_cl_out **uniforms,
+                           struct vc5_texture_stateobj *texstate,
+                           uint32_t unit)
+{
+        struct pipe_sampler_state *sampler = texstate->samplers[unit];
+        struct pipe_sampler_view *texture = texstate->textures[unit];
+        struct vc5_resource *rsc = vc5_resource(texture->texture);
+        union util_color uc;
+
+        const struct util_format_description *tex_format_desc =
+                util_format_description(texture->format);
+
+        /* Only the three color channels are converted to sRGB; alpha is
+         * copied through unchanged.
+         */
+        float border_color[4];
+        for (int i = 0; i < 4; i++)
+                border_color[i] = sampler->border_color.f[i];
+        if (util_format_is_srgb(texture->format)) {
+                for (int i = 0; i < 3; i++)
+                        border_color[i] =
+                                util_format_linear_to_srgb_float(border_color[i]);
+        }
+
+        /* Turn the border color into the layout of channels that it would
+         * have when stored as texture contents.
+         */
+        float storage_color[4];
+        util_format_unswizzle_4f(storage_color,
+                                 border_color,
+                                 tex_format_desc->swizzle);
+
+        /* Now, pack so that when the vc5_format-sampled texture contents are
+         * replaced with our border color, the vc5_get_format_swizzle()
+         * swizzling will get the right channels.
+         */
+        if (util_format_is_depth_or_stencil(texture->format)) {
+                uc.ui[0] = util_pack_z(PIPE_FORMAT_Z24X8_UNORM,
+                                       sampler->border_color.f[0]) << 8;
+        } else {
+                switch (rsc->vc5_format) {
+                default:
+                case VC5_TEXTURE_TYPE_RGBA8888:
+                        util_pack_color(storage_color,
+                                        PIPE_FORMAT_R8G8B8A8_UNORM, &uc);
+                        break;
+                case VC5_TEXTURE_TYPE_RGBA4444:
+                        util_pack_color(storage_color,
+                                        PIPE_FORMAT_A8B8G8R8_UNORM, &uc);
+                        break;
+                case VC5_TEXTURE_TYPE_RGB565:
+                        util_pack_color(storage_color,
+                                        PIPE_FORMAT_B8G8R8A8_UNORM, &uc);
+                        break;
+                case VC5_TEXTURE_TYPE_ALPHA:
+                        uc.ui[0] = float_to_ubyte(storage_color[0]) << 24;
+                        break;
+                case VC5_TEXTURE_TYPE_LUMALPHA:
+                        uc.ui[0] = ((float_to_ubyte(storage_color[1]) << 24) |
+                                    (float_to_ubyte(storage_color[0]) << 0));
+                        break;
+                }
+        }
+
+        cl_aligned_u32(uniforms, uc.ui[0]);
+}
+#endif /* 0: texture border color upload */
+
+/**
+ * Returns the bit pattern (via fui()) of the float 1.0 / dimension for the
+ * texture bound at index \p data: the width for QUNIFORM_TEXRECT_SCALE_X,
+ * the height for any other \p contents.
+ */
+static uint32_t
+get_texrect_scale(struct vc5_texture_stateobj *texstate,
+                  enum quniform_contents contents,
+                  uint32_t data)
+{
+        struct pipe_sampler_view *view = texstate->textures[data];
+        uint32_t extent = (contents == QUNIFORM_TEXRECT_SCALE_X ?
+                           view->texture->width0 :
+                           view->texture->height0);
+
+        return fui(1.0f / extent);
+}
+
+/**
+ * Returns the size property selected by \p contents for the texture bound
+ * at index \p data.
+ *
+ * Width, height and depth are minified to the view's first level; the array
+ * size comes straight from the resource; the level count is the number of
+ * levels the view covers.  Any other \p contents hits unreachable().
+ */
+static uint32_t
+get_texture_size(struct vc5_texture_stateobj *texstate,
+                 enum quniform_contents contents,
+                 uint32_t data)
+{
+        struct pipe_sampler_view *view = texstate->textures[data];
+        uint32_t size;
+
+        switch (contents) {
+        case QUNIFORM_TEXTURE_WIDTH:
+                size = u_minify(view->texture->width0,
+                                view->u.tex.first_level);
+                break;
+        case QUNIFORM_TEXTURE_HEIGHT:
+                size = u_minify(view->texture->height0,
+                                view->u.tex.first_level);
+                break;
+        case QUNIFORM_TEXTURE_DEPTH:
+                size = u_minify(view->texture->depth0,
+                                view->u.tex.first_level);
+                break;
+        case QUNIFORM_TEXTURE_ARRAY_SIZE:
+                size = view->texture->array_size;
+                break;
+        case QUNIFORM_TEXTURE_LEVELS:
+                size = (view->u.tex.last_level -
+                        view->u.tex.first_level) + 1;
+                break;
+        default:
+                unreachable("Bad texture size field");
+        }
+
+        return size;
+}
+
+/**
+ * Allocates a BO of the shader's ubo_size and fills it by copying each of
+ * the shader's UBO ranges out of \p gallium_uniforms.
+ *
+ * \return the new BO, or NULL when the shader's ubo_size is zero.  The
+ *         caller in this file drops its reference with vc5_bo_unreference().
+ */
+static struct vc5_bo *
+vc5_upload_ubo(struct vc5_context *vc5,
+               struct vc5_compiled_shader *shader,
+               const uint32_t *gallium_uniforms)
+{
+        if (shader->prog_data.base->ubo_size == 0)
+                return NULL;
+
+        struct vc5_bo *ubo = vc5_bo_alloc(vc5->screen,
+                                          shader->prog_data.base->ubo_size,
+                                          "ubo");
+        void *map = vc5_bo_map(ubo);
+        uint8_t *dst = map;
+        const uint8_t *src = (const uint8_t *)gallium_uniforms;
+
+        /* Range offsets and sizes are in bytes. */
+        for (uint32_t range = 0;
+             range < shader->prog_data.base->num_ubo_ranges;
+             range++) {
+                memcpy(dst + shader->prog_data.base->ubo_ranges[range].dst_offset,
+                       src + shader->prog_data.base->ubo_ranges[range].src_offset,
+                       shader->prog_data.base->ubo_ranges[range].size);
+        }
+
+        return ubo;
+}
+
+/**
+ * Emits the P0 (CFG_MODE=1) texture parameter for texture unit \p unit.
+ *
+ * The word is the OR of two sources: \p shader_data, which depends on the
+ * kind of sample the shader performs, and the p0 bits precomputed in the
+ * sampler state.
+ */
+static void
+write_texture_p0(struct vc5_job *job,
+                 struct vc5_cl_out **uniforms,
+                 struct vc5_texture_stateobj *texstate,
+                 uint32_t unit,
+                 uint32_t shader_data)
+{
+        struct vc5_sampler_state *sampler =
+                vc5_sampler_state(texstate->samplers[unit]);
+
+        cl_aligned_u32(uniforms, shader_data | sampler->p0);
+}
+
+/**
+ * Emits the P1 (CFG_MODE=1) texture parameter for texture unit \p unit: the
+ * packed texture state record address, ORed with the p1 bits stored in the
+ * sampler view.
+ */
+static void
+write_texture_p1(struct vc5_job *job,
+                 struct vc5_cl_out **uniforms,
+                 struct vc5_texture_stateobj *texstate,
+                 uint32_t unit)
+{
+        struct vc5_sampler_view *sview =
+                vc5_sampler_view(texstate->textures[unit]);
+
+        struct V3D33_TEXTURE_UNIFORM_PARAMETER_1_CFG_MODE1 unpacked = {
+                .texture_state_record_base_address = texstate->texture_state[unit],
+        };
+        uint32_t packed;
+
+        V3D33_TEXTURE_UNIFORM_PARAMETER_1_CFG_MODE1_pack(&job->indirect,
+                                                         (uint8_t *)&packed,
+                                                         &unpacked);
+
+        cl_aligned_u32(uniforms, packed | sview->p1);
+}
+
+struct vc5_cl_reloc
+vc5_write_uniforms(struct vc5_context *vc5, struct vc5_compiled_shader *shader,
+                   struct vc5_constbuf_stateobj *cb,
+                   struct vc5_texture_stateobj *texstate)
+{
+        struct v3d_uniform_list *uinfo = &shader->prog_data.base->uniforms;
+        struct vc5_job *job = vc5->job;
+        const uint32_t *gallium_uniforms = cb->cb[0].user_buffer;
+        struct vc5_bo *ubo = vc5_upload_ubo(vc5, shader, gallium_uniforms);
+
+        /* We always need to return some space for uniforms, because the HW
+         * will be prefetching, even if we don't read any in the program.
+         */
+        vc5_cl_ensure_space(&job->indirect, MAX2(uinfo->count, 1) * 4, 4);
+
+        struct vc5_cl_reloc uniform_stream =
+                cl_address(job->indirect.bo, cl_offset(&job->indirect));
+        vc5_bo_reference(uniform_stream.bo);
+
+        struct vc5_cl_out *uniforms =
+                cl_start(&job->indirect);
+
+        for (int i = 0; i < uinfo->count; i++) {
+
+                switch (uinfo->contents[i]) {
+                case QUNIFORM_CONSTANT:
+                        cl_aligned_u32(&uniforms, uinfo->data[i]);
+                        break;
+                case QUNIFORM_UNIFORM:
+                        cl_aligned_u32(&uniforms,
+                                       gallium_uniforms[uinfo->data[i]]);
+                        break;
+                case QUNIFORM_VIEWPORT_X_SCALE:
+                        cl_aligned_f(&uniforms, vc5->viewport.scale[0] * 256.0f);
+                        break;
+                case QUNIFORM_VIEWPORT_Y_SCALE:
+                        cl_aligned_f(&uniforms, vc5->viewport.scale[1] * 256.0f);
+                        break;
+
+                case QUNIFORM_VIEWPORT_Z_OFFSET:
+                        cl_aligned_f(&uniforms, vc5->viewport.translate[2]);
+                        break;
+                case QUNIFORM_VIEWPORT_Z_SCALE:
+                        cl_aligned_f(&uniforms, vc5->viewport.scale[2]);
+                        break;
+
+                case QUNIFORM_USER_CLIP_PLANE:
+                        cl_aligned_f(&uniforms,
+                                     vc5->clip.ucp[uinfo->data[i] / 4][uinfo->data[i] % 4]);
+                        break;
+
+                case QUNIFORM_TEXTURE_CONFIG_P1:
+                        write_texture_p1(job, &uniforms, texstate,
+                                         uinfo->data[i]);
+                        break;
+
+#if 0
+                case QUNIFORM_TEXTURE_FIRST_LEVEL:
+                        write_texture_first_level(job, &uniforms, texstate,
+                                                  uinfo->data[i]);
+                        break;
+#endif
+
+                case QUNIFORM_TEXRECT_SCALE_X:
+                case QUNIFORM_TEXRECT_SCALE_Y:
+                        cl_aligned_u32(&uniforms,
+                                       get_texrect_scale(texstate,
+                                                         uinfo->contents[i],
+                                                         uinfo->data[i]));
+                        break;
+
+                case QUNIFORM_TEXTURE_WIDTH:
+                case QUNIFORM_TEXTURE_HEIGHT:
+                case QUNIFORM_TEXTURE_DEPTH:
+                case QUNIFORM_TEXTURE_ARRAY_SIZE:
+                case QUNIFORM_TEXTURE_LEVELS:
+                        cl_aligned_u32(&uniforms,
+                                       get_texture_size(texstate,
+                                                        uinfo->contents[i],
+                                                        uinfo->data[i]));
+                        break;
+
+                case QUNIFORM_STENCIL:
+                        cl_aligned_u32(&uniforms,
+                                       vc5->zsa->stencil_uniforms[uinfo->data[i]] |
+                                       (uinfo->data[i] <= 1 ?
+                                        (vc5->stencil_ref.ref_value[uinfo->data[i]] << 8) :
+                                        0));
+                        break;
+
+                case QUNIFORM_ALPHA_REF:
+                        cl_aligned_f(&uniforms,
+                                     vc5->zsa->base.alpha.ref_value);
+                        break;
+
+                case QUNIFORM_SAMPLE_MASK:
+                        cl_aligned_u32(&uniforms, vc5->sample_mask);
+                        break;
+
+                case QUNIFORM_UBO_ADDR:
+                        if (uinfo->data[i] == 0) {
+                                cl_aligned_reloc(&job->indirect, &uniforms,
+                                                 ubo, 0);
+                        } else {
+                                int ubo_index = uinfo->data[i];
+                                struct vc5_resource *rsc =
+                                        vc5_resource(cb->cb[ubo_index].buffer);
+
+                                cl_aligned_reloc(&job->indirect, &uniforms,
+                                                 rsc->bo,
+                                                 cb->cb[ubo_index].buffer_offset);
+                        }
+                        break;
+
+                case QUNIFORM_TEXTURE_FIRST_LEVEL:
+                case QUNIFORM_TEXTURE_MSAA_ADDR:
+                case QUNIFORM_TEXTURE_BORDER_COLOR:
+                        /* XXX */
+                        break;
+
+                default:
+                        assert(quniform_contents_is_texture_p0(uinfo->contents[i]));
+
+                        write_texture_p0(job, &uniforms, texstate,
+                                         uinfo->contents[i] -
+                                         QUNIFORM_TEXTURE_CONFIG_P0_0,
+                                         uinfo->data[i]);
+                        break;
+
+                }
+#if 0
+                uint32_t written_val = *((uint32_t *)uniforms - 1);
+                fprintf(stderr, "shader %p[%d]: 0x%08x / 0x%08x (%f)\n",
+                        shader, i, __gen_address_offset(&uniform_stream) + i * 4,
+                        written_val, uif(written_val));
+#endif
+        }
+
+        cl_end(&job->indirect, uniforms);
+
+        vc5_bo_unreference(&ubo);
+
+        return uniform_stream;
+}
+
+/**
+ * Computes which VC5_DIRTY_* state bits affect \p shader's uniform stream
+ * and stores the mask in shader->uniform_dirty_bits.
+ *
+ * Each entry of the shader's uniform list contributes the dirty bit(s) of
+ * the state it is sourced from; plain constants contribute nothing.
+ */
+void
+vc5_set_shader_uniform_dirty_flags(struct vc5_compiled_shader *shader)
+{
+        const struct v3d_uniform_list *uinfo =
+                &shader->prog_data.base->uniforms;
+        uint32_t dirty = 0;
+
+        for (int i = 0; i < uinfo->count; i++) {
+                switch (uinfo->contents[i]) {
+                case QUNIFORM_CONSTANT:
+                        /* Baked into the shader; no state dependency. */
+                        break;
+
+                case QUNIFORM_UNIFORM:
+                case QUNIFORM_UBO_ADDR:
+                        dirty |= VC5_DIRTY_CONSTBUF;
+                        break;
+
+                case QUNIFORM_VIEWPORT_X_SCALE:
+                case QUNIFORM_VIEWPORT_Y_SCALE:
+                case QUNIFORM_VIEWPORT_Z_OFFSET:
+                case QUNIFORM_VIEWPORT_Z_SCALE:
+                        dirty |= VC5_DIRTY_VIEWPORT;
+                        break;
+
+                case QUNIFORM_USER_CLIP_PLANE:
+                        dirty |= VC5_DIRTY_CLIP;
+                        break;
+
+                case QUNIFORM_STENCIL:
+                case QUNIFORM_ALPHA_REF:
+                        dirty |= VC5_DIRTY_ZSA;
+                        break;
+
+                case QUNIFORM_SAMPLE_MASK:
+                        dirty |= VC5_DIRTY_SAMPLE_MASK;
+                        break;
+
+                case QUNIFORM_TEXTURE_CONFIG_P1:
+                case QUNIFORM_TEXTURE_BORDER_COLOR:
+                case QUNIFORM_TEXTURE_FIRST_LEVEL:
+                case QUNIFORM_TEXTURE_MSAA_ADDR:
+                case QUNIFORM_TEXRECT_SCALE_X:
+                case QUNIFORM_TEXRECT_SCALE_Y:
+                case QUNIFORM_TEXTURE_WIDTH:
+                case QUNIFORM_TEXTURE_HEIGHT:
+                case QUNIFORM_TEXTURE_DEPTH:
+                case QUNIFORM_TEXTURE_ARRAY_SIZE:
+                case QUNIFORM_TEXTURE_LEVELS:
+                        /* We could flag this on just the stage we're
+                         * compiling for, but it's not passed in.
+                         */
+                        dirty |= VC5_DIRTY_FRAGTEX | VC5_DIRTY_VERTTEX;
+                        break;
+
+                default:
+                        /* Everything left must be a texture P0 uniform. */
+                        assert(quniform_contents_is_texture_p0(uinfo->contents[i]));
+                        dirty |= VC5_DIRTY_FRAGTEX | VC5_DIRTY_VERTTEX;
+                        break;
+                }
+        }
+
+        shader->uniform_dirty_bits = dirty;
+}
index 2d2e1aecec54a6b2e7548ffbfd91fa147ab36fe0..c54f7a69d0df360ffc25c09efc1eec83ac0c9fb6 100644 (file)
@@ -82,6 +82,7 @@ include $(top_srcdir)/src/gallium/drivers/svga/Automake.inc
 include $(top_srcdir)/src/gallium/drivers/freedreno/Automake.inc
 
 include $(top_srcdir)/src/gallium/drivers/vc4/Automake.inc
+include $(top_srcdir)/src/gallium/drivers/vc5/Automake.inc
 include $(top_srcdir)/src/gallium/drivers/pl111/Automake.inc
 
 include $(top_srcdir)/src/gallium/drivers/virgl/Automake.inc
index a831e35bea40e167c5a6c0d0ea885ff7b7cec467..5ee1761fdba8da5ae1363e3e2c7f81a92f8e090d 100644 (file)
@@ -78,6 +78,10 @@ DEFINE_LOADER_DRM_ENTRYPOINT(pl111)
 #endif
 #endif
 
+#if defined(GALLIUM_VC5)
+DEFINE_LOADER_DRM_ENTRYPOINT(vc5)
+#endif
+
 #if defined(GALLIUM_ETNAVIV)
 DEFINE_LOADER_DRM_ENTRYPOINT(imx_drm)
 DEFINE_LOADER_DRM_ENTRYPOINT(etnaviv)
diff --git a/src/gallium/winsys/vc5/drm/Android.mk b/src/gallium/winsys/vc5/drm/Android.mk
new file mode 100644 (file)
index 0000000..3b1523b
--- /dev/null
@@ -0,0 +1,33 @@
+# Copyright (C) 2014 Emil Velikov <emil.l.velikov@gmail.com>
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+LOCAL_PATH := $(call my-dir)
+
+# Defines C_SOURCES, the file list built into libmesa_winsys_vc5 below.
+include $(LOCAL_PATH)/Makefile.sources
+
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := $(C_SOURCES)
+
+LOCAL_MODULE := libmesa_winsys_vc5
+
+include $(GALLIUM_COMMON_MK)
+include $(BUILD_STATIC_LIBRARY)
diff --git a/src/gallium/winsys/vc5/drm/Makefile.am b/src/gallium/winsys/vc5/drm/Makefile.am
new file mode 100644 (file)
index 0000000..fc5d1ca
--- /dev/null
@@ -0,0 +1,31 @@
+# Copyright © 2014 Broadcom
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+# IN THE SOFTWARE.
+
+# Makefile.sources provides C_SOURCES, the list of files in this library.
+include Makefile.sources
+include $(top_srcdir)/src/gallium/Automake.inc
+
+# src/gallium/drivers is put on the include path so the winsys sources can
+# use driver-relative includes such as "vc5/vc5_screen.h".
+AM_CFLAGS = \
+       -I$(top_srcdir)/src/gallium/drivers \
+       $(GALLIUM_WINSYS_CFLAGS)
+
+# noinst_: a libtool convenience library that is built but not installed.
+noinst_LTLIBRARIES = libvc5drm.la
+
+libvc5drm_la_SOURCES = $(C_SOURCES)
diff --git a/src/gallium/winsys/vc5/drm/Makefile.sources b/src/gallium/winsys/vc5/drm/Makefile.sources
new file mode 100644 (file)
index 0000000..ea7566f
--- /dev/null
@@ -0,0 +1,3 @@
+# Files making up the vc5 DRM winsys.  This list is included by both
+# Makefile.am and Android.mk in this directory.
+C_SOURCES := \
+       vc5_drm_public.h \
+       vc5_drm_winsys.c
diff --git a/src/gallium/winsys/vc5/drm/vc5_drm_public.h b/src/gallium/winsys/vc5/drm/vc5_drm_public.h
new file mode 100644 (file)
index 0000000..6e19848
--- /dev/null
@@ -0,0 +1,31 @@
+/*
+ * Copyright © 2014 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef __VC5_DRM_PUBLIC_H__
+#define __VC5_DRM_PUBLIC_H__
+
+/* Only used as a pointer type in this header, so a forward declaration is
+ * enough.
+ */
+struct pipe_screen;
+
+/* Creates a vc5 screen for the DRM file descriptor drmFD.
+ *
+ * The implementation in vc5_drm_winsys.c duplicates drmFD with
+ * F_DUPFD_CLOEXEC and passes the duplicate, not drmFD itself, to
+ * vc5_screen_create().
+ */
+struct pipe_screen *vc5_drm_screen_create(int drmFD);
+
+#endif /* __VC5_DRM_PUBLIC_H__ */
diff --git a/src/gallium/winsys/vc5/drm/vc5_drm_winsys.c b/src/gallium/winsys/vc5/drm/vc5_drm_winsys.c
new file mode 100644 (file)
index 0000000..d089291
--- /dev/null
@@ -0,0 +1,35 @@
+/*
+ * Copyright © 2014 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <unistd.h>
+#include <fcntl.h>
+
+#include "vc5_drm_public.h"
+
+#include "vc5/vc5_screen.h"
+
+/* Winsys entry point: creates a vc5 screen on top of a DRM file descriptor.
+ *
+ * The screen is handed a close-on-exec duplicate of fd rather than fd
+ * itself.  The duplicate is requested at descriptor number 3 or higher, so
+ * it never lands on the standard stream descriptors 0-2.
+ *
+ * Returns the result of vc5_screen_create(), or NULL if fd could not be
+ * duplicated.
+ */
+struct pipe_screen *
+vc5_drm_screen_create(int fd)
+{
+       int dup_fd = fcntl(fd, F_DUPFD_CLOEXEC, 3);
+
+       /* fcntl() reports failure (invalid fd, descriptor limit reached)
+        * as -1; don't pass an invalid descriptor on to the driver.
+        */
+       if (dup_fd < 0)
+               return NULL;
+
+       return vc5_screen_create(dup_fd);
+}